check in v3.8.2 source
This commit is contained in:
598
crypto/rc4/rc4-elf-x86_64.S
Normal file
598
crypto/rc4/rc4-elf-x86_64.S
Normal file
@@ -0,0 +1,598 @@
|
||||
#include "x86_arch.h"
|
||||
.text
|
||||
|
||||
.hidden OPENSSL_ia32cap_P
|
||||
|
||||
.globl RC4
|
||||
.type RC4,@function
|
||||
.align 16
|
||||
RC4:
|
||||
endbr64
|
||||
orq %rsi,%rsi
|
||||
jne .Lentry
|
||||
retq
|
||||
.Lentry:
|
||||
pushq %rbx
|
||||
pushq %r12
|
||||
pushq %r13
|
||||
.Lprologue:
|
||||
movq %rsi,%r11
|
||||
movq %rdx,%r12
|
||||
movq %rcx,%r13
|
||||
xorq %r10,%r10
|
||||
xorq %rcx,%rcx
|
||||
|
||||
leaq 8(%rdi),%rdi
|
||||
movb -8(%rdi),%r10b
|
||||
movb -4(%rdi),%cl
|
||||
cmpl $-1,256(%rdi)
|
||||
je .LRC4_CHAR
|
||||
movl OPENSSL_ia32cap_P(%rip),%r8d
|
||||
xorq %rbx,%rbx
|
||||
incb %r10b
|
||||
subq %r10,%rbx
|
||||
subq %r12,%r13
|
||||
movl (%rdi,%r10,4),%eax
|
||||
testq $-16,%r11
|
||||
jz .Lloop1
|
||||
btl $IA32CAP_BIT0_INTEL,%r8d
|
||||
jc .Lintel
|
||||
andq $7,%rbx
|
||||
leaq 1(%r10),%rsi
|
||||
jz .Loop8
|
||||
subq %rbx,%r11
|
||||
.Loop8_warmup:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl %edx,(%rdi,%r10,4)
|
||||
addb %dl,%al
|
||||
incb %r10b
|
||||
movl (%rdi,%rax,4),%edx
|
||||
movl (%rdi,%r10,4),%eax
|
||||
xorb (%r12),%dl
|
||||
movb %dl,(%r13,%r12,1)
|
||||
leaq 1(%r12),%r12
|
||||
decq %rbx
|
||||
jnz .Loop8_warmup
|
||||
|
||||
leaq 1(%r10),%rsi
|
||||
jmp .Loop8
|
||||
.align 16
|
||||
.Loop8:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 0(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,0(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl 4(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,4(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 8(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,8(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl 12(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,12(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 16(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,16(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl 20(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,20(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 24(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,24(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb $8,%sil
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl -4(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,28(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb $8,%r10b
|
||||
rorq $8,%r8
|
||||
subq $8,%r11
|
||||
|
||||
xorq (%r12),%r8
|
||||
movq %r8,(%r13,%r12,1)
|
||||
leaq 8(%r12),%r12
|
||||
|
||||
testq $-8,%r11
|
||||
jnz .Loop8
|
||||
cmpq $0,%r11
|
||||
jne .Lloop1
|
||||
jmp .Lexit
|
||||
|
||||
.align 16
|
||||
.Lintel:
|
||||
testq $-32,%r11
|
||||
jz .Lloop1
|
||||
andq $15,%rbx
|
||||
jz .Loop16_is_hot
|
||||
subq %rbx,%r11
|
||||
.Loop16_warmup:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl %edx,(%rdi,%r10,4)
|
||||
addb %dl,%al
|
||||
incb %r10b
|
||||
movl (%rdi,%rax,4),%edx
|
||||
movl (%rdi,%r10,4),%eax
|
||||
xorb (%r12),%dl
|
||||
movb %dl,(%r13,%r12,1)
|
||||
leaq 1(%r12),%r12
|
||||
decq %rbx
|
||||
jnz .Loop16_warmup
|
||||
|
||||
movq %rcx,%rbx
|
||||
xorq %rcx,%rcx
|
||||
movb %bl,%cl
|
||||
|
||||
.Loop16_is_hot:
|
||||
leaq (%rdi,%r10,4),%rsi
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
pxor %xmm0,%xmm0
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 4(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,0(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $0,(%rdi,%rax,4),%xmm0
|
||||
jmp .Loop16_enter
|
||||
.align 16
|
||||
.Loop16:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
pxor %xmm0,%xmm2
|
||||
psllq $8,%xmm1
|
||||
pxor %xmm0,%xmm0
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 4(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,0(%rsi)
|
||||
pxor %xmm1,%xmm2
|
||||
addb %bl,%cl
|
||||
pinsrw $0,(%rdi,%rax,4),%xmm0
|
||||
movdqu %xmm2,(%r13,%r12,1)
|
||||
leaq 16(%r12),%r12
|
||||
.Loop16_enter:
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
pxor %xmm1,%xmm1
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 8(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,4(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $0,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 12(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,8(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $1,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 16(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,12(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $1,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 20(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,16(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $2,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 24(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,20(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $2,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 28(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,24(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $3,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 32(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,28(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $3,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 36(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,32(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $4,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 40(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,36(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $4,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 44(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,40(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $5,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 48(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,44(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $5,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 52(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,48(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $6,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 56(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,52(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $6,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 60(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,56(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $7,(%rdi,%rax,4),%xmm0
|
||||
addb $16,%r10b
|
||||
movdqu (%r12),%xmm2
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,60(%rsi)
|
||||
leaq (%rdi,%r10,4),%rsi
|
||||
pinsrw $7,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rsi),%eax
|
||||
movq %rcx,%rbx
|
||||
xorq %rcx,%rcx
|
||||
subq $16,%r11
|
||||
movb %bl,%cl
|
||||
testq $-16,%r11
|
||||
jnz .Loop16
|
||||
|
||||
psllq $8,%xmm1
|
||||
pxor %xmm0,%xmm2
|
||||
pxor %xmm1,%xmm2
|
||||
movdqu %xmm2,(%r13,%r12,1)
|
||||
leaq 16(%r12),%r12
|
||||
|
||||
cmpq $0,%r11
|
||||
jne .Lloop1
|
||||
jmp .Lexit
|
||||
|
||||
.align 16
|
||||
.Lloop1:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl %edx,(%rdi,%r10,4)
|
||||
addb %dl,%al
|
||||
incb %r10b
|
||||
movl (%rdi,%rax,4),%edx
|
||||
movl (%rdi,%r10,4),%eax
|
||||
xorb (%r12),%dl
|
||||
movb %dl,(%r13,%r12,1)
|
||||
leaq 1(%r12),%r12
|
||||
decq %r11
|
||||
jnz .Lloop1
|
||||
jmp .Lexit
|
||||
|
||||
.align 16
|
||||
.LRC4_CHAR:
|
||||
addb $1,%r10b
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
testq $-8,%r11
|
||||
jz .Lcloop1
|
||||
jmp .Lcloop8
|
||||
.align 16
|
||||
.Lcloop8:
|
||||
movl (%r12),%r8d
|
||||
movl 4(%r12),%r9d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne .Lcmov0
|
||||
movq %rax,%rbx
|
||||
.Lcmov0:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne .Lcmov1
|
||||
movq %rbx,%rax
|
||||
.Lcmov1:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne .Lcmov2
|
||||
movq %rax,%rbx
|
||||
.Lcmov2:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne .Lcmov3
|
||||
movq %rbx,%rax
|
||||
.Lcmov3:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne .Lcmov4
|
||||
movq %rax,%rbx
|
||||
.Lcmov4:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne .Lcmov5
|
||||
movq %rbx,%rax
|
||||
.Lcmov5:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne .Lcmov6
|
||||
movq %rax,%rbx
|
||||
.Lcmov6:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne .Lcmov7
|
||||
movq %rbx,%rax
|
||||
.Lcmov7:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
leaq -8(%r11),%r11
|
||||
movl %r8d,(%r13)
|
||||
leaq 8(%r12),%r12
|
||||
movl %r9d,4(%r13)
|
||||
leaq 8(%r13),%r13
|
||||
|
||||
testq $-8,%r11
|
||||
jnz .Lcloop8
|
||||
cmpq $0,%r11
|
||||
jne .Lcloop1
|
||||
jmp .Lexit
|
||||
.align 16
|
||||
.Lcloop1:
|
||||
addb %al,%cl
|
||||
movzbl %cl,%ecx
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
addb %al,%dl
|
||||
addb $1,%r10b
|
||||
movzbl %dl,%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%rdx,1),%edx
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
xorb (%r12),%dl
|
||||
leaq 1(%r12),%r12
|
||||
movb %dl,(%r13)
|
||||
leaq 1(%r13),%r13
|
||||
subq $1,%r11
|
||||
jnz .Lcloop1
|
||||
jmp .Lexit
|
||||
|
||||
.align 16
|
||||
.Lexit:
|
||||
subb $1,%r10b
|
||||
movl %r10d,-8(%rdi)
|
||||
movl %ecx,-4(%rdi)
|
||||
|
||||
movq (%rsp),%r13
|
||||
movq 8(%rsp),%r12
|
||||
movq 16(%rsp),%rbx
|
||||
addq $24,%rsp
|
||||
.Lepilogue:
|
||||
retq
|
||||
.size RC4,.-RC4
|
||||
.globl RC4_set_key
|
||||
.type RC4_set_key,@function
|
||||
.align 16
|
||||
RC4_set_key:
|
||||
endbr64
|
||||
leaq 8(%rdi),%rdi
|
||||
leaq (%rdx,%rsi,1),%rdx
|
||||
negq %rsi
|
||||
movq %rsi,%rcx
|
||||
xorl %eax,%eax
|
||||
xorq %r9,%r9
|
||||
xorq %r10,%r10
|
||||
xorq %r11,%r11
|
||||
|
||||
movl OPENSSL_ia32cap_P(%rip),%r8d
|
||||
btl $IA32CAP_BIT0_INTELP4,%r8d
|
||||
jc .Lc1stloop
|
||||
jmp .Lw1stloop
|
||||
|
||||
.align 16
|
||||
.Lw1stloop:
|
||||
movl %eax,(%rdi,%rax,4)
|
||||
addb $1,%al
|
||||
jnc .Lw1stloop
|
||||
|
||||
xorq %r9,%r9
|
||||
xorq %r8,%r8
|
||||
.align 16
|
||||
.Lw2ndloop:
|
||||
movl (%rdi,%r9,4),%r10d
|
||||
addb (%rdx,%rsi,1),%r8b
|
||||
addb %r10b,%r8b
|
||||
addq $1,%rsi
|
||||
movl (%rdi,%r8,4),%r11d
|
||||
cmovzq %rcx,%rsi
|
||||
movl %r10d,(%rdi,%r8,4)
|
||||
movl %r11d,(%rdi,%r9,4)
|
||||
addb $1,%r9b
|
||||
jnc .Lw2ndloop
|
||||
jmp .Lexit_key
|
||||
|
||||
.align 16
|
||||
.Lc1stloop:
|
||||
movb %al,(%rdi,%rax,1)
|
||||
addb $1,%al
|
||||
jnc .Lc1stloop
|
||||
|
||||
xorq %r9,%r9
|
||||
xorq %r8,%r8
|
||||
.align 16
|
||||
.Lc2ndloop:
|
||||
movb (%rdi,%r9,1),%r10b
|
||||
addb (%rdx,%rsi,1),%r8b
|
||||
addb %r10b,%r8b
|
||||
addq $1,%rsi
|
||||
movb (%rdi,%r8,1),%r11b
|
||||
jnz .Lcnowrap
|
||||
movq %rcx,%rsi
|
||||
.Lcnowrap:
|
||||
movb %r10b,(%rdi,%r8,1)
|
||||
movb %r11b,(%rdi,%r9,1)
|
||||
addb $1,%r9b
|
||||
jnc .Lc2ndloop
|
||||
movl $-1,256(%rdi)
|
||||
|
||||
.align 16
|
||||
.Lexit_key:
|
||||
xorl %eax,%eax
|
||||
movl %eax,-8(%rdi)
|
||||
movl %eax,-4(%rdi)
|
||||
retq
|
||||
.size RC4_set_key,.-RC4_set_key
|
||||
#if defined(HAVE_GNU_STACK)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
592
crypto/rc4/rc4-macosx-x86_64.S
Normal file
592
crypto/rc4/rc4-macosx-x86_64.S
Normal file
@@ -0,0 +1,592 @@
|
||||
#include "x86_arch.h"
|
||||
.text
|
||||
|
||||
.private_extern _OPENSSL_ia32cap_P
|
||||
|
||||
.globl _RC4
|
||||
|
||||
.p2align 4
|
||||
_RC4: orq %rsi,%rsi
|
||||
jne L$entry
|
||||
retq
|
||||
L$entry:
|
||||
pushq %rbx
|
||||
pushq %r12
|
||||
pushq %r13
|
||||
L$prologue:
|
||||
movq %rsi,%r11
|
||||
movq %rdx,%r12
|
||||
movq %rcx,%r13
|
||||
xorq %r10,%r10
|
||||
xorq %rcx,%rcx
|
||||
|
||||
leaq 8(%rdi),%rdi
|
||||
movb -8(%rdi),%r10b
|
||||
movb -4(%rdi),%cl
|
||||
cmpl $-1,256(%rdi)
|
||||
je L$RC4_CHAR
|
||||
movl _OPENSSL_ia32cap_P(%rip),%r8d
|
||||
xorq %rbx,%rbx
|
||||
incb %r10b
|
||||
subq %r10,%rbx
|
||||
subq %r12,%r13
|
||||
movl (%rdi,%r10,4),%eax
|
||||
testq $-16,%r11
|
||||
jz L$loop1
|
||||
btl $IA32CAP_BIT0_INTEL,%r8d
|
||||
jc L$intel
|
||||
andq $7,%rbx
|
||||
leaq 1(%r10),%rsi
|
||||
jz L$oop8
|
||||
subq %rbx,%r11
|
||||
L$oop8_warmup:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl %edx,(%rdi,%r10,4)
|
||||
addb %dl,%al
|
||||
incb %r10b
|
||||
movl (%rdi,%rax,4),%edx
|
||||
movl (%rdi,%r10,4),%eax
|
||||
xorb (%r12),%dl
|
||||
movb %dl,(%r13,%r12,1)
|
||||
leaq 1(%r12),%r12
|
||||
decq %rbx
|
||||
jnz L$oop8_warmup
|
||||
|
||||
leaq 1(%r10),%rsi
|
||||
jmp L$oop8
|
||||
.p2align 4
|
||||
L$oop8:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 0(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,0(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl 4(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,4(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 8(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,8(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl 12(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,12(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 16(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,16(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl 20(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,20(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 24(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,24(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb $8,%sil
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl -4(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,28(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb $8,%r10b
|
||||
rorq $8,%r8
|
||||
subq $8,%r11
|
||||
|
||||
xorq (%r12),%r8
|
||||
movq %r8,(%r13,%r12,1)
|
||||
leaq 8(%r12),%r12
|
||||
|
||||
testq $-8,%r11
|
||||
jnz L$oop8
|
||||
cmpq $0,%r11
|
||||
jne L$loop1
|
||||
jmp L$exit
|
||||
|
||||
.p2align 4
|
||||
L$intel:
|
||||
testq $-32,%r11
|
||||
jz L$loop1
|
||||
andq $15,%rbx
|
||||
jz L$oop16_is_hot
|
||||
subq %rbx,%r11
|
||||
L$oop16_warmup:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl %edx,(%rdi,%r10,4)
|
||||
addb %dl,%al
|
||||
incb %r10b
|
||||
movl (%rdi,%rax,4),%edx
|
||||
movl (%rdi,%r10,4),%eax
|
||||
xorb (%r12),%dl
|
||||
movb %dl,(%r13,%r12,1)
|
||||
leaq 1(%r12),%r12
|
||||
decq %rbx
|
||||
jnz L$oop16_warmup
|
||||
|
||||
movq %rcx,%rbx
|
||||
xorq %rcx,%rcx
|
||||
movb %bl,%cl
|
||||
|
||||
L$oop16_is_hot:
|
||||
leaq (%rdi,%r10,4),%rsi
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
pxor %xmm0,%xmm0
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 4(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,0(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $0,(%rdi,%rax,4),%xmm0
|
||||
jmp L$oop16_enter
|
||||
.p2align 4
|
||||
L$oop16:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
pxor %xmm0,%xmm2
|
||||
psllq $8,%xmm1
|
||||
pxor %xmm0,%xmm0
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 4(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,0(%rsi)
|
||||
pxor %xmm1,%xmm2
|
||||
addb %bl,%cl
|
||||
pinsrw $0,(%rdi,%rax,4),%xmm0
|
||||
movdqu %xmm2,(%r13,%r12,1)
|
||||
leaq 16(%r12),%r12
|
||||
L$oop16_enter:
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
pxor %xmm1,%xmm1
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 8(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,4(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $0,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 12(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,8(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $1,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 16(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,12(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $1,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 20(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,16(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $2,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 24(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,20(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $2,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 28(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,24(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $3,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 32(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,28(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $3,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 36(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,32(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $4,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 40(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,36(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $4,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 44(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,40(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $5,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 48(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,44(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $5,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 52(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,48(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $6,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 56(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,52(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $6,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 60(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,56(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $7,(%rdi,%rax,4),%xmm0
|
||||
addb $16,%r10b
|
||||
movdqu (%r12),%xmm2
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,60(%rsi)
|
||||
leaq (%rdi,%r10,4),%rsi
|
||||
pinsrw $7,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rsi),%eax
|
||||
movq %rcx,%rbx
|
||||
xorq %rcx,%rcx
|
||||
subq $16,%r11
|
||||
movb %bl,%cl
|
||||
testq $-16,%r11
|
||||
jnz L$oop16
|
||||
|
||||
psllq $8,%xmm1
|
||||
pxor %xmm0,%xmm2
|
||||
pxor %xmm1,%xmm2
|
||||
movdqu %xmm2,(%r13,%r12,1)
|
||||
leaq 16(%r12),%r12
|
||||
|
||||
cmpq $0,%r11
|
||||
jne L$loop1
|
||||
jmp L$exit
|
||||
|
||||
.p2align 4
|
||||
L$loop1:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl %edx,(%rdi,%r10,4)
|
||||
addb %dl,%al
|
||||
incb %r10b
|
||||
movl (%rdi,%rax,4),%edx
|
||||
movl (%rdi,%r10,4),%eax
|
||||
xorb (%r12),%dl
|
||||
movb %dl,(%r13,%r12,1)
|
||||
leaq 1(%r12),%r12
|
||||
decq %r11
|
||||
jnz L$loop1
|
||||
jmp L$exit
|
||||
|
||||
.p2align 4
|
||||
L$RC4_CHAR:
|
||||
addb $1,%r10b
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
testq $-8,%r11
|
||||
jz L$cloop1
|
||||
jmp L$cloop8
|
||||
.p2align 4
|
||||
L$cloop8:
|
||||
movl (%r12),%r8d
|
||||
movl 4(%r12),%r9d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne L$cmov0
|
||||
movq %rax,%rbx
|
||||
L$cmov0:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne L$cmov1
|
||||
movq %rbx,%rax
|
||||
L$cmov1:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne L$cmov2
|
||||
movq %rax,%rbx
|
||||
L$cmov2:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne L$cmov3
|
||||
movq %rbx,%rax
|
||||
L$cmov3:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne L$cmov4
|
||||
movq %rax,%rbx
|
||||
L$cmov4:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne L$cmov5
|
||||
movq %rbx,%rax
|
||||
L$cmov5:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne L$cmov6
|
||||
movq %rax,%rbx
|
||||
L$cmov6:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne L$cmov7
|
||||
movq %rbx,%rax
|
||||
L$cmov7:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
leaq -8(%r11),%r11
|
||||
movl %r8d,(%r13)
|
||||
leaq 8(%r12),%r12
|
||||
movl %r9d,4(%r13)
|
||||
leaq 8(%r13),%r13
|
||||
|
||||
testq $-8,%r11
|
||||
jnz L$cloop8
|
||||
cmpq $0,%r11
|
||||
jne L$cloop1
|
||||
jmp L$exit
|
||||
.p2align 4
|
||||
L$cloop1:
|
||||
addb %al,%cl
|
||||
movzbl %cl,%ecx
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
addb %al,%dl
|
||||
addb $1,%r10b
|
||||
movzbl %dl,%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%rdx,1),%edx
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
xorb (%r12),%dl
|
||||
leaq 1(%r12),%r12
|
||||
movb %dl,(%r13)
|
||||
leaq 1(%r13),%r13
|
||||
subq $1,%r11
|
||||
jnz L$cloop1
|
||||
jmp L$exit
|
||||
|
||||
.p2align 4
|
||||
L$exit:
|
||||
subb $1,%r10b
|
||||
movl %r10d,-8(%rdi)
|
||||
movl %ecx,-4(%rdi)
|
||||
|
||||
movq (%rsp),%r13
|
||||
movq 8(%rsp),%r12
|
||||
movq 16(%rsp),%rbx
|
||||
addq $24,%rsp
|
||||
L$epilogue:
|
||||
retq
|
||||
|
||||
.globl _RC4_set_key
|
||||
|
||||
.p2align 4
|
||||
_RC4_set_key:
|
||||
leaq 8(%rdi),%rdi
|
||||
leaq (%rdx,%rsi,1),%rdx
|
||||
negq %rsi
|
||||
movq %rsi,%rcx
|
||||
xorl %eax,%eax
|
||||
xorq %r9,%r9
|
||||
xorq %r10,%r10
|
||||
xorq %r11,%r11
|
||||
|
||||
movl _OPENSSL_ia32cap_P(%rip),%r8d
|
||||
btl $IA32CAP_BIT0_INTELP4,%r8d
|
||||
jc L$c1stloop
|
||||
jmp L$w1stloop
|
||||
|
||||
.p2align 4
|
||||
L$w1stloop:
|
||||
movl %eax,(%rdi,%rax,4)
|
||||
addb $1,%al
|
||||
jnc L$w1stloop
|
||||
|
||||
xorq %r9,%r9
|
||||
xorq %r8,%r8
|
||||
.p2align 4
|
||||
L$w2ndloop:
|
||||
movl (%rdi,%r9,4),%r10d
|
||||
addb (%rdx,%rsi,1),%r8b
|
||||
addb %r10b,%r8b
|
||||
addq $1,%rsi
|
||||
movl (%rdi,%r8,4),%r11d
|
||||
cmovzq %rcx,%rsi
|
||||
movl %r10d,(%rdi,%r8,4)
|
||||
movl %r11d,(%rdi,%r9,4)
|
||||
addb $1,%r9b
|
||||
jnc L$w2ndloop
|
||||
jmp L$exit_key
|
||||
|
||||
.p2align 4
|
||||
L$c1stloop:
|
||||
movb %al,(%rdi,%rax,1)
|
||||
addb $1,%al
|
||||
jnc L$c1stloop
|
||||
|
||||
xorq %r9,%r9
|
||||
xorq %r8,%r8
|
||||
.p2align 4
|
||||
L$c2ndloop:
|
||||
movb (%rdi,%r9,1),%r10b
|
||||
addb (%rdx,%rsi,1),%r8b
|
||||
addb %r10b,%r8b
|
||||
addq $1,%rsi
|
||||
movb (%rdi,%r8,1),%r11b
|
||||
jnz L$cnowrap
|
||||
movq %rcx,%rsi
|
||||
L$cnowrap:
|
||||
movb %r10b,(%rdi,%r8,1)
|
||||
movb %r11b,(%rdi,%r9,1)
|
||||
addb $1,%r9b
|
||||
jnc L$c2ndloop
|
||||
movl $-1,256(%rdi)
|
||||
|
||||
.p2align 4
|
||||
L$exit_key:
|
||||
xorl %eax,%eax
|
||||
movl %eax,-8(%rdi)
|
||||
movl %eax,-4(%rdi)
|
||||
retq
|
||||
|
691
crypto/rc4/rc4-masm-x86_64.S
Normal file
691
crypto/rc4/rc4-masm-x86_64.S
Normal file
@@ -0,0 +1,691 @@
|
||||
; 1 "crypto/rc4/rc4-masm-x86_64.S.tmp"
|
||||
; 1 "<built-in>" 1
|
||||
; 1 "<built-in>" 3
|
||||
; 399 "<built-in>" 3
|
||||
; 1 "<command line>" 1
|
||||
; 1 "<built-in>" 2
|
||||
; 1 "crypto/rc4/rc4-masm-x86_64.S.tmp" 2
|
||||
OPTION DOTNAME
|
||||
|
||||
; 1 "./crypto/x86_arch.h" 1
|
||||
|
||||
|
||||
; 16 "./crypto/x86_arch.h"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
; 40 "./crypto/x86_arch.h"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
; 3 "crypto/rc4/rc4-masm-x86_64.S.tmp" 2
|
||||
.text$ SEGMENT ALIGN(64) 'CODE'
|
||||
EXTERN OPENSSL_ia32cap_P:NEAR
|
||||
|
||||
|
||||
PUBLIC RC4
|
||||
|
||||
ALIGN 16
|
||||
RC4 PROC PUBLIC
|
||||
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
|
||||
mov QWORD PTR[16+rsp],rsi
|
||||
mov rax,rsp
|
||||
$L$SEH_begin_RC4::
|
||||
mov rdi,rcx
|
||||
mov rsi,rdx
|
||||
mov rdx,r8
|
||||
mov rcx,r9
|
||||
|
||||
or rsi,rsi
|
||||
jne $L$entry
|
||||
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
|
||||
mov rsi,QWORD PTR[16+rsp]
|
||||
DB 0F3h,0C3h ;repret
|
||||
$L$entry::
|
||||
push rbx
|
||||
push r12
|
||||
push r13
|
||||
$L$prologue::
|
||||
mov r11,rsi
|
||||
mov r12,rdx
|
||||
mov r13,rcx
|
||||
xor r10,r10
|
||||
xor rcx,rcx
|
||||
|
||||
lea rdi,QWORD PTR[8+rdi]
|
||||
mov r10b,BYTE PTR[((-8))+rdi]
|
||||
mov cl,BYTE PTR[((-4))+rdi]
|
||||
cmp DWORD PTR[256+rdi],-1
|
||||
je $L$RC4_CHAR
|
||||
mov r8d,DWORD PTR[OPENSSL_ia32cap_P]
|
||||
xor rbx,rbx
|
||||
inc r10b
|
||||
sub rbx,r10
|
||||
sub r13,r12
|
||||
mov eax,DWORD PTR[r10*4+rdi]
|
||||
test r11,-16
|
||||
jz $L$loop1
|
||||
bt r8d,30
|
||||
jc $L$intel
|
||||
and rbx,7
|
||||
lea rsi,QWORD PTR[1+r10]
|
||||
jz $L$oop8
|
||||
sub r11,rbx
|
||||
$L$oop8_warmup::
|
||||
add cl,al
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
mov DWORD PTR[r10*4+rdi],edx
|
||||
add al,dl
|
||||
inc r10b
|
||||
mov edx,DWORD PTR[rax*4+rdi]
|
||||
mov eax,DWORD PTR[r10*4+rdi]
|
||||
xor dl,BYTE PTR[r12]
|
||||
mov BYTE PTR[r12*1+r13],dl
|
||||
lea r12,QWORD PTR[1+r12]
|
||||
dec rbx
|
||||
jnz $L$oop8_warmup
|
||||
|
||||
lea rsi,QWORD PTR[1+r10]
|
||||
jmp $L$oop8
|
||||
ALIGN 16
|
||||
$L$oop8::
|
||||
add cl,al
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
mov ebx,DWORD PTR[rsi*4+rdi]
|
||||
ror r8,8
|
||||
mov DWORD PTR[r10*4+rdi],edx
|
||||
add dl,al
|
||||
mov r8b,BYTE PTR[rdx*4+rdi]
|
||||
add cl,bl
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],ebx
|
||||
mov eax,DWORD PTR[4+rsi*4+rdi]
|
||||
ror r8,8
|
||||
mov DWORD PTR[4+r10*4+rdi],edx
|
||||
add dl,bl
|
||||
mov r8b,BYTE PTR[rdx*4+rdi]
|
||||
add cl,al
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
mov ebx,DWORD PTR[8+rsi*4+rdi]
|
||||
ror r8,8
|
||||
mov DWORD PTR[8+r10*4+rdi],edx
|
||||
add dl,al
|
||||
mov r8b,BYTE PTR[rdx*4+rdi]
|
||||
add cl,bl
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],ebx
|
||||
mov eax,DWORD PTR[12+rsi*4+rdi]
|
||||
ror r8,8
|
||||
mov DWORD PTR[12+r10*4+rdi],edx
|
||||
add dl,bl
|
||||
mov r8b,BYTE PTR[rdx*4+rdi]
|
||||
add cl,al
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
mov ebx,DWORD PTR[16+rsi*4+rdi]
|
||||
ror r8,8
|
||||
mov DWORD PTR[16+r10*4+rdi],edx
|
||||
add dl,al
|
||||
mov r8b,BYTE PTR[rdx*4+rdi]
|
||||
add cl,bl
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],ebx
|
||||
mov eax,DWORD PTR[20+rsi*4+rdi]
|
||||
ror r8,8
|
||||
mov DWORD PTR[20+r10*4+rdi],edx
|
||||
add dl,bl
|
||||
mov r8b,BYTE PTR[rdx*4+rdi]
|
||||
add cl,al
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
mov ebx,DWORD PTR[24+rsi*4+rdi]
|
||||
ror r8,8
|
||||
mov DWORD PTR[24+r10*4+rdi],edx
|
||||
add dl,al
|
||||
mov r8b,BYTE PTR[rdx*4+rdi]
|
||||
add sil,8
|
||||
add cl,bl
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],ebx
|
||||
mov eax,DWORD PTR[((-4))+rsi*4+rdi]
|
||||
ror r8,8
|
||||
mov DWORD PTR[28+r10*4+rdi],edx
|
||||
add dl,bl
|
||||
mov r8b,BYTE PTR[rdx*4+rdi]
|
||||
add r10b,8
|
||||
ror r8,8
|
||||
sub r11,8
|
||||
|
||||
xor r8,QWORD PTR[r12]
|
||||
mov QWORD PTR[r12*1+r13],r8
|
||||
lea r12,QWORD PTR[8+r12]
|
||||
|
||||
test r11,-8
|
||||
jnz $L$oop8
|
||||
cmp r11,0
|
||||
jne $L$loop1
|
||||
jmp $L$exit
|
||||
|
||||
ALIGN 16
|
||||
$L$intel::
|
||||
test r11,-32
|
||||
jz $L$loop1
|
||||
and rbx,15
|
||||
jz $L$oop16_is_hot
|
||||
sub r11,rbx
|
||||
$L$oop16_warmup::
|
||||
add cl,al
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
mov DWORD PTR[r10*4+rdi],edx
|
||||
add al,dl
|
||||
inc r10b
|
||||
mov edx,DWORD PTR[rax*4+rdi]
|
||||
mov eax,DWORD PTR[r10*4+rdi]
|
||||
xor dl,BYTE PTR[r12]
|
||||
mov BYTE PTR[r12*1+r13],dl
|
||||
lea r12,QWORD PTR[1+r12]
|
||||
dec rbx
|
||||
jnz $L$oop16_warmup
|
||||
|
||||
mov rbx,rcx
|
||||
xor rcx,rcx
|
||||
mov cl,bl
|
||||
|
||||
$L$oop16_is_hot::
|
||||
lea rsi,QWORD PTR[r10*4+rdi]
|
||||
add cl,al
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
pxor xmm0,xmm0
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
add al,dl
|
||||
mov ebx,DWORD PTR[4+rsi]
|
||||
movzx eax,al
|
||||
mov DWORD PTR[rsi],edx
|
||||
add cl,bl
|
||||
pinsrw xmm0,WORD PTR[rax*4+rdi],0
|
||||
jmp $L$oop16_enter
|
||||
ALIGN 16
|
||||
$L$oop16::
|
||||
add cl,al
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
pxor xmm2,xmm0
|
||||
psllq xmm1,8
|
||||
pxor xmm0,xmm0
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
add al,dl
|
||||
mov ebx,DWORD PTR[4+rsi]
|
||||
movzx eax,al
|
||||
mov DWORD PTR[rsi],edx
|
||||
pxor xmm2,xmm1
|
||||
add cl,bl
|
||||
pinsrw xmm0,WORD PTR[rax*4+rdi],0
|
||||
movdqu XMMWORD PTR[r12*1+r13],xmm2
|
||||
lea r12,QWORD PTR[16+r12]
|
||||
$L$oop16_enter::
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
pxor xmm1,xmm1
|
||||
mov DWORD PTR[rcx*4+rdi],ebx
|
||||
add bl,dl
|
||||
mov eax,DWORD PTR[8+rsi]
|
||||
movzx ebx,bl
|
||||
mov DWORD PTR[4+rsi],edx
|
||||
add cl,al
|
||||
pinsrw xmm1,WORD PTR[rbx*4+rdi],0
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
add al,dl
|
||||
mov ebx,DWORD PTR[12+rsi]
|
||||
movzx eax,al
|
||||
mov DWORD PTR[8+rsi],edx
|
||||
add cl,bl
|
||||
pinsrw xmm0,WORD PTR[rax*4+rdi],1
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],ebx
|
||||
add bl,dl
|
||||
mov eax,DWORD PTR[16+rsi]
|
||||
movzx ebx,bl
|
||||
mov DWORD PTR[12+rsi],edx
|
||||
add cl,al
|
||||
pinsrw xmm1,WORD PTR[rbx*4+rdi],1
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
add al,dl
|
||||
mov ebx,DWORD PTR[20+rsi]
|
||||
movzx eax,al
|
||||
mov DWORD PTR[16+rsi],edx
|
||||
add cl,bl
|
||||
pinsrw xmm0,WORD PTR[rax*4+rdi],2
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],ebx
|
||||
add bl,dl
|
||||
mov eax,DWORD PTR[24+rsi]
|
||||
movzx ebx,bl
|
||||
mov DWORD PTR[20+rsi],edx
|
||||
add cl,al
|
||||
pinsrw xmm1,WORD PTR[rbx*4+rdi],2
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
add al,dl
|
||||
mov ebx,DWORD PTR[28+rsi]
|
||||
movzx eax,al
|
||||
mov DWORD PTR[24+rsi],edx
|
||||
add cl,bl
|
||||
pinsrw xmm0,WORD PTR[rax*4+rdi],3
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],ebx
|
||||
add bl,dl
|
||||
mov eax,DWORD PTR[32+rsi]
|
||||
movzx ebx,bl
|
||||
mov DWORD PTR[28+rsi],edx
|
||||
add cl,al
|
||||
pinsrw xmm1,WORD PTR[rbx*4+rdi],3
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
add al,dl
|
||||
mov ebx,DWORD PTR[36+rsi]
|
||||
movzx eax,al
|
||||
mov DWORD PTR[32+rsi],edx
|
||||
add cl,bl
|
||||
pinsrw xmm0,WORD PTR[rax*4+rdi],4
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],ebx
|
||||
add bl,dl
|
||||
mov eax,DWORD PTR[40+rsi]
|
||||
movzx ebx,bl
|
||||
mov DWORD PTR[36+rsi],edx
|
||||
add cl,al
|
||||
pinsrw xmm1,WORD PTR[rbx*4+rdi],4
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
add al,dl
|
||||
mov ebx,DWORD PTR[44+rsi]
|
||||
movzx eax,al
|
||||
mov DWORD PTR[40+rsi],edx
|
||||
add cl,bl
|
||||
pinsrw xmm0,WORD PTR[rax*4+rdi],5
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],ebx
|
||||
add bl,dl
|
||||
mov eax,DWORD PTR[48+rsi]
|
||||
movzx ebx,bl
|
||||
mov DWORD PTR[44+rsi],edx
|
||||
add cl,al
|
||||
pinsrw xmm1,WORD PTR[rbx*4+rdi],5
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
add al,dl
|
||||
mov ebx,DWORD PTR[52+rsi]
|
||||
movzx eax,al
|
||||
mov DWORD PTR[48+rsi],edx
|
||||
add cl,bl
|
||||
pinsrw xmm0,WORD PTR[rax*4+rdi],6
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],ebx
|
||||
add bl,dl
|
||||
mov eax,DWORD PTR[56+rsi]
|
||||
movzx ebx,bl
|
||||
mov DWORD PTR[52+rsi],edx
|
||||
add cl,al
|
||||
pinsrw xmm1,WORD PTR[rbx*4+rdi],6
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
add al,dl
|
||||
mov ebx,DWORD PTR[60+rsi]
|
||||
movzx eax,al
|
||||
mov DWORD PTR[56+rsi],edx
|
||||
add cl,bl
|
||||
pinsrw xmm0,WORD PTR[rax*4+rdi],7
|
||||
add r10b,16
|
||||
movdqu xmm2,XMMWORD PTR[r12]
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],ebx
|
||||
add bl,dl
|
||||
movzx ebx,bl
|
||||
mov DWORD PTR[60+rsi],edx
|
||||
lea rsi,QWORD PTR[r10*4+rdi]
|
||||
pinsrw xmm1,WORD PTR[rbx*4+rdi],7
|
||||
mov eax,DWORD PTR[rsi]
|
||||
mov rbx,rcx
|
||||
xor rcx,rcx
|
||||
sub r11,16
|
||||
mov cl,bl
|
||||
test r11,-16
|
||||
jnz $L$oop16
|
||||
|
||||
psllq xmm1,8
|
||||
pxor xmm2,xmm0
|
||||
pxor xmm2,xmm1
|
||||
movdqu XMMWORD PTR[r12*1+r13],xmm2
|
||||
lea r12,QWORD PTR[16+r12]
|
||||
|
||||
cmp r11,0
|
||||
jne $L$loop1
|
||||
jmp $L$exit
|
||||
|
||||
ALIGN 16
|
||||
$L$loop1::
|
||||
add cl,al
|
||||
mov edx,DWORD PTR[rcx*4+rdi]
|
||||
mov DWORD PTR[rcx*4+rdi],eax
|
||||
mov DWORD PTR[r10*4+rdi],edx
|
||||
add al,dl
|
||||
inc r10b
|
||||
mov edx,DWORD PTR[rax*4+rdi]
|
||||
mov eax,DWORD PTR[r10*4+rdi]
|
||||
xor dl,BYTE PTR[r12]
|
||||
mov BYTE PTR[r12*1+r13],dl
|
||||
lea r12,QWORD PTR[1+r12]
|
||||
dec r11
|
||||
jnz $L$loop1
|
||||
jmp $L$exit
|
||||
|
||||
ALIGN 16
|
||||
$L$RC4_CHAR::
|
||||
add r10b,1
|
||||
movzx eax,BYTE PTR[r10*1+rdi]
|
||||
test r11,-8
|
||||
jz $L$cloop1
|
||||
jmp $L$cloop8
|
||||
ALIGN 16
|
||||
$L$cloop8::
|
||||
mov r8d,DWORD PTR[r12]
|
||||
mov r9d,DWORD PTR[4+r12]
|
||||
add cl,al
|
||||
lea rsi,QWORD PTR[1+r10]
|
||||
movzx edx,BYTE PTR[rcx*1+rdi]
|
||||
movzx esi,sil
|
||||
movzx ebx,BYTE PTR[rsi*1+rdi]
|
||||
mov BYTE PTR[rcx*1+rdi],al
|
||||
cmp rcx,rsi
|
||||
mov BYTE PTR[r10*1+rdi],dl
|
||||
jne $L$cmov0
|
||||
mov rbx,rax
|
||||
$L$cmov0::
|
||||
add dl,al
|
||||
xor r8b,BYTE PTR[rdx*1+rdi]
|
||||
ror r8d,8
|
||||
add cl,bl
|
||||
lea r10,QWORD PTR[1+rsi]
|
||||
movzx edx,BYTE PTR[rcx*1+rdi]
|
||||
movzx r10d,r10b
|
||||
movzx eax,BYTE PTR[r10*1+rdi]
|
||||
mov BYTE PTR[rcx*1+rdi],bl
|
||||
cmp rcx,r10
|
||||
mov BYTE PTR[rsi*1+rdi],dl
|
||||
jne $L$cmov1
|
||||
mov rax,rbx
|
||||
$L$cmov1::
|
||||
add dl,bl
|
||||
xor r8b,BYTE PTR[rdx*1+rdi]
|
||||
ror r8d,8
|
||||
add cl,al
|
||||
lea rsi,QWORD PTR[1+r10]
|
||||
movzx edx,BYTE PTR[rcx*1+rdi]
|
||||
movzx esi,sil
|
||||
movzx ebx,BYTE PTR[rsi*1+rdi]
|
||||
mov BYTE PTR[rcx*1+rdi],al
|
||||
cmp rcx,rsi
|
||||
mov BYTE PTR[r10*1+rdi],dl
|
||||
jne $L$cmov2
|
||||
mov rbx,rax
|
||||
$L$cmov2::
|
||||
add dl,al
|
||||
xor r8b,BYTE PTR[rdx*1+rdi]
|
||||
ror r8d,8
|
||||
add cl,bl
|
||||
lea r10,QWORD PTR[1+rsi]
|
||||
movzx edx,BYTE PTR[rcx*1+rdi]
|
||||
movzx r10d,r10b
|
||||
movzx eax,BYTE PTR[r10*1+rdi]
|
||||
mov BYTE PTR[rcx*1+rdi],bl
|
||||
cmp rcx,r10
|
||||
mov BYTE PTR[rsi*1+rdi],dl
|
||||
jne $L$cmov3
|
||||
mov rax,rbx
|
||||
$L$cmov3::
|
||||
add dl,bl
|
||||
xor r8b,BYTE PTR[rdx*1+rdi]
|
||||
ror r8d,8
|
||||
add cl,al
|
||||
lea rsi,QWORD PTR[1+r10]
|
||||
movzx edx,BYTE PTR[rcx*1+rdi]
|
||||
movzx esi,sil
|
||||
movzx ebx,BYTE PTR[rsi*1+rdi]
|
||||
mov BYTE PTR[rcx*1+rdi],al
|
||||
cmp rcx,rsi
|
||||
mov BYTE PTR[r10*1+rdi],dl
|
||||
jne $L$cmov4
|
||||
mov rbx,rax
|
||||
$L$cmov4::
|
||||
add dl,al
|
||||
xor r9b,BYTE PTR[rdx*1+rdi]
|
||||
ror r9d,8
|
||||
add cl,bl
|
||||
lea r10,QWORD PTR[1+rsi]
|
||||
movzx edx,BYTE PTR[rcx*1+rdi]
|
||||
movzx r10d,r10b
|
||||
movzx eax,BYTE PTR[r10*1+rdi]
|
||||
mov BYTE PTR[rcx*1+rdi],bl
|
||||
cmp rcx,r10
|
||||
mov BYTE PTR[rsi*1+rdi],dl
|
||||
jne $L$cmov5
|
||||
mov rax,rbx
|
||||
$L$cmov5::
|
||||
add dl,bl
|
||||
xor r9b,BYTE PTR[rdx*1+rdi]
|
||||
ror r9d,8
|
||||
add cl,al
|
||||
lea rsi,QWORD PTR[1+r10]
|
||||
movzx edx,BYTE PTR[rcx*1+rdi]
|
||||
movzx esi,sil
|
||||
movzx ebx,BYTE PTR[rsi*1+rdi]
|
||||
mov BYTE PTR[rcx*1+rdi],al
|
||||
cmp rcx,rsi
|
||||
mov BYTE PTR[r10*1+rdi],dl
|
||||
jne $L$cmov6
|
||||
mov rbx,rax
|
||||
$L$cmov6::
|
||||
add dl,al
|
||||
xor r9b,BYTE PTR[rdx*1+rdi]
|
||||
ror r9d,8
|
||||
add cl,bl
|
||||
lea r10,QWORD PTR[1+rsi]
|
||||
movzx edx,BYTE PTR[rcx*1+rdi]
|
||||
movzx r10d,r10b
|
||||
movzx eax,BYTE PTR[r10*1+rdi]
|
||||
mov BYTE PTR[rcx*1+rdi],bl
|
||||
cmp rcx,r10
|
||||
mov BYTE PTR[rsi*1+rdi],dl
|
||||
jne $L$cmov7
|
||||
mov rax,rbx
|
||||
$L$cmov7::
|
||||
add dl,bl
|
||||
xor r9b,BYTE PTR[rdx*1+rdi]
|
||||
ror r9d,8
|
||||
lea r11,QWORD PTR[((-8))+r11]
|
||||
mov DWORD PTR[r13],r8d
|
||||
lea r12,QWORD PTR[8+r12]
|
||||
mov DWORD PTR[4+r13],r9d
|
||||
lea r13,QWORD PTR[8+r13]
|
||||
|
||||
test r11,-8
|
||||
jnz $L$cloop8
|
||||
cmp r11,0
|
||||
jne $L$cloop1
|
||||
jmp $L$exit
|
||||
ALIGN 16
|
||||
$L$cloop1::
|
||||
add cl,al
|
||||
movzx ecx,cl
|
||||
movzx edx,BYTE PTR[rcx*1+rdi]
|
||||
mov BYTE PTR[rcx*1+rdi],al
|
||||
mov BYTE PTR[r10*1+rdi],dl
|
||||
add dl,al
|
||||
add r10b,1
|
||||
movzx edx,dl
|
||||
movzx r10d,r10b
|
||||
movzx edx,BYTE PTR[rdx*1+rdi]
|
||||
movzx eax,BYTE PTR[r10*1+rdi]
|
||||
xor dl,BYTE PTR[r12]
|
||||
lea r12,QWORD PTR[1+r12]
|
||||
mov BYTE PTR[r13],dl
|
||||
lea r13,QWORD PTR[1+r13]
|
||||
sub r11,1
|
||||
jnz $L$cloop1
|
||||
jmp $L$exit
|
||||
|
||||
ALIGN 16
|
||||
$L$exit::
|
||||
sub r10b,1
|
||||
mov DWORD PTR[((-8))+rdi],r10d
|
||||
mov DWORD PTR[((-4))+rdi],ecx
|
||||
|
||||
mov r13,QWORD PTR[rsp]
|
||||
mov r12,QWORD PTR[8+rsp]
|
||||
mov rbx,QWORD PTR[16+rsp]
|
||||
add rsp,24
|
||||
$L$epilogue::
|
||||
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
|
||||
mov rsi,QWORD PTR[16+rsp]
|
||||
DB 0F3h,0C3h ;repret
|
||||
$L$SEH_end_RC4::
|
||||
RC4 ENDP
|
||||
PUBLIC RC4_set_key
|
||||
|
||||
ALIGN 16
|
||||
RC4_set_key PROC PUBLIC
|
||||
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
|
||||
mov QWORD PTR[16+rsp],rsi
|
||||
mov rax,rsp
|
||||
$L$SEH_begin_RC4_set_key::
|
||||
mov rdi,rcx
|
||||
mov rsi,rdx
|
||||
mov rdx,r8
|
||||
|
||||
|
||||
lea rdi,QWORD PTR[8+rdi]
|
||||
lea rdx,QWORD PTR[rsi*1+rdx]
|
||||
neg rsi
|
||||
mov rcx,rsi
|
||||
xor eax,eax
|
||||
xor r9,r9
|
||||
xor r10,r10
|
||||
xor r11,r11
|
||||
|
||||
mov r8d,DWORD PTR[OPENSSL_ia32cap_P]
|
||||
bt r8d,20
|
||||
jc $L$c1stloop
|
||||
jmp $L$w1stloop
|
||||
|
||||
ALIGN 16
|
||||
$L$w1stloop::
|
||||
mov DWORD PTR[rax*4+rdi],eax
|
||||
add al,1
|
||||
jnc $L$w1stloop
|
||||
|
||||
xor r9,r9
|
||||
xor r8,r8
|
||||
ALIGN 16
|
||||
$L$w2ndloop::
|
||||
mov r10d,DWORD PTR[r9*4+rdi]
|
||||
add r8b,BYTE PTR[rsi*1+rdx]
|
||||
add r8b,r10b
|
||||
add rsi,1
|
||||
mov r11d,DWORD PTR[r8*4+rdi]
|
||||
cmovz rsi,rcx
|
||||
mov DWORD PTR[r8*4+rdi],r10d
|
||||
mov DWORD PTR[r9*4+rdi],r11d
|
||||
add r9b,1
|
||||
jnc $L$w2ndloop
|
||||
jmp $L$exit_key
|
||||
|
||||
ALIGN 16
|
||||
$L$c1stloop::
|
||||
mov BYTE PTR[rax*1+rdi],al
|
||||
add al,1
|
||||
jnc $L$c1stloop
|
||||
|
||||
xor r9,r9
|
||||
xor r8,r8
|
||||
ALIGN 16
|
||||
$L$c2ndloop::
|
||||
mov r10b,BYTE PTR[r9*1+rdi]
|
||||
add r8b,BYTE PTR[rsi*1+rdx]
|
||||
add r8b,r10b
|
||||
add rsi,1
|
||||
mov r11b,BYTE PTR[r8*1+rdi]
|
||||
jnz $L$cnowrap
|
||||
mov rsi,rcx
|
||||
$L$cnowrap::
|
||||
mov BYTE PTR[r8*1+rdi],r10b
|
||||
mov BYTE PTR[r9*1+rdi],r11b
|
||||
add r9b,1
|
||||
jnc $L$c2ndloop
|
||||
mov DWORD PTR[256+rdi],-1
|
||||
|
||||
ALIGN 16
|
||||
$L$exit_key::
|
||||
xor eax,eax
|
||||
mov DWORD PTR[((-8))+rdi],eax
|
||||
mov DWORD PTR[((-4))+rdi],eax
|
||||
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
|
||||
mov rsi,QWORD PTR[16+rsp]
|
||||
DB 0F3h,0C3h ;repret
|
||||
$L$SEH_end_RC4_set_key::
|
||||
RC4_set_key ENDP
|
||||
|
||||
.text$ ENDS
|
||||
END
|
||||
|
1264
crypto/rc4/rc4-md5-elf-x86_64.S
Normal file
1264
crypto/rc4/rc4-md5-elf-x86_64.S
Normal file
File diff suppressed because it is too large
Load Diff
1260
crypto/rc4/rc4-md5-macosx-x86_64.S
Normal file
1260
crypto/rc4/rc4-md5-macosx-x86_64.S
Normal file
File diff suppressed because it is too large
Load Diff
1347
crypto/rc4/rc4-md5-masm-x86_64.S
Normal file
1347
crypto/rc4/rc4-md5-masm-x86_64.S
Normal file
File diff suppressed because it is too large
Load Diff
1273
crypto/rc4/rc4-md5-mingw64-x86_64.S
Normal file
1273
crypto/rc4/rc4-md5-mingw64-x86_64.S
Normal file
File diff suppressed because it is too large
Load Diff
615
crypto/rc4/rc4-mingw64-x86_64.S
Normal file
615
crypto/rc4/rc4-mingw64-x86_64.S
Normal file
@@ -0,0 +1,615 @@
|
||||
#include "x86_arch.h"
|
||||
.text
|
||||
|
||||
|
||||
|
||||
.globl RC4
|
||||
.def RC4; .scl 2; .type 32; .endef
|
||||
.p2align 4
|
||||
RC4:
|
||||
movq %rdi,8(%rsp)
|
||||
movq %rsi,16(%rsp)
|
||||
movq %rsp,%rax
|
||||
.LSEH_begin_RC4:
|
||||
movq %rcx,%rdi
|
||||
movq %rdx,%rsi
|
||||
movq %r8,%rdx
|
||||
movq %r9,%rcx
|
||||
orq %rsi,%rsi
|
||||
jne .Lentry
|
||||
movq 8(%rsp),%rdi
|
||||
movq 16(%rsp),%rsi
|
||||
retq
|
||||
.Lentry:
|
||||
pushq %rbx
|
||||
pushq %r12
|
||||
pushq %r13
|
||||
.Lprologue:
|
||||
movq %rsi,%r11
|
||||
movq %rdx,%r12
|
||||
movq %rcx,%r13
|
||||
xorq %r10,%r10
|
||||
xorq %rcx,%rcx
|
||||
|
||||
leaq 8(%rdi),%rdi
|
||||
movb -8(%rdi),%r10b
|
||||
movb -4(%rdi),%cl
|
||||
cmpl $-1,256(%rdi)
|
||||
je .LRC4_CHAR
|
||||
movl OPENSSL_ia32cap_P(%rip),%r8d
|
||||
xorq %rbx,%rbx
|
||||
incb %r10b
|
||||
subq %r10,%rbx
|
||||
subq %r12,%r13
|
||||
movl (%rdi,%r10,4),%eax
|
||||
testq $-16,%r11
|
||||
jz .Lloop1
|
||||
btl $IA32CAP_BIT0_INTEL,%r8d
|
||||
jc .Lintel
|
||||
andq $7,%rbx
|
||||
leaq 1(%r10),%rsi
|
||||
jz .Loop8
|
||||
subq %rbx,%r11
|
||||
.Loop8_warmup:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl %edx,(%rdi,%r10,4)
|
||||
addb %dl,%al
|
||||
incb %r10b
|
||||
movl (%rdi,%rax,4),%edx
|
||||
movl (%rdi,%r10,4),%eax
|
||||
xorb (%r12),%dl
|
||||
movb %dl,(%r13,%r12,1)
|
||||
leaq 1(%r12),%r12
|
||||
decq %rbx
|
||||
jnz .Loop8_warmup
|
||||
|
||||
leaq 1(%r10),%rsi
|
||||
jmp .Loop8
|
||||
.p2align 4
|
||||
.Loop8:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 0(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,0(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl 4(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,4(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 8(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,8(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl 12(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,12(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 16(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,16(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl 20(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,20(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl 24(%rdi,%rsi,4),%ebx
|
||||
rorq $8,%r8
|
||||
movl %edx,24(%rdi,%r10,4)
|
||||
addb %al,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb $8,%sil
|
||||
addb %bl,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
movl -4(%rdi,%rsi,4),%eax
|
||||
rorq $8,%r8
|
||||
movl %edx,28(%rdi,%r10,4)
|
||||
addb %bl,%dl
|
||||
movb (%rdi,%rdx,4),%r8b
|
||||
addb $8,%r10b
|
||||
rorq $8,%r8
|
||||
subq $8,%r11
|
||||
|
||||
xorq (%r12),%r8
|
||||
movq %r8,(%r13,%r12,1)
|
||||
leaq 8(%r12),%r12
|
||||
|
||||
testq $-8,%r11
|
||||
jnz .Loop8
|
||||
cmpq $0,%r11
|
||||
jne .Lloop1
|
||||
jmp .Lexit
|
||||
|
||||
.p2align 4
|
||||
.Lintel:
|
||||
testq $-32,%r11
|
||||
jz .Lloop1
|
||||
andq $15,%rbx
|
||||
jz .Loop16_is_hot
|
||||
subq %rbx,%r11
|
||||
.Loop16_warmup:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl %edx,(%rdi,%r10,4)
|
||||
addb %dl,%al
|
||||
incb %r10b
|
||||
movl (%rdi,%rax,4),%edx
|
||||
movl (%rdi,%r10,4),%eax
|
||||
xorb (%r12),%dl
|
||||
movb %dl,(%r13,%r12,1)
|
||||
leaq 1(%r12),%r12
|
||||
decq %rbx
|
||||
jnz .Loop16_warmup
|
||||
|
||||
movq %rcx,%rbx
|
||||
xorq %rcx,%rcx
|
||||
movb %bl,%cl
|
||||
|
||||
.Loop16_is_hot:
|
||||
leaq (%rdi,%r10,4),%rsi
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
pxor %xmm0,%xmm0
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 4(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,0(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $0,(%rdi,%rax,4),%xmm0
|
||||
jmp .Loop16_enter
|
||||
.p2align 4
|
||||
.Loop16:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
pxor %xmm0,%xmm2
|
||||
psllq $8,%xmm1
|
||||
pxor %xmm0,%xmm0
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 4(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,0(%rsi)
|
||||
pxor %xmm1,%xmm2
|
||||
addb %bl,%cl
|
||||
pinsrw $0,(%rdi,%rax,4),%xmm0
|
||||
movdqu %xmm2,(%r13,%r12,1)
|
||||
leaq 16(%r12),%r12
|
||||
.Loop16_enter:
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
pxor %xmm1,%xmm1
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 8(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,4(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $0,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 12(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,8(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $1,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 16(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,12(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $1,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 20(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,16(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $2,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 24(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,20(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $2,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 28(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,24(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $3,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 32(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,28(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $3,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 36(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,32(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $4,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 40(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,36(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $4,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 44(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,40(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $5,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 48(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,44(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $5,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 52(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,48(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $6,(%rdi,%rax,4),%xmm0
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movl 56(%rsi),%eax
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,52(%rsi)
|
||||
addb %al,%cl
|
||||
pinsrw $6,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
addb %dl,%al
|
||||
movl 60(%rsi),%ebx
|
||||
movzbl %al,%eax
|
||||
movl %edx,56(%rsi)
|
||||
addb %bl,%cl
|
||||
pinsrw $7,(%rdi,%rax,4),%xmm0
|
||||
addb $16,%r10b
|
||||
movdqu (%r12),%xmm2
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %ebx,(%rdi,%rcx,4)
|
||||
addb %dl,%bl
|
||||
movzbl %bl,%ebx
|
||||
movl %edx,60(%rsi)
|
||||
leaq (%rdi,%r10,4),%rsi
|
||||
pinsrw $7,(%rdi,%rbx,4),%xmm1
|
||||
movl (%rsi),%eax
|
||||
movq %rcx,%rbx
|
||||
xorq %rcx,%rcx
|
||||
subq $16,%r11
|
||||
movb %bl,%cl
|
||||
testq $-16,%r11
|
||||
jnz .Loop16
|
||||
|
||||
psllq $8,%xmm1
|
||||
pxor %xmm0,%xmm2
|
||||
pxor %xmm1,%xmm2
|
||||
movdqu %xmm2,(%r13,%r12,1)
|
||||
leaq 16(%r12),%r12
|
||||
|
||||
cmpq $0,%r11
|
||||
jne .Lloop1
|
||||
jmp .Lexit
|
||||
|
||||
.p2align 4
|
||||
.Lloop1:
|
||||
addb %al,%cl
|
||||
movl (%rdi,%rcx,4),%edx
|
||||
movl %eax,(%rdi,%rcx,4)
|
||||
movl %edx,(%rdi,%r10,4)
|
||||
addb %dl,%al
|
||||
incb %r10b
|
||||
movl (%rdi,%rax,4),%edx
|
||||
movl (%rdi,%r10,4),%eax
|
||||
xorb (%r12),%dl
|
||||
movb %dl,(%r13,%r12,1)
|
||||
leaq 1(%r12),%r12
|
||||
decq %r11
|
||||
jnz .Lloop1
|
||||
jmp .Lexit
|
||||
|
||||
.p2align 4
|
||||
.LRC4_CHAR:
|
||||
addb $1,%r10b
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
testq $-8,%r11
|
||||
jz .Lcloop1
|
||||
jmp .Lcloop8
|
||||
.p2align 4
|
||||
.Lcloop8:
|
||||
movl (%r12),%r8d
|
||||
movl 4(%r12),%r9d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne .Lcmov0
|
||||
movq %rax,%rbx
|
||||
.Lcmov0:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne .Lcmov1
|
||||
movq %rbx,%rax
|
||||
.Lcmov1:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne .Lcmov2
|
||||
movq %rax,%rbx
|
||||
.Lcmov2:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne .Lcmov3
|
||||
movq %rbx,%rax
|
||||
.Lcmov3:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r8b
|
||||
rorl $8,%r8d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne .Lcmov4
|
||||
movq %rax,%rbx
|
||||
.Lcmov4:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne .Lcmov5
|
||||
movq %rbx,%rax
|
||||
.Lcmov5:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
addb %al,%cl
|
||||
leaq 1(%r10),%rsi
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %sil,%esi
|
||||
movzbl (%rdi,%rsi,1),%ebx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
cmpq %rsi,%rcx
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
jne .Lcmov6
|
||||
movq %rax,%rbx
|
||||
.Lcmov6:
|
||||
addb %al,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
addb %bl,%cl
|
||||
leaq 1(%rsi),%r10
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
movb %bl,(%rdi,%rcx,1)
|
||||
cmpq %r10,%rcx
|
||||
movb %dl,(%rdi,%rsi,1)
|
||||
jne .Lcmov7
|
||||
movq %rbx,%rax
|
||||
.Lcmov7:
|
||||
addb %bl,%dl
|
||||
xorb (%rdi,%rdx,1),%r9b
|
||||
rorl $8,%r9d
|
||||
leaq -8(%r11),%r11
|
||||
movl %r8d,(%r13)
|
||||
leaq 8(%r12),%r12
|
||||
movl %r9d,4(%r13)
|
||||
leaq 8(%r13),%r13
|
||||
|
||||
testq $-8,%r11
|
||||
jnz .Lcloop8
|
||||
cmpq $0,%r11
|
||||
jne .Lcloop1
|
||||
jmp .Lexit
|
||||
.p2align 4
|
||||
.Lcloop1:
|
||||
addb %al,%cl
|
||||
movzbl %cl,%ecx
|
||||
movzbl (%rdi,%rcx,1),%edx
|
||||
movb %al,(%rdi,%rcx,1)
|
||||
movb %dl,(%rdi,%r10,1)
|
||||
addb %al,%dl
|
||||
addb $1,%r10b
|
||||
movzbl %dl,%edx
|
||||
movzbl %r10b,%r10d
|
||||
movzbl (%rdi,%rdx,1),%edx
|
||||
movzbl (%rdi,%r10,1),%eax
|
||||
xorb (%r12),%dl
|
||||
leaq 1(%r12),%r12
|
||||
movb %dl,(%r13)
|
||||
leaq 1(%r13),%r13
|
||||
subq $1,%r11
|
||||
jnz .Lcloop1
|
||||
jmp .Lexit
|
||||
|
||||
.p2align 4
|
||||
.Lexit:
|
||||
subb $1,%r10b
|
||||
movl %r10d,-8(%rdi)
|
||||
movl %ecx,-4(%rdi)
|
||||
|
||||
movq (%rsp),%r13
|
||||
movq 8(%rsp),%r12
|
||||
movq 16(%rsp),%rbx
|
||||
addq $24,%rsp
|
||||
.Lepilogue:
|
||||
movq 8(%rsp),%rdi
|
||||
movq 16(%rsp),%rsi
|
||||
retq
|
||||
.LSEH_end_RC4:
|
||||
.globl RC4_set_key
|
||||
.def RC4_set_key; .scl 2; .type 32; .endef
|
||||
.p2align 4
|
||||
RC4_set_key:
|
||||
movq %rdi,8(%rsp)
|
||||
movq %rsi,16(%rsp)
|
||||
movq %rsp,%rax
|
||||
.LSEH_begin_RC4_set_key:
|
||||
movq %rcx,%rdi
|
||||
movq %rdx,%rsi
|
||||
movq %r8,%rdx
|
||||
|
||||
leaq 8(%rdi),%rdi
|
||||
leaq (%rdx,%rsi,1),%rdx
|
||||
negq %rsi
|
||||
movq %rsi,%rcx
|
||||
xorl %eax,%eax
|
||||
xorq %r9,%r9
|
||||
xorq %r10,%r10
|
||||
xorq %r11,%r11
|
||||
|
||||
movl OPENSSL_ia32cap_P(%rip),%r8d
|
||||
btl $IA32CAP_BIT0_INTELP4,%r8d
|
||||
jc .Lc1stloop
|
||||
jmp .Lw1stloop
|
||||
|
||||
.p2align 4
|
||||
.Lw1stloop:
|
||||
movl %eax,(%rdi,%rax,4)
|
||||
addb $1,%al
|
||||
jnc .Lw1stloop
|
||||
|
||||
xorq %r9,%r9
|
||||
xorq %r8,%r8
|
||||
.p2align 4
|
||||
.Lw2ndloop:
|
||||
movl (%rdi,%r9,4),%r10d
|
||||
addb (%rdx,%rsi,1),%r8b
|
||||
addb %r10b,%r8b
|
||||
addq $1,%rsi
|
||||
movl (%rdi,%r8,4),%r11d
|
||||
cmovzq %rcx,%rsi
|
||||
movl %r10d,(%rdi,%r8,4)
|
||||
movl %r11d,(%rdi,%r9,4)
|
||||
addb $1,%r9b
|
||||
jnc .Lw2ndloop
|
||||
jmp .Lexit_key
|
||||
|
||||
.p2align 4
|
||||
.Lc1stloop:
|
||||
movb %al,(%rdi,%rax,1)
|
||||
addb $1,%al
|
||||
jnc .Lc1stloop
|
||||
|
||||
xorq %r9,%r9
|
||||
xorq %r8,%r8
|
||||
.p2align 4
|
||||
.Lc2ndloop:
|
||||
movb (%rdi,%r9,1),%r10b
|
||||
addb (%rdx,%rsi,1),%r8b
|
||||
addb %r10b,%r8b
|
||||
addq $1,%rsi
|
||||
movb (%rdi,%r8,1),%r11b
|
||||
jnz .Lcnowrap
|
||||
movq %rcx,%rsi
|
||||
.Lcnowrap:
|
||||
movb %r10b,(%rdi,%r8,1)
|
||||
movb %r11b,(%rdi,%r9,1)
|
||||
addb $1,%r9b
|
||||
jnc .Lc2ndloop
|
||||
movl $-1,256(%rdi)
|
||||
|
||||
.p2align 4
|
||||
.Lexit_key:
|
||||
xorl %eax,%eax
|
||||
movl %eax,-8(%rdi)
|
||||
movl %eax,-4(%rdi)
|
||||
movq 8(%rsp),%rdi
|
||||
movq 16(%rsp),%rsi
|
||||
retq
|
||||
.LSEH_end_RC4_set_key:
|
254
crypto/rc4/rc4_enc.c
Normal file
254
crypto/rc4/rc4_enc.c
Normal file
@@ -0,0 +1,254 @@
|
||||
/* $OpenBSD: rc4_enc.c,v 1.18 2022/11/26 16:08:54 tb Exp $ */
|
||||
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
|
||||
* All rights reserved.
|
||||
*
|
||||
* This package is an SSL implementation written
|
||||
* by Eric Young (eay@cryptsoft.com).
|
||||
* The implementation was written so as to conform with Netscapes SSL.
|
||||
*
|
||||
* This library is free for commercial and non-commercial use as long as
|
||||
* the following conditions are aheared to. The following conditions
|
||||
* apply to all code found in this distribution, be it the RC4, RSA,
|
||||
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
|
||||
* included with this distribution is covered by the same copyright terms
|
||||
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
|
||||
*
|
||||
* Copyright remains Eric Young's, and as such any Copyright notices in
|
||||
* the code are not to be removed.
|
||||
* If this package is used in a product, Eric Young should be given attribution
|
||||
* as the author of the parts of the library used.
|
||||
* This can be in the form of a textual message at program startup or
|
||||
* in documentation (online or textual) provided with the package.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* "This product includes cryptographic software written by
|
||||
* Eric Young (eay@cryptsoft.com)"
|
||||
* The word 'cryptographic' can be left out if the rouines from the library
|
||||
* being used are not cryptographic related :-).
|
||||
* 4. If you include any Windows specific code (or a derivative thereof) from
|
||||
* the apps directory (application code) you must include an acknowledgement:
|
||||
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* The licence and distribution terms for any publically available version or
|
||||
* derivative of this code cannot be changed. i.e. this code cannot simply be
|
||||
* copied and put under another distribution licence
|
||||
* [including the GNU Public Licence.]
|
||||
*/
|
||||
|
||||
#include <endian.h>
|
||||
|
||||
#include <openssl/rc4.h>
|
||||
#include "rc4_local.h"
|
||||
|
||||
/* RC4 as implemented from a posting from
|
||||
* Newsgroups: sci.crypt
|
||||
* From: sterndark@netcom.com (David Sterndark)
|
||||
* Subject: RC4 Algorithm revealed.
|
||||
* Message-ID: <sternCvKL4B.Hyy@netcom.com>
|
||||
* Date: Wed, 14 Sep 1994 06:35:31 GMT
|
||||
*/
|
||||
|
||||
/*
 * RC4 stream cipher: encrypt/decrypt (the operation is its own inverse)
 * `len` bytes from `indata` into `outdata`, advancing the cipher state
 * held in `key` (permutation d[] plus indices x, y).
 *
 * Two strategies are compiled in:
 *  - when RC4_CHUNK is defined and both pointers are RC4_CHUNK-aligned,
 *    a word-at-a-time path assembles sizeof(RC4_CHUNK) keystream bytes
 *    into one machine word before XORing (endian-dependent shifts);
 *  - otherwise (and for the unaligned head/tail) a byte-at-a-time path
 *    unrolled 8x.
 */
void
RC4(RC4_KEY *key, size_t len, const unsigned char *indata,
    unsigned char *outdata)
{
	RC4_INT *d;
	RC4_INT x, y,tx, ty;
	size_t i;

	/* Pull the cipher state into locals for the duration of the call. */
	x = key->x;
	y = key->y;
	d = key->data;

#if defined(RC4_CHUNK)
	/*
	 * The original reason for implementing this(*) was the fact that
	 * pre-21164a Alpha CPUs don't have byte load/store instructions
	 * and e.g. a byte store has to be done with 64-bit load, shift,
	 * and, or and finally 64-bit store. Peaking data and operating
	 * at natural word size made it possible to reduce amount of
	 * instructions as well as to perform early read-ahead without
	 * suffering from RAW (read-after-write) hazard. This resulted
	 * in ~40%(**) performance improvement on 21064 box with gcc.
	 * But it's not only Alpha users who win here:-) Thanks to the
	 * early-n-wide read-ahead this implementation also exhibits
	 * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending
	 * on sizeof(RC4_INT)).
	 *
	 * (*)	"this" means code which recognizes the case when input
	 *	and output pointers appear to be aligned at natural CPU
	 *	word boundary
	 * (**)	i.e. according to 'apps/openssl speed rc4' benchmark,
	 *	crypto/rc4/rc4speed.c exhibits almost 70% speed-up...
	 *
	 * Caveats.
	 *
	 * - RC4_CHUNK="unsigned long long" should be a #1 choice for
	 *   UltraSPARC. Unfortunately gcc generates very slow code
	 *   (2.5-3 times slower than one generated by Sun's WorkShop
	 *   C) and therefore gcc (at least 2.95 and earlier) should
	 *   always be told that RC4_CHUNK="unsigned long".
	 *
	 *					<appro@fy.chalmers.se>
	 */

	/*
	 * One RC4 PRGA step as a comma expression: advance x, swap
	 * d[x]/d[y], and evaluate to the next keystream byte widened
	 * to RC4_CHUNK.  Written this way so it can be OR-shifted
	 * directly into the output word below.
	 */
# define RC4_STEP	( \
		x=(x+1) &0xff,	\
		tx=d[x],	\
		y=(tx+y)&0xff,	\
		ty=d[y],	\
		d[y]=tx,	\
		d[x]=ty,	\
		(RC4_CHUNK)d[(tx+ty)&0xff]\
		)

	/* Word path only when BOTH pointers are RC4_CHUNK-aligned. */
	if ((((size_t)indata & (sizeof(RC4_CHUNK) - 1)) |
	    ((size_t)outdata & (sizeof(RC4_CHUNK) - 1))) == 0 ) {
		RC4_CHUNK ichunk, otp;

		/*
		 * I reckon we can afford to implement both endian
		 * cases and to decide which way to take at run-time
		 * because the machine code appears to be very compact
		 * and redundant 1-2KB is perfectly tolerable (i.e.
		 * in case the compiler fails to eliminate it:-). By
		 * suggestion from Terrel Larson <terr@terralogic.net>.
		 *
		 * Special notes.
		 *
		 * - compilers (those I've tried) don't seem to have
		 *   problems eliminating either the operators guarded
		 *   by "if (sizeof(RC4_CHUNK)==8)" or the condition
		 *   expressions themselves so I've got 'em to replace
		 *   corresponding #ifdefs from the previous version;
		 * - I chose to let the redundant switch cases when
		 *   sizeof(RC4_CHUNK)!=8 be (were also #ifdefed
		 *   before);
		 * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in
		 *   [LB]ESHFT guards against "shift is out of range"
		 *   warnings when sizeof(RC4_CHUNK)!=8
		 *
		 *			<appro@fy.chalmers.se>
		 */
#if BYTE_ORDER == BIG_ENDIAN
		/* Byte c of the chunk lands in the most-significant-first slot. */
# define BESHFT(c)	(((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1))
		/* len & -sizeof(RC4_CHUNK): loop while a whole chunk remains. */
		for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) {
			ichunk = *(RC4_CHUNK *)indata;
			otp = RC4_STEP << BESHFT(0);
			otp |= RC4_STEP << BESHFT(1);
			otp |= RC4_STEP << BESHFT(2);
			otp |= RC4_STEP << BESHFT(3);
			if (sizeof(RC4_CHUNK) == 8) {
				otp |= RC4_STEP << BESHFT(4);
				otp |= RC4_STEP << BESHFT(5);
				otp |= RC4_STEP << BESHFT(6);
				otp |= RC4_STEP << BESHFT(7);
			}
			*(RC4_CHUNK *)outdata = otp^ichunk;
			indata += sizeof(RC4_CHUNK);
			outdata += sizeof(RC4_CHUNK);
		}
#else
		/* Little endian: byte c sits at bit offset 8*c. */
# define LESHFT(c)	(((c)*8)&(sizeof(RC4_CHUNK)*8-1))
		for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) {
			ichunk = *(RC4_CHUNK *)indata;
			otp = RC4_STEP;
			otp |= RC4_STEP << 8;
			otp |= RC4_STEP << 16;
			otp |= RC4_STEP << 24;
			if (sizeof(RC4_CHUNK) == 8) {
				otp |= RC4_STEP << LESHFT(4);
				otp |= RC4_STEP << LESHFT(5);
				otp |= RC4_STEP << LESHFT(6);
				otp |= RC4_STEP << LESHFT(7);
			}
			*(RC4_CHUNK *)outdata = otp ^ ichunk;
			indata += sizeof(RC4_CHUNK);
			outdata += sizeof(RC4_CHUNK);
		}
#endif
	}
	/* NOTE(review): fewer than sizeof(RC4_CHUNK) bytes (or the whole
	 * buffer, if unaligned) fall through to the byte loops below. */
#endif
	/* One PRGA step: emit d[(tx+ty)&0xff] ^ in into out. */
#define LOOP(in,out) \
		x=((x+1)&0xff); \
		tx=d[x]; \
		y=(tx+y)&0xff; \
		d[x]=ty=d[y]; \
		d[y]=tx; \
		(out) = d[(tx+ty)&0xff]^ (in);

	/*
	 * RC4_INDEX selects indexed addressing (a[i]) versus
	 * post-incremented pointers (*(a)++) per target preference.
	 */
#ifndef RC4_INDEX
#define RC4_LOOP(a,b,i)	LOOP(*((a)++),*((b)++))
#else
#define RC4_LOOP(a,b,i)	LOOP(a[i],b[i])
#endif

	i = len >> 3;		/* full groups of 8 bytes */
	if (i) {
		for (;;) {
			RC4_LOOP(indata, outdata, 0);
			RC4_LOOP(indata, outdata, 1);
			RC4_LOOP(indata, outdata, 2);
			RC4_LOOP(indata, outdata, 3);
			RC4_LOOP(indata, outdata, 4);
			RC4_LOOP(indata, outdata, 5);
			RC4_LOOP(indata, outdata, 6);
			RC4_LOOP(indata, outdata, 7);
#ifdef RC4_INDEX
			/* pointers not auto-advanced in indexed mode */
			indata += 8;
			outdata += 8;
#endif
			if (--i == 0)
				break;
		}
	}
	i = len&0x07;		/* 0..7 remaining bytes */
	if (i) {
		for (;;) {
			RC4_LOOP(indata, outdata, 0);
			if (--i == 0)
				break;
			RC4_LOOP(indata, outdata, 1);
			if (--i == 0)
				break;
			RC4_LOOP(indata, outdata, 2);
			if (--i == 0)
				break;
			RC4_LOOP(indata, outdata, 3);
			if (--i == 0)
				break;
			RC4_LOOP(indata, outdata, 4);
			if (--i == 0)
				break;
			RC4_LOOP(indata, outdata, 5);
			if (--i == 0)
				break;
			RC4_LOOP(indata, outdata, 6);
			if (--i == 0)
				break;
		}
	}
	/* Persist the advanced state for the next call. */
	key->x = x;
	key->y = y;
}
|
5
crypto/rc4/rc4_local.h
Normal file
5
crypto/rc4/rc4_local.h
Normal file
@@ -0,0 +1,5 @@
|
||||
/* $OpenBSD: rc4_local.h,v 1.1 2022/11/26 16:08:54 tb Exp $ */
|
||||
|
||||
#ifndef HEADER_RC4_LOCL_H
|
||||
#define HEADER_RC4_LOCL_H
|
||||
#endif
|
99
crypto/rc4/rc4_skey.c
Normal file
99
crypto/rc4/rc4_skey.c
Normal file
@@ -0,0 +1,99 @@
|
||||
/* $OpenBSD: rc4_skey.c,v 1.16 2023/07/28 10:35:14 tb Exp $ */
|
||||
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
|
||||
* All rights reserved.
|
||||
*
|
||||
* This package is an SSL implementation written
|
||||
* by Eric Young (eay@cryptsoft.com).
|
||||
* The implementation was written so as to conform with Netscapes SSL.
|
||||
*
|
||||
* This library is free for commercial and non-commercial use as long as
|
||||
* the following conditions are aheared to. The following conditions
|
||||
* apply to all code found in this distribution, be it the RC4, RSA,
|
||||
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
|
||||
* included with this distribution is covered by the same copyright terms
|
||||
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
|
||||
*
|
||||
* Copyright remains Eric Young's, and as such any Copyright notices in
|
||||
* the code are not to be removed.
|
||||
* If this package is used in a product, Eric Young should be given attribution
|
||||
* as the author of the parts of the library used.
|
||||
* This can be in the form of a textual message at program startup or
|
||||
* in documentation (online or textual) provided with the package.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* "This product includes cryptographic software written by
|
||||
* Eric Young (eay@cryptsoft.com)"
|
||||
* The word 'cryptographic' can be left out if the rouines from the library
|
||||
* being used are not cryptographic related :-).
|
||||
* 4. If you include any Windows specific code (or a derivative thereof) from
|
||||
* the apps directory (application code) you must include an acknowledgement:
|
||||
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* The licence and distribution terms for any publically available version or
|
||||
* derivative of this code cannot be changed. i.e. this code cannot simply be
|
||||
* copied and put under another distribution licence
|
||||
* [including the GNU Public Licence.]
|
||||
*/
|
||||
|
||||
#include <openssl/rc4.h>
|
||||
|
||||
#include "rc4_local.h"
|
||||
|
||||
/* RC4 as implemented from a posting from
|
||||
* Newsgroups: sci.crypt
|
||||
* From: sterndark@netcom.com (David Sterndark)
|
||||
* Subject: RC4 Algorithm revealed.
|
||||
* Message-ID: <sternCvKL4B.Hyy@netcom.com>
|
||||
* Date: Wed, 14 Sep 1994 06:35:31 GMT
|
||||
*/
|
||||
|
||||
void
|
||||
RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
|
||||
{
|
||||
RC4_INT tmp;
|
||||
int id1, id2;
|
||||
RC4_INT *d;
|
||||
unsigned int i;
|
||||
|
||||
d = &(key->data[0]);
|
||||
key->x = 0;
|
||||
key->y = 0;
|
||||
id1 = id2 = 0;
|
||||
|
||||
#define SK_LOOP(d,n) { \
|
||||
tmp=d[(n)]; \
|
||||
id2 = (data[id1] + tmp + id2) & 0xff; \
|
||||
if (++id1 == len) id1=0; \
|
||||
d[(n)]=d[id2]; \
|
||||
d[id2]=tmp; }
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
d[i] = i;
|
||||
for (i = 0; i < 256; i += 4) {
|
||||
SK_LOOP(d, i + 0);
|
||||
SK_LOOP(d, i + 1);
|
||||
SK_LOOP(d, i + 2);
|
||||
SK_LOOP(d, i + 3);
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user