check in v3.8.0 source

2023-08-31 00:49:24 -07:00
parent 3ef498f9e6
commit 316795abde
1218 changed files with 562506 additions and 0 deletions

crypto/rc4/rc4-elf-x86_64.S Normal file

@@ -0,0 +1,625 @@
#include "x86_arch.h"
.text
.hidden OPENSSL_ia32cap_P
.globl RC4
.type RC4,@function
.align 16
RC4:
endbr64
orq %rsi,%rsi
jne .Lentry
retq
.Lentry:
pushq %rbx
pushq %r12
pushq %r13
.Lprologue:
movq %rsi,%r11
movq %rdx,%r12
movq %rcx,%r13
xorq %r10,%r10
xorq %rcx,%rcx
leaq 8(%rdi),%rdi
movb -8(%rdi),%r10b
movb -4(%rdi),%cl
cmpl $-1,256(%rdi)
je .LRC4_CHAR
movl OPENSSL_ia32cap_P(%rip),%r8d
xorq %rbx,%rbx
incb %r10b
subq %r10,%rbx
subq %r12,%r13
movl (%rdi,%r10,4),%eax
testq $-16,%r11
jz .Lloop1
btl $IA32CAP_BIT0_INTEL,%r8d
jc .Lintel
andq $7,%rbx
leaq 1(%r10),%rsi
jz .Loop8
subq %rbx,%r11
.Loop8_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r13,%r12,1)
leaq 1(%r12),%r12
decq %rbx
jnz .Loop8_warmup
leaq 1(%r10),%rsi
jmp .Loop8
.align 16
.Loop8:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 0(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,0(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,4(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 8(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,8(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 12(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,12(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 16(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,16(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 20(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,20(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 24(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,24(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%sil
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl -4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,28(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%r10b
rorq $8,%r8
subq $8,%r11
xorq (%r12),%r8
movq %r8,(%r13,%r12,1)
leaq 8(%r12),%r12
testq $-8,%r11
jnz .Loop8
cmpq $0,%r11
jne .Lloop1
jmp .Lexit
.align 16
.Lintel:
testq $-32,%r11
jz .Lloop1
andq $15,%rbx
jz .Loop16_is_hot
subq %rbx,%r11
.Loop16_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r13,%r12,1)
leaq 1(%r12),%r12
decq %rbx
jnz .Loop16_warmup
movq %rcx,%rbx
xorq %rcx,%rcx
movb %bl,%cl
.Loop16_is_hot:
leaq (%rdi,%r10,4),%rsi
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
jmp .Loop16_enter
.align 16
.Loop16:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm2
psllq $8,%xmm1
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
pxor %xmm1,%xmm2
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
movdqu %xmm2,(%r13,%r12,1)
leaq 16(%r12),%r12
.Loop16_enter:
movl (%rdi,%rcx,4),%edx
pxor %xmm1,%xmm1
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 8(%rsi),%eax
movzbl %bl,%ebx
movl %edx,4(%rsi)
addb %al,%cl
pinsrw $0,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 12(%rsi),%ebx
movzbl %al,%eax
movl %edx,8(%rsi)
addb %bl,%cl
pinsrw $1,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 16(%rsi),%eax
movzbl %bl,%ebx
movl %edx,12(%rsi)
addb %al,%cl
pinsrw $1,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 20(%rsi),%ebx
movzbl %al,%eax
movl %edx,16(%rsi)
addb %bl,%cl
pinsrw $2,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 24(%rsi),%eax
movzbl %bl,%ebx
movl %edx,20(%rsi)
addb %al,%cl
pinsrw $2,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 28(%rsi),%ebx
movzbl %al,%eax
movl %edx,24(%rsi)
addb %bl,%cl
pinsrw $3,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 32(%rsi),%eax
movzbl %bl,%ebx
movl %edx,28(%rsi)
addb %al,%cl
pinsrw $3,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 36(%rsi),%ebx
movzbl %al,%eax
movl %edx,32(%rsi)
addb %bl,%cl
pinsrw $4,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 40(%rsi),%eax
movzbl %bl,%ebx
movl %edx,36(%rsi)
addb %al,%cl
pinsrw $4,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 44(%rsi),%ebx
movzbl %al,%eax
movl %edx,40(%rsi)
addb %bl,%cl
pinsrw $5,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 48(%rsi),%eax
movzbl %bl,%ebx
movl %edx,44(%rsi)
addb %al,%cl
pinsrw $5,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 52(%rsi),%ebx
movzbl %al,%eax
movl %edx,48(%rsi)
addb %bl,%cl
pinsrw $6,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 56(%rsi),%eax
movzbl %bl,%ebx
movl %edx,52(%rsi)
addb %al,%cl
pinsrw $6,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 60(%rsi),%ebx
movzbl %al,%eax
movl %edx,56(%rsi)
addb %bl,%cl
pinsrw $7,(%rdi,%rax,4),%xmm0
addb $16,%r10b
movdqu (%r12),%xmm2
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movzbl %bl,%ebx
movl %edx,60(%rsi)
leaq (%rdi,%r10,4),%rsi
pinsrw $7,(%rdi,%rbx,4),%xmm1
movl (%rsi),%eax
movq %rcx,%rbx
xorq %rcx,%rcx
subq $16,%r11
movb %bl,%cl
testq $-16,%r11
jnz .Loop16
psllq $8,%xmm1
pxor %xmm0,%xmm2
pxor %xmm1,%xmm2
movdqu %xmm2,(%r13,%r12,1)
leaq 16(%r12),%r12
cmpq $0,%r11
jne .Lloop1
jmp .Lexit
.align 16
.Lloop1:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r13,%r12,1)
leaq 1(%r12),%r12
decq %r11
jnz .Lloop1
jmp .Lexit
.align 16
.LRC4_CHAR:
addb $1,%r10b
movzbl (%rdi,%r10,1),%eax
testq $-8,%r11
jz .Lcloop1
jmp .Lcloop8
.align 16
.Lcloop8:
movl (%r12),%r8d
movl 4(%r12),%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov0
movq %rax,%rbx
.Lcmov0:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov1
movq %rbx,%rax
.Lcmov1:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov2
movq %rax,%rbx
.Lcmov2:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov3
movq %rbx,%rax
.Lcmov3:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov4
movq %rax,%rbx
.Lcmov4:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov5
movq %rbx,%rax
.Lcmov5:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov6
movq %rax,%rbx
.Lcmov6:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov7
movq %rbx,%rax
.Lcmov7:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
leaq -8(%r11),%r11
movl %r8d,(%r13)
leaq 8(%r12),%r12
movl %r9d,4(%r13)
leaq 8(%r13),%r13
testq $-8,%r11
jnz .Lcloop8
cmpq $0,%r11
jne .Lcloop1
jmp .Lexit
.align 16
.Lcloop1:
addb %al,%cl
movzbl %cl,%ecx
movzbl (%rdi,%rcx,1),%edx
movb %al,(%rdi,%rcx,1)
movb %dl,(%rdi,%r10,1)
addb %al,%dl
addb $1,%r10b
movzbl %dl,%edx
movzbl %r10b,%r10d
movzbl (%rdi,%rdx,1),%edx
movzbl (%rdi,%r10,1),%eax
xorb (%r12),%dl
leaq 1(%r12),%r12
movb %dl,(%r13)
leaq 1(%r13),%r13
subq $1,%r11
jnz .Lcloop1
jmp .Lexit
.align 16
.Lexit:
subb $1,%r10b
movl %r10d,-8(%rdi)
movl %ecx,-4(%rdi)
movq (%rsp),%r13
movq 8(%rsp),%r12
movq 16(%rsp),%rbx
addq $24,%rsp
.Lepilogue:
retq
.size RC4,.-RC4
.globl RC4_set_key
.type RC4_set_key,@function
.align 16
RC4_set_key:
endbr64
leaq 8(%rdi),%rdi
leaq (%rdx,%rsi,1),%rdx
negq %rsi
movq %rsi,%rcx
xorl %eax,%eax
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
movl OPENSSL_ia32cap_P(%rip),%r8d
btl $IA32CAP_BIT0_INTELP4,%r8d
jc .Lc1stloop
jmp .Lw1stloop
.align 16
.Lw1stloop:
movl %eax,(%rdi,%rax,4)
addb $1,%al
jnc .Lw1stloop
xorq %r9,%r9
xorq %r8,%r8
.align 16
.Lw2ndloop:
movl (%rdi,%r9,4),%r10d
addb (%rdx,%rsi,1),%r8b
addb %r10b,%r8b
addq $1,%rsi
movl (%rdi,%r8,4),%r11d
cmovzq %rcx,%rsi
movl %r10d,(%rdi,%r8,4)
movl %r11d,(%rdi,%r9,4)
addb $1,%r9b
jnc .Lw2ndloop
jmp .Lexit_key
.align 16
.Lc1stloop:
movb %al,(%rdi,%rax,1)
addb $1,%al
jnc .Lc1stloop
xorq %r9,%r9
xorq %r8,%r8
.align 16
.Lc2ndloop:
movb (%rdi,%r9,1),%r10b
addb (%rdx,%rsi,1),%r8b
addb %r10b,%r8b
addq $1,%rsi
movb (%rdi,%r8,1),%r11b
jnz .Lcnowrap
movq %rcx,%rsi
.Lcnowrap:
movb %r10b,(%rdi,%r8,1)
movb %r11b,(%rdi,%r9,1)
addb $1,%r9b
jnc .Lc2ndloop
movl $-1,256(%rdi)
.align 16
.Lexit_key:
xorl %eax,%eax
movl %eax,-8(%rdi)
movl %eax,-4(%rdi)
retq
.size RC4_set_key,.-RC4_set_key
.globl RC4_options
.type RC4_options,@function
.align 16
RC4_options:
endbr64
leaq .Lopts(%rip),%rax
movl OPENSSL_ia32cap_P(%rip),%edx
btl $IA32CAP_BIT0_INTELP4,%edx
jc .L8xchar
btl $IA32CAP_BIT0_INTEL,%edx
jnc .Ldone
addq $25,%rax
retq
.L8xchar:
addq $12,%rax
.Ldone:
retq
.section .rodata
.align 64
.Lopts:
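/* RC4_options returns .Lopts+0 = "rc4(8x,int)", +12 = "rc4(8x,char)", or +25 = "rc4(16x,int)" (NUL-terminated) */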
.byte 114,99,52,40,56,120,44,105,110,116,41,0
.byte 114,99,52,40,56,120,44,99,104,97,114,41,0
.byte 114,99,52,40,49,54,120,44,105,110,116,41,0
.align 64
.text
.size RC4_options,.-RC4_options
#if defined(HAVE_GNU_STACK)
.section .note.GNU-stack,"",%progbits
#endif


@@ -0,0 +1,622 @@
#include "x86_arch.h"
.text
.private_extern _OPENSSL_ia32cap_P
.globl _RC4
.p2align 4
_RC4:
orq %rsi,%rsi
jne L$entry
retq
L$entry:
pushq %rbx
pushq %r12
pushq %r13
L$prologue:
movq %rsi,%r11
movq %rdx,%r12
movq %rcx,%r13
xorq %r10,%r10
xorq %rcx,%rcx
leaq 8(%rdi),%rdi
movb -8(%rdi),%r10b
movb -4(%rdi),%cl
cmpl $-1,256(%rdi)
je L$RC4_CHAR
movl _OPENSSL_ia32cap_P(%rip),%r8d
xorq %rbx,%rbx
incb %r10b
subq %r10,%rbx
subq %r12,%r13
movl (%rdi,%r10,4),%eax
testq $-16,%r11
jz L$loop1
btl $IA32CAP_BIT0_INTEL,%r8d
jc L$intel
andq $7,%rbx
leaq 1(%r10),%rsi
jz L$oop8
subq %rbx,%r11
L$oop8_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r13,%r12,1)
leaq 1(%r12),%r12
decq %rbx
jnz L$oop8_warmup
leaq 1(%r10),%rsi
jmp L$oop8
.p2align 4
L$oop8:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 0(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,0(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,4(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 8(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,8(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 12(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,12(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 16(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,16(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 20(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,20(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 24(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,24(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%sil
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl -4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,28(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%r10b
rorq $8,%r8
subq $8,%r11
xorq (%r12),%r8
movq %r8,(%r13,%r12,1)
leaq 8(%r12),%r12
testq $-8,%r11
jnz L$oop8
cmpq $0,%r11
jne L$loop1
jmp L$exit
.p2align 4
L$intel:
testq $-32,%r11
jz L$loop1
andq $15,%rbx
jz L$oop16_is_hot
subq %rbx,%r11
L$oop16_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r13,%r12,1)
leaq 1(%r12),%r12
decq %rbx
jnz L$oop16_warmup
movq %rcx,%rbx
xorq %rcx,%rcx
movb %bl,%cl
L$oop16_is_hot:
leaq (%rdi,%r10,4),%rsi
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
jmp L$oop16_enter
.p2align 4
L$oop16:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm2
psllq $8,%xmm1
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
pxor %xmm1,%xmm2
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
movdqu %xmm2,(%r13,%r12,1)
leaq 16(%r12),%r12
L$oop16_enter:
movl (%rdi,%rcx,4),%edx
pxor %xmm1,%xmm1
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 8(%rsi),%eax
movzbl %bl,%ebx
movl %edx,4(%rsi)
addb %al,%cl
pinsrw $0,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 12(%rsi),%ebx
movzbl %al,%eax
movl %edx,8(%rsi)
addb %bl,%cl
pinsrw $1,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 16(%rsi),%eax
movzbl %bl,%ebx
movl %edx,12(%rsi)
addb %al,%cl
pinsrw $1,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 20(%rsi),%ebx
movzbl %al,%eax
movl %edx,16(%rsi)
addb %bl,%cl
pinsrw $2,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 24(%rsi),%eax
movzbl %bl,%ebx
movl %edx,20(%rsi)
addb %al,%cl
pinsrw $2,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 28(%rsi),%ebx
movzbl %al,%eax
movl %edx,24(%rsi)
addb %bl,%cl
pinsrw $3,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 32(%rsi),%eax
movzbl %bl,%ebx
movl %edx,28(%rsi)
addb %al,%cl
pinsrw $3,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 36(%rsi),%ebx
movzbl %al,%eax
movl %edx,32(%rsi)
addb %bl,%cl
pinsrw $4,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 40(%rsi),%eax
movzbl %bl,%ebx
movl %edx,36(%rsi)
addb %al,%cl
pinsrw $4,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 44(%rsi),%ebx
movzbl %al,%eax
movl %edx,40(%rsi)
addb %bl,%cl
pinsrw $5,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 48(%rsi),%eax
movzbl %bl,%ebx
movl %edx,44(%rsi)
addb %al,%cl
pinsrw $5,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 52(%rsi),%ebx
movzbl %al,%eax
movl %edx,48(%rsi)
addb %bl,%cl
pinsrw $6,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 56(%rsi),%eax
movzbl %bl,%ebx
movl %edx,52(%rsi)
addb %al,%cl
pinsrw $6,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 60(%rsi),%ebx
movzbl %al,%eax
movl %edx,56(%rsi)
addb %bl,%cl
pinsrw $7,(%rdi,%rax,4),%xmm0
addb $16,%r10b
movdqu (%r12),%xmm2
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movzbl %bl,%ebx
movl %edx,60(%rsi)
leaq (%rdi,%r10,4),%rsi
pinsrw $7,(%rdi,%rbx,4),%xmm1
movl (%rsi),%eax
movq %rcx,%rbx
xorq %rcx,%rcx
subq $16,%r11
movb %bl,%cl
testq $-16,%r11
jnz L$oop16
psllq $8,%xmm1
pxor %xmm0,%xmm2
pxor %xmm1,%xmm2
movdqu %xmm2,(%r13,%r12,1)
leaq 16(%r12),%r12
cmpq $0,%r11
jne L$loop1
jmp L$exit
.p2align 4
L$loop1:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r13,%r12,1)
leaq 1(%r12),%r12
decq %r11
jnz L$loop1
jmp L$exit
.p2align 4
L$RC4_CHAR:
addb $1,%r10b
movzbl (%rdi,%r10,1),%eax
testq $-8,%r11
jz L$cloop1
jmp L$cloop8
.p2align 4
L$cloop8:
movl (%r12),%r8d
movl 4(%r12),%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne L$cmov0
movq %rax,%rbx
L$cmov0:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne L$cmov1
movq %rbx,%rax
L$cmov1:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne L$cmov2
movq %rax,%rbx
L$cmov2:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne L$cmov3
movq %rbx,%rax
L$cmov3:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne L$cmov4
movq %rax,%rbx
L$cmov4:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne L$cmov5
movq %rbx,%rax
L$cmov5:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne L$cmov6
movq %rax,%rbx
L$cmov6:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne L$cmov7
movq %rbx,%rax
L$cmov7:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
leaq -8(%r11),%r11
movl %r8d,(%r13)
leaq 8(%r12),%r12
movl %r9d,4(%r13)
leaq 8(%r13),%r13
testq $-8,%r11
jnz L$cloop8
cmpq $0,%r11
jne L$cloop1
jmp L$exit
.p2align 4
L$cloop1:
addb %al,%cl
movzbl %cl,%ecx
movzbl (%rdi,%rcx,1),%edx
movb %al,(%rdi,%rcx,1)
movb %dl,(%rdi,%r10,1)
addb %al,%dl
addb $1,%r10b
movzbl %dl,%edx
movzbl %r10b,%r10d
movzbl (%rdi,%rdx,1),%edx
movzbl (%rdi,%r10,1),%eax
xorb (%r12),%dl
leaq 1(%r12),%r12
movb %dl,(%r13)
leaq 1(%r13),%r13
subq $1,%r11
jnz L$cloop1
jmp L$exit
.p2align 4
L$exit:
subb $1,%r10b
movl %r10d,-8(%rdi)
movl %ecx,-4(%rdi)
movq (%rsp),%r13
movq 8(%rsp),%r12
movq 16(%rsp),%rbx
addq $24,%rsp
L$epilogue:
retq
.globl _RC4_set_key
.p2align 4
_RC4_set_key:
leaq 8(%rdi),%rdi
leaq (%rdx,%rsi,1),%rdx
negq %rsi
movq %rsi,%rcx
xorl %eax,%eax
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
movl _OPENSSL_ia32cap_P(%rip),%r8d
btl $IA32CAP_BIT0_INTELP4,%r8d
jc L$c1stloop
jmp L$w1stloop
.p2align 4
L$w1stloop:
movl %eax,(%rdi,%rax,4)
addb $1,%al
jnc L$w1stloop
xorq %r9,%r9
xorq %r8,%r8
.p2align 4
L$w2ndloop:
movl (%rdi,%r9,4),%r10d
addb (%rdx,%rsi,1),%r8b
addb %r10b,%r8b
addq $1,%rsi
movl (%rdi,%r8,4),%r11d
cmovzq %rcx,%rsi
movl %r10d,(%rdi,%r8,4)
movl %r11d,(%rdi,%r9,4)
addb $1,%r9b
jnc L$w2ndloop
jmp L$exit_key
.p2align 4
L$c1stloop:
movb %al,(%rdi,%rax,1)
addb $1,%al
jnc L$c1stloop
xorq %r9,%r9
xorq %r8,%r8
.p2align 4
L$c2ndloop:
movb (%rdi,%r9,1),%r10b
addb (%rdx,%rsi,1),%r8b
addb %r10b,%r8b
addq $1,%rsi
movb (%rdi,%r8,1),%r11b
jnz L$cnowrap
movq %rcx,%rsi
L$cnowrap:
movb %r10b,(%rdi,%r8,1)
movb %r11b,(%rdi,%r9,1)
addb $1,%r9b
jnc L$c2ndloop
movl $-1,256(%rdi)
.p2align 4
L$exit_key:
xorl %eax,%eax
movl %eax,-8(%rdi)
movl %eax,-4(%rdi)
retq
.globl _RC4_options
.p2align 4
_RC4_options:
leaq L$opts(%rip),%rax
movl _OPENSSL_ia32cap_P(%rip),%edx
btl $IA32CAP_BIT0_INTELP4,%edx
jc L$8xchar
btl $IA32CAP_BIT0_INTEL,%edx
jnc L$done
addq $25,%rax
retq
L$8xchar:
addq $12,%rax
L$done:
retq
.section __DATA,__const
.p2align 6
L$opts:
.byte 114,99,52,40,56,120,44,105,110,116,41,0
.byte 114,99,52,40,56,120,44,99,104,97,114,41,0
.byte 114,99,52,40,49,54,120,44,105,110,116,41,0
.p2align 6
.text


@@ -0,0 +1,723 @@
; 1 "crypto/rc4/rc4-masm-x86_64.S.tmp"
; 1 "<built-in>" 1
; 1 "<built-in>" 3
; 399 "<built-in>" 3
; 1 "<command line>" 1
; 1 "<built-in>" 2
; 1 "crypto/rc4/rc4-masm-x86_64.S.tmp" 2
OPTION DOTNAME
; 1 "./crypto/x86_arch.h" 1
; 16 "./crypto/x86_arch.h"
; 40 "./crypto/x86_arch.h"
; 3 "crypto/rc4/rc4-masm-x86_64.S.tmp" 2
.text$ SEGMENT ALIGN(64) 'CODE'
EXTERN OPENSSL_ia32cap_P:NEAR
PUBLIC RC4
ALIGN 16
RC4 PROC PUBLIC
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
mov QWORD PTR[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_RC4::
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
endbr64
or rsi,rsi
jne $L$entry
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
DB 0F3h,0C3h ;repret
$L$entry::
push rbx
push r12
push r13
$L$prologue::
mov r11,rsi
mov r12,rdx
mov r13,rcx
xor r10,r10
xor rcx,rcx
lea rdi,QWORD PTR[8+rdi]
mov r10b,BYTE PTR[((-8))+rdi]
mov cl,BYTE PTR[((-4))+rdi]
cmp DWORD PTR[256+rdi],-1
je $L$RC4_CHAR
mov r8d,DWORD PTR[OPENSSL_ia32cap_P]
xor rbx,rbx
inc r10b
sub rbx,r10
sub r13,r12
mov eax,DWORD PTR[r10*4+rdi]
test r11,-16
jz $L$loop1
bt r8d,30
jc $L$intel
and rbx,7
lea rsi,QWORD PTR[1+r10]
jz $L$oop8
sub r11,rbx
$L$oop8_warmup::
add cl,al
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],eax
mov DWORD PTR[r10*4+rdi],edx
add al,dl
inc r10b
mov edx,DWORD PTR[rax*4+rdi]
mov eax,DWORD PTR[r10*4+rdi]
xor dl,BYTE PTR[r12]
mov BYTE PTR[r12*1+r13],dl
lea r12,QWORD PTR[1+r12]
dec rbx
jnz $L$oop8_warmup
lea rsi,QWORD PTR[1+r10]
jmp $L$oop8
ALIGN 16
$L$oop8::
add cl,al
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],eax
mov ebx,DWORD PTR[rsi*4+rdi]
ror r8,8
mov DWORD PTR[r10*4+rdi],edx
add dl,al
mov r8b,BYTE PTR[rdx*4+rdi]
add cl,bl
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],ebx
mov eax,DWORD PTR[4+rsi*4+rdi]
ror r8,8
mov DWORD PTR[4+r10*4+rdi],edx
add dl,bl
mov r8b,BYTE PTR[rdx*4+rdi]
add cl,al
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],eax
mov ebx,DWORD PTR[8+rsi*4+rdi]
ror r8,8
mov DWORD PTR[8+r10*4+rdi],edx
add dl,al
mov r8b,BYTE PTR[rdx*4+rdi]
add cl,bl
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],ebx
mov eax,DWORD PTR[12+rsi*4+rdi]
ror r8,8
mov DWORD PTR[12+r10*4+rdi],edx
add dl,bl
mov r8b,BYTE PTR[rdx*4+rdi]
add cl,al
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],eax
mov ebx,DWORD PTR[16+rsi*4+rdi]
ror r8,8
mov DWORD PTR[16+r10*4+rdi],edx
add dl,al
mov r8b,BYTE PTR[rdx*4+rdi]
add cl,bl
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],ebx
mov eax,DWORD PTR[20+rsi*4+rdi]
ror r8,8
mov DWORD PTR[20+r10*4+rdi],edx
add dl,bl
mov r8b,BYTE PTR[rdx*4+rdi]
add cl,al
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],eax
mov ebx,DWORD PTR[24+rsi*4+rdi]
ror r8,8
mov DWORD PTR[24+r10*4+rdi],edx
add dl,al
mov r8b,BYTE PTR[rdx*4+rdi]
add sil,8
add cl,bl
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],ebx
mov eax,DWORD PTR[((-4))+rsi*4+rdi]
ror r8,8
mov DWORD PTR[28+r10*4+rdi],edx
add dl,bl
mov r8b,BYTE PTR[rdx*4+rdi]
add r10b,8
ror r8,8
sub r11,8
xor r8,QWORD PTR[r12]
mov QWORD PTR[r12*1+r13],r8
lea r12,QWORD PTR[8+r12]
test r11,-8
jnz $L$oop8
cmp r11,0
jne $L$loop1
jmp $L$exit
ALIGN 16
$L$intel::
test r11,-32
jz $L$loop1
and rbx,15
jz $L$oop16_is_hot
sub r11,rbx
$L$oop16_warmup::
add cl,al
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],eax
mov DWORD PTR[r10*4+rdi],edx
add al,dl
inc r10b
mov edx,DWORD PTR[rax*4+rdi]
mov eax,DWORD PTR[r10*4+rdi]
xor dl,BYTE PTR[r12]
mov BYTE PTR[r12*1+r13],dl
lea r12,QWORD PTR[1+r12]
dec rbx
jnz $L$oop16_warmup
mov rbx,rcx
xor rcx,rcx
mov cl,bl
$L$oop16_is_hot::
lea rsi,QWORD PTR[r10*4+rdi]
add cl,al
mov edx,DWORD PTR[rcx*4+rdi]
pxor xmm0,xmm0
mov DWORD PTR[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD PTR[4+rsi]
movzx eax,al
mov DWORD PTR[rsi],edx
add cl,bl
pinsrw xmm0,WORD PTR[rax*4+rdi],0
jmp $L$oop16_enter
ALIGN 16
$L$oop16::
add cl,al
mov edx,DWORD PTR[rcx*4+rdi]
pxor xmm2,xmm0
psllq xmm1,8
pxor xmm0,xmm0
mov DWORD PTR[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD PTR[4+rsi]
movzx eax,al
mov DWORD PTR[rsi],edx
pxor xmm2,xmm1
add cl,bl
pinsrw xmm0,WORD PTR[rax*4+rdi],0
movdqu XMMWORD PTR[r12*1+r13],xmm2
lea r12,QWORD PTR[16+r12]
$L$oop16_enter::
mov edx,DWORD PTR[rcx*4+rdi]
pxor xmm1,xmm1
mov DWORD PTR[rcx*4+rdi],ebx
add bl,dl
mov eax,DWORD PTR[8+rsi]
movzx ebx,bl
mov DWORD PTR[4+rsi],edx
add cl,al
pinsrw xmm1,WORD PTR[rbx*4+rdi],0
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD PTR[12+rsi]
movzx eax,al
mov DWORD PTR[8+rsi],edx
add cl,bl
pinsrw xmm0,WORD PTR[rax*4+rdi],1
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],ebx
add bl,dl
mov eax,DWORD PTR[16+rsi]
movzx ebx,bl
mov DWORD PTR[12+rsi],edx
add cl,al
pinsrw xmm1,WORD PTR[rbx*4+rdi],1
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD PTR[20+rsi]
movzx eax,al
mov DWORD PTR[16+rsi],edx
add cl,bl
pinsrw xmm0,WORD PTR[rax*4+rdi],2
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],ebx
add bl,dl
mov eax,DWORD PTR[24+rsi]
movzx ebx,bl
mov DWORD PTR[20+rsi],edx
add cl,al
pinsrw xmm1,WORD PTR[rbx*4+rdi],2
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD PTR[28+rsi]
movzx eax,al
mov DWORD PTR[24+rsi],edx
add cl,bl
pinsrw xmm0,WORD PTR[rax*4+rdi],3
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],ebx
add bl,dl
mov eax,DWORD PTR[32+rsi]
movzx ebx,bl
mov DWORD PTR[28+rsi],edx
add cl,al
pinsrw xmm1,WORD PTR[rbx*4+rdi],3
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD PTR[36+rsi]
movzx eax,al
mov DWORD PTR[32+rsi],edx
add cl,bl
pinsrw xmm0,WORD PTR[rax*4+rdi],4
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],ebx
add bl,dl
mov eax,DWORD PTR[40+rsi]
movzx ebx,bl
mov DWORD PTR[36+rsi],edx
add cl,al
pinsrw xmm1,WORD PTR[rbx*4+rdi],4
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD PTR[44+rsi]
movzx eax,al
mov DWORD PTR[40+rsi],edx
add cl,bl
pinsrw xmm0,WORD PTR[rax*4+rdi],5
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],ebx
add bl,dl
mov eax,DWORD PTR[48+rsi]
movzx ebx,bl
mov DWORD PTR[44+rsi],edx
add cl,al
pinsrw xmm1,WORD PTR[rbx*4+rdi],5
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD PTR[52+rsi]
movzx eax,al
mov DWORD PTR[48+rsi],edx
add cl,bl
pinsrw xmm0,WORD PTR[rax*4+rdi],6
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],ebx
add bl,dl
mov eax,DWORD PTR[56+rsi]
movzx ebx,bl
mov DWORD PTR[52+rsi],edx
add cl,al
pinsrw xmm1,WORD PTR[rbx*4+rdi],6
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD PTR[60+rsi]
movzx eax,al
mov DWORD PTR[56+rsi],edx
add cl,bl
pinsrw xmm0,WORD PTR[rax*4+rdi],7
add r10b,16
movdqu xmm2,XMMWORD PTR[r12]
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],ebx
add bl,dl
movzx ebx,bl
mov DWORD PTR[60+rsi],edx
lea rsi,QWORD PTR[r10*4+rdi]
pinsrw xmm1,WORD PTR[rbx*4+rdi],7
mov eax,DWORD PTR[rsi]
mov rbx,rcx
xor rcx,rcx
sub r11,16
mov cl,bl
test r11,-16
jnz $L$oop16
psllq xmm1,8
pxor xmm2,xmm0
pxor xmm2,xmm1
movdqu XMMWORD PTR[r12*1+r13],xmm2
lea r12,QWORD PTR[16+r12]
cmp r11,0
jne $L$loop1
jmp $L$exit
ALIGN 16
$L$loop1::
add cl,al
mov edx,DWORD PTR[rcx*4+rdi]
mov DWORD PTR[rcx*4+rdi],eax
mov DWORD PTR[r10*4+rdi],edx
add al,dl
inc r10b
mov edx,DWORD PTR[rax*4+rdi]
mov eax,DWORD PTR[r10*4+rdi]
xor dl,BYTE PTR[r12]
mov BYTE PTR[r12*1+r13],dl
lea r12,QWORD PTR[1+r12]
dec r11
jnz $L$loop1
jmp $L$exit
ALIGN 16
$L$RC4_CHAR::
add r10b,1
movzx eax,BYTE PTR[r10*1+rdi]
test r11,-8
jz $L$cloop1
jmp $L$cloop8
ALIGN 16
$L$cloop8::
mov r8d,DWORD PTR[r12]
mov r9d,DWORD PTR[4+r12]
add cl,al
lea rsi,QWORD PTR[1+r10]
movzx edx,BYTE PTR[rcx*1+rdi]
movzx esi,sil
movzx ebx,BYTE PTR[rsi*1+rdi]
mov BYTE PTR[rcx*1+rdi],al
cmp rcx,rsi
mov BYTE PTR[r10*1+rdi],dl
jne $L$cmov0
mov rbx,rax
$L$cmov0::
add dl,al
xor r8b,BYTE PTR[rdx*1+rdi]
ror r8d,8
add cl,bl
lea r10,QWORD PTR[1+rsi]
movzx edx,BYTE PTR[rcx*1+rdi]
movzx r10d,r10b
movzx eax,BYTE PTR[r10*1+rdi]
mov BYTE PTR[rcx*1+rdi],bl
cmp rcx,r10
mov BYTE PTR[rsi*1+rdi],dl
jne $L$cmov1
mov rax,rbx
$L$cmov1::
add dl,bl
xor r8b,BYTE PTR[rdx*1+rdi]
ror r8d,8
add cl,al
lea rsi,QWORD PTR[1+r10]
movzx edx,BYTE PTR[rcx*1+rdi]
movzx esi,sil
movzx ebx,BYTE PTR[rsi*1+rdi]
mov BYTE PTR[rcx*1+rdi],al
cmp rcx,rsi
mov BYTE PTR[r10*1+rdi],dl
jne $L$cmov2
mov rbx,rax
$L$cmov2::
add dl,al
xor r8b,BYTE PTR[rdx*1+rdi]
ror r8d,8
add cl,bl
lea r10,QWORD PTR[1+rsi]
movzx edx,BYTE PTR[rcx*1+rdi]
movzx r10d,r10b
movzx eax,BYTE PTR[r10*1+rdi]
mov BYTE PTR[rcx*1+rdi],bl
cmp rcx,r10
mov BYTE PTR[rsi*1+rdi],dl
jne $L$cmov3
mov rax,rbx
$L$cmov3::
add dl,bl
xor r8b,BYTE PTR[rdx*1+rdi]
ror r8d,8
add cl,al
lea rsi,QWORD PTR[1+r10]
movzx edx,BYTE PTR[rcx*1+rdi]
movzx esi,sil
movzx ebx,BYTE PTR[rsi*1+rdi]
mov BYTE PTR[rcx*1+rdi],al
cmp rcx,rsi
mov BYTE PTR[r10*1+rdi],dl
jne $L$cmov4
mov rbx,rax
$L$cmov4::
add dl,al
xor r9b,BYTE PTR[rdx*1+rdi]
ror r9d,8
add cl,bl
lea r10,QWORD PTR[1+rsi]
movzx edx,BYTE PTR[rcx*1+rdi]
movzx r10d,r10b
movzx eax,BYTE PTR[r10*1+rdi]
mov BYTE PTR[rcx*1+rdi],bl
cmp rcx,r10
mov BYTE PTR[rsi*1+rdi],dl
jne $L$cmov5
mov rax,rbx
$L$cmov5::
add dl,bl
xor r9b,BYTE PTR[rdx*1+rdi]
ror r9d,8
add cl,al
lea rsi,QWORD PTR[1+r10]
movzx edx,BYTE PTR[rcx*1+rdi]
movzx esi,sil
movzx ebx,BYTE PTR[rsi*1+rdi]
mov BYTE PTR[rcx*1+rdi],al
cmp rcx,rsi
mov BYTE PTR[r10*1+rdi],dl
jne $L$cmov6
mov rbx,rax
$L$cmov6::
add dl,al
xor r9b,BYTE PTR[rdx*1+rdi]
ror r9d,8
add cl,bl
lea r10,QWORD PTR[1+rsi]
movzx edx,BYTE PTR[rcx*1+rdi]
movzx r10d,r10b
movzx eax,BYTE PTR[r10*1+rdi]
mov BYTE PTR[rcx*1+rdi],bl
cmp rcx,r10
mov BYTE PTR[rsi*1+rdi],dl
jne $L$cmov7
mov rax,rbx
$L$cmov7::
add dl,bl
xor r9b,BYTE PTR[rdx*1+rdi]
ror r9d,8
lea r11,QWORD PTR[((-8))+r11]
mov DWORD PTR[r13],r8d
lea r12,QWORD PTR[8+r12]
mov DWORD PTR[4+r13],r9d
lea r13,QWORD PTR[8+r13]
test r11,-8
jnz $L$cloop8
cmp r11,0
jne $L$cloop1
jmp $L$exit
ALIGN 16
$L$cloop1::
add cl,al
movzx ecx,cl
movzx edx,BYTE PTR[rcx*1+rdi]
mov BYTE PTR[rcx*1+rdi],al
mov BYTE PTR[r10*1+rdi],dl
add dl,al
add r10b,1
movzx edx,dl
movzx r10d,r10b
movzx edx,BYTE PTR[rdx*1+rdi]
movzx eax,BYTE PTR[r10*1+rdi]
xor dl,BYTE PTR[r12]
lea r12,QWORD PTR[1+r12]
mov BYTE PTR[r13],dl
lea r13,QWORD PTR[1+r13]
sub r11,1
jnz $L$cloop1
jmp $L$exit
ALIGN 16
$L$exit::
sub r10b,1
mov DWORD PTR[((-8))+rdi],r10d
mov DWORD PTR[((-4))+rdi],ecx
mov r13,QWORD PTR[rsp]
mov r12,QWORD PTR[8+rsp]
mov rbx,QWORD PTR[16+rsp]
add rsp,24
$L$epilogue::
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_RC4::
RC4 ENDP
PUBLIC RC4_set_key
ALIGN 16
RC4_set_key PROC PUBLIC
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
mov QWORD PTR[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_RC4_set_key::
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
endbr64
lea rdi,QWORD PTR[8+rdi]
lea rdx,QWORD PTR[rsi*1+rdx]
neg rsi
mov rcx,rsi
xor eax,eax
xor r9,r9
xor r10,r10
xor r11,r11
mov r8d,DWORD PTR[OPENSSL_ia32cap_P]
bt r8d,20
jc $L$c1stloop
jmp $L$w1stloop
ALIGN 16
$L$w1stloop::
mov DWORD PTR[rax*4+rdi],eax
add al,1
jnc $L$w1stloop
xor r9,r9
xor r8,r8
ALIGN 16
$L$w2ndloop::
mov r10d,DWORD PTR[r9*4+rdi]
add r8b,BYTE PTR[rsi*1+rdx]
add r8b,r10b
add rsi,1
mov r11d,DWORD PTR[r8*4+rdi]
cmovz rsi,rcx
mov DWORD PTR[r8*4+rdi],r10d
mov DWORD PTR[r9*4+rdi],r11d
add r9b,1
jnc $L$w2ndloop
jmp $L$exit_key
ALIGN 16
$L$c1stloop::
mov BYTE PTR[rax*1+rdi],al
add al,1
jnc $L$c1stloop
xor r9,r9
xor r8,r8
ALIGN 16
$L$c2ndloop::
mov r10b,BYTE PTR[r9*1+rdi]
add r8b,BYTE PTR[rsi*1+rdx]
add r8b,r10b
add rsi,1
mov r11b,BYTE PTR[r8*1+rdi]
jnz $L$cnowrap
mov rsi,rcx
$L$cnowrap::
mov BYTE PTR[r8*1+rdi],r10b
mov BYTE PTR[r9*1+rdi],r11b
add r9b,1
jnc $L$c2ndloop
mov DWORD PTR[256+rdi],-1
ALIGN 16
$L$exit_key::
xor eax,eax
mov DWORD PTR[((-8))+rdi],eax
mov DWORD PTR[((-4))+rdi],eax
mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
mov rsi,QWORD PTR[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_RC4_set_key::
RC4_set_key ENDP
PUBLIC RC4_options
ALIGN 16
RC4_options PROC PUBLIC
endbr64
lea rax,QWORD PTR[$L$opts]
mov edx,DWORD PTR[OPENSSL_ia32cap_P]
bt edx,20
jc $L$8xchar
bt edx,30
jnc $L$done
add rax,25
DB 0F3h,0C3h ;repret
$L$8xchar::
add rax,12
$L$done::
DB 0F3h,0C3h ;repret
.text$ ENDS
.rdata SEGMENT READONLY ALIGN(8)
ALIGN 64
$L$opts::
DB 114,99,52,40,56,120,44,105,110,116,41,0
DB 114,99,52,40,56,120,44,99,104,97,114,41,0
DB 114,99,52,40,49,54,120,44,105,110,116,41,0
ALIGN 64
.rdata ENDS
.text$ SEGMENT ALIGN(64) 'CODE'
RC4_options ENDP
.text$ ENDS
END

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,645 @@
#include "x86_arch.h"
.text
.globl RC4
.def RC4; .scl 2; .type 32; .endef
.p2align 4
RC4:
movq %rdi,8(%rsp)
movq %rsi,16(%rsp)
movq %rsp,%rax
.LSEH_begin_RC4:
movq %rcx,%rdi
movq %rdx,%rsi
movq %r8,%rdx
movq %r9,%rcx
endbr64
orq %rsi,%rsi
jne .Lentry
movq 8(%rsp),%rdi
movq 16(%rsp),%rsi
retq
.Lentry:
pushq %rbx
pushq %r12
pushq %r13
.Lprologue:
movq %rsi,%r11
movq %rdx,%r12
movq %rcx,%r13
xorq %r10,%r10
xorq %rcx,%rcx
leaq 8(%rdi),%rdi
movb -8(%rdi),%r10b
movb -4(%rdi),%cl
cmpl $-1,256(%rdi)
je .LRC4_CHAR
movl OPENSSL_ia32cap_P(%rip),%r8d
xorq %rbx,%rbx
incb %r10b
subq %r10,%rbx
subq %r12,%r13
movl (%rdi,%r10,4),%eax
testq $-16,%r11
jz .Lloop1
btl $IA32CAP_BIT0_INTEL,%r8d
jc .Lintel
andq $7,%rbx
leaq 1(%r10),%rsi
jz .Loop8
subq %rbx,%r11
.Loop8_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r13,%r12,1)
leaq 1(%r12),%r12
decq %rbx
jnz .Loop8_warmup
leaq 1(%r10),%rsi
jmp .Loop8
.p2align 4
.Loop8:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 0(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,0(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,4(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 8(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,8(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 12(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,12(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 16(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,16(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl 20(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,20(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl 24(%rdi,%rsi,4),%ebx
rorq $8,%r8
movl %edx,24(%rdi,%r10,4)
addb %al,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%sil
addb %bl,%cl
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
movl -4(%rdi,%rsi,4),%eax
rorq $8,%r8
movl %edx,28(%rdi,%r10,4)
addb %bl,%dl
movb (%rdi,%rdx,4),%r8b
addb $8,%r10b
rorq $8,%r8
subq $8,%r11
xorq (%r12),%r8
movq %r8,(%r13,%r12,1)
leaq 8(%r12),%r12
testq $-8,%r11
jnz .Loop8
cmpq $0,%r11
jne .Lloop1
jmp .Lexit
.p2align 4
.Lintel:
testq $-32,%r11
jz .Lloop1
andq $15,%rbx
jz .Loop16_is_hot
subq %rbx,%r11
.Loop16_warmup:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r13,%r12,1)
leaq 1(%r12),%r12
decq %rbx
jnz .Loop16_warmup
movq %rcx,%rbx
xorq %rcx,%rcx
movb %bl,%cl
.Loop16_is_hot:
leaq (%rdi,%r10,4),%rsi
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
jmp .Loop16_enter
.p2align 4
.Loop16:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
pxor %xmm0,%xmm2
psllq $8,%xmm1
pxor %xmm0,%xmm0
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 4(%rsi),%ebx
movzbl %al,%eax
movl %edx,0(%rsi)
pxor %xmm1,%xmm2
addb %bl,%cl
pinsrw $0,(%rdi,%rax,4),%xmm0
movdqu %xmm2,(%r13,%r12,1)
leaq 16(%r12),%r12
.Loop16_enter:
movl (%rdi,%rcx,4),%edx
pxor %xmm1,%xmm1
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 8(%rsi),%eax
movzbl %bl,%ebx
movl %edx,4(%rsi)
addb %al,%cl
pinsrw $0,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 12(%rsi),%ebx
movzbl %al,%eax
movl %edx,8(%rsi)
addb %bl,%cl
pinsrw $1,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 16(%rsi),%eax
movzbl %bl,%ebx
movl %edx,12(%rsi)
addb %al,%cl
pinsrw $1,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 20(%rsi),%ebx
movzbl %al,%eax
movl %edx,16(%rsi)
addb %bl,%cl
pinsrw $2,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 24(%rsi),%eax
movzbl %bl,%ebx
movl %edx,20(%rsi)
addb %al,%cl
pinsrw $2,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 28(%rsi),%ebx
movzbl %al,%eax
movl %edx,24(%rsi)
addb %bl,%cl
pinsrw $3,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 32(%rsi),%eax
movzbl %bl,%ebx
movl %edx,28(%rsi)
addb %al,%cl
pinsrw $3,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 36(%rsi),%ebx
movzbl %al,%eax
movl %edx,32(%rsi)
addb %bl,%cl
pinsrw $4,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 40(%rsi),%eax
movzbl %bl,%ebx
movl %edx,36(%rsi)
addb %al,%cl
pinsrw $4,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 44(%rsi),%ebx
movzbl %al,%eax
movl %edx,40(%rsi)
addb %bl,%cl
pinsrw $5,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 48(%rsi),%eax
movzbl %bl,%ebx
movl %edx,44(%rsi)
addb %al,%cl
pinsrw $5,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 52(%rsi),%ebx
movzbl %al,%eax
movl %edx,48(%rsi)
addb %bl,%cl
pinsrw $6,(%rdi,%rax,4),%xmm0
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movl 56(%rsi),%eax
movzbl %bl,%ebx
movl %edx,52(%rsi)
addb %al,%cl
pinsrw $6,(%rdi,%rbx,4),%xmm1
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
addb %dl,%al
movl 60(%rsi),%ebx
movzbl %al,%eax
movl %edx,56(%rsi)
addb %bl,%cl
pinsrw $7,(%rdi,%rax,4),%xmm0
addb $16,%r10b
movdqu (%r12),%xmm2
movl (%rdi,%rcx,4),%edx
movl %ebx,(%rdi,%rcx,4)
addb %dl,%bl
movzbl %bl,%ebx
movl %edx,60(%rsi)
leaq (%rdi,%r10,4),%rsi
pinsrw $7,(%rdi,%rbx,4),%xmm1
movl (%rsi),%eax
movq %rcx,%rbx
xorq %rcx,%rcx
subq $16,%r11
movb %bl,%cl
testq $-16,%r11
jnz .Loop16
psllq $8,%xmm1
pxor %xmm0,%xmm2
pxor %xmm1,%xmm2
movdqu %xmm2,(%r13,%r12,1)
leaq 16(%r12),%r12
cmpq $0,%r11
jne .Lloop1
jmp .Lexit
.p2align 4
.Lloop1:
addb %al,%cl
movl (%rdi,%rcx,4),%edx
movl %eax,(%rdi,%rcx,4)
movl %edx,(%rdi,%r10,4)
addb %dl,%al
incb %r10b
movl (%rdi,%rax,4),%edx
movl (%rdi,%r10,4),%eax
xorb (%r12),%dl
movb %dl,(%r13,%r12,1)
leaq 1(%r12),%r12
decq %r11
jnz .Lloop1
jmp .Lexit
.p2align 4
.LRC4_CHAR:
addb $1,%r10b
movzbl (%rdi,%r10,1),%eax
testq $-8,%r11
jz .Lcloop1
jmp .Lcloop8
.p2align 4
.Lcloop8:
movl (%r12),%r8d
movl 4(%r12),%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov0
movq %rax,%rbx
.Lcmov0:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov1
movq %rbx,%rax
.Lcmov1:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov2
movq %rax,%rbx
.Lcmov2:
addb %al,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov3
movq %rbx,%rax
.Lcmov3:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r8b
rorl $8,%r8d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov4
movq %rax,%rbx
.Lcmov4:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov5
movq %rbx,%rax
.Lcmov5:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %al,%cl
leaq 1(%r10),%rsi
movzbl (%rdi,%rcx,1),%edx
movzbl %sil,%esi
movzbl (%rdi,%rsi,1),%ebx
movb %al,(%rdi,%rcx,1)
cmpq %rsi,%rcx
movb %dl,(%rdi,%r10,1)
jne .Lcmov6
movq %rax,%rbx
.Lcmov6:
addb %al,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
addb %bl,%cl
leaq 1(%rsi),%r10
movzbl (%rdi,%rcx,1),%edx
movzbl %r10b,%r10d
movzbl (%rdi,%r10,1),%eax
movb %bl,(%rdi,%rcx,1)
cmpq %r10,%rcx
movb %dl,(%rdi,%rsi,1)
jne .Lcmov7
movq %rbx,%rax
.Lcmov7:
addb %bl,%dl
xorb (%rdi,%rdx,1),%r9b
rorl $8,%r9d
leaq -8(%r11),%r11
movl %r8d,(%r13)
leaq 8(%r12),%r12
movl %r9d,4(%r13)
leaq 8(%r13),%r13
testq $-8,%r11
jnz .Lcloop8
cmpq $0,%r11
jne .Lcloop1
jmp .Lexit
.p2align 4
.Lcloop1:
addb %al,%cl
movzbl %cl,%ecx
movzbl (%rdi,%rcx,1),%edx
movb %al,(%rdi,%rcx,1)
movb %dl,(%rdi,%r10,1)
addb %al,%dl
addb $1,%r10b
movzbl %dl,%edx
movzbl %r10b,%r10d
movzbl (%rdi,%rdx,1),%edx
movzbl (%rdi,%r10,1),%eax
xorb (%r12),%dl
leaq 1(%r12),%r12
movb %dl,(%r13)
leaq 1(%r13),%r13
subq $1,%r11
jnz .Lcloop1
jmp .Lexit
.p2align 4
.Lexit:
subb $1,%r10b
movl %r10d,-8(%rdi)
movl %ecx,-4(%rdi)
movq (%rsp),%r13
movq 8(%rsp),%r12
movq 16(%rsp),%rbx
addq $24,%rsp
.Lepilogue:
movq 8(%rsp),%rdi
movq 16(%rsp),%rsi
retq
.LSEH_end_RC4:
.globl RC4_set_key
.def RC4_set_key; .scl 2; .type 32; .endef
.p2align 4
RC4_set_key:
movq %rdi,8(%rsp)
movq %rsi,16(%rsp)
movq %rsp,%rax
.LSEH_begin_RC4_set_key:
movq %rcx,%rdi
movq %rdx,%rsi
movq %r8,%rdx
endbr64
leaq 8(%rdi),%rdi
leaq (%rdx,%rsi,1),%rdx
negq %rsi
movq %rsi,%rcx
xorl %eax,%eax
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
movl OPENSSL_ia32cap_P(%rip),%r8d
btl $IA32CAP_BIT0_INTELP4,%r8d
jc .Lc1stloop
jmp .Lw1stloop
.p2align 4
.Lw1stloop:
movl %eax,(%rdi,%rax,4)
addb $1,%al
jnc .Lw1stloop
xorq %r9,%r9
xorq %r8,%r8
.p2align 4
.Lw2ndloop:
movl (%rdi,%r9,4),%r10d
addb (%rdx,%rsi,1),%r8b
addb %r10b,%r8b
addq $1,%rsi
movl (%rdi,%r8,4),%r11d
cmovzq %rcx,%rsi
movl %r10d,(%rdi,%r8,4)
movl %r11d,(%rdi,%r9,4)
addb $1,%r9b
jnc .Lw2ndloop
jmp .Lexit_key
.p2align 4
.Lc1stloop:
movb %al,(%rdi,%rax,1)
addb $1,%al
jnc .Lc1stloop
xorq %r9,%r9
xorq %r8,%r8
.p2align 4
.Lc2ndloop:
movb (%rdi,%r9,1),%r10b
addb (%rdx,%rsi,1),%r8b
addb %r10b,%r8b
addq $1,%rsi
movb (%rdi,%r8,1),%r11b
jnz .Lcnowrap
movq %rcx,%rsi
.Lcnowrap:
movb %r10b,(%rdi,%r8,1)
movb %r11b,(%rdi,%r9,1)
addb $1,%r9b
jnc .Lc2ndloop
movl $-1,256(%rdi)
.p2align 4
.Lexit_key:
xorl %eax,%eax
movl %eax,-8(%rdi)
movl %eax,-4(%rdi)
movq 8(%rsp),%rdi
movq 16(%rsp),%rsi
retq
.LSEH_end_RC4_set_key:
.globl RC4_options
.def RC4_options; .scl 2; .type 32; .endef
.p2align 4
RC4_options:
endbr64
leaq .Lopts(%rip),%rax
movl OPENSSL_ia32cap_P(%rip),%edx
btl $IA32CAP_BIT0_INTELP4,%edx
jc .L8xchar
btl $IA32CAP_BIT0_INTEL,%edx
jnc .Ldone
addq $25,%rax
retq
.L8xchar:
addq $12,%rax
.Ldone:
retq
.section .rodata
.p2align 6
.Lopts:
.byte 114,99,52,40,56,120,44,105,110,116,41,0
.byte 114,99,52,40,56,120,44,99,104,97,114,41,0
.byte 114,99,52,40,49,54,120,44,105,110,116,41,0
.p2align 6
.text

crypto/rc4/rc4_enc.c Normal file

@@ -0,0 +1,254 @@
/* $OpenBSD: rc4_enc.c,v 1.18 2022/11/26 16:08:54 tb Exp $ */
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
#include <endian.h>
#include <openssl/rc4.h>
#include "rc4_local.h"
/* RC4 as implemented from a posting from
* Newsgroups: sci.crypt
* From: sterndark@netcom.com (David Sterndark)
* Subject: RC4 Algorithm revealed.
* Message-ID: <sternCvKL4B.Hyy@netcom.com>
* Date: Wed, 14 Sep 1994 06:35:31 GMT
*/
void
RC4(RC4_KEY *key, size_t len, const unsigned char *indata,
unsigned char *outdata)
{
RC4_INT *d;
RC4_INT x, y, tx, ty;
size_t i;
x = key->x;
y = key->y;
d = key->data;
#if defined(RC4_CHUNK)
/*
* The original reason for implementing this(*) was the fact that
* pre-21164a Alpha CPUs don't have byte load/store instructions
* and e.g. a byte store has to be done with 64-bit load, shift,
and, or and finally 64-bit store. Peeking at data and operating
* at natural word size made it possible to reduce amount of
* instructions as well as to perform early read-ahead without
* suffering from RAW (read-after-write) hazard. This resulted
* in ~40%(**) performance improvement on 21064 box with gcc.
* But it's not only Alpha users who win here:-) Thanks to the
* early-n-wide read-ahead this implementation also exhibits
* >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending
* on sizeof(RC4_INT)).
*
* (*) "this" means code which recognizes the case when input
* and output pointers appear to be aligned at natural CPU
* word boundary
* (**) i.e. according to 'apps/openssl speed rc4' benchmark,
* crypto/rc4/rc4speed.c exhibits almost 70% speed-up...
*
* Caveats.
*
* - RC4_CHUNK="unsigned long long" should be a #1 choice for
* UltraSPARC. Unfortunately gcc generates very slow code
* (2.5-3 times slower than one generated by Sun's WorkShop
* C) and therefore gcc (at least 2.95 and earlier) should
* always be told that RC4_CHUNK="unsigned long".
*
* <appro@fy.chalmers.se>
*/
# define RC4_STEP ( \
x=(x+1) &0xff, \
tx=d[x], \
y=(tx+y)&0xff, \
ty=d[y], \
d[y]=tx, \
d[x]=ty, \
(RC4_CHUNK)d[(tx+ty)&0xff]\
)
if ((((size_t)indata & (sizeof(RC4_CHUNK) - 1)) |
((size_t)outdata & (sizeof(RC4_CHUNK) - 1))) == 0) {
RC4_CHUNK ichunk, otp;
/*
* I reckon we can afford to implement both endian
* cases and to decide which way to take at run-time
* because the machine code appears to be very compact
* and redundant 1-2KB is perfectly tolerable (i.e.
* in case the compiler fails to eliminate it:-). By
* suggestion from Terrel Larson <terr@terralogic.net>.
*
* Special notes.
*
* - compilers (those I've tried) don't seem to have
* problems eliminating either the operators guarded
* by "if (sizeof(RC4_CHUNK)==8)" or the condition
* expressions themselves so I've got 'em to replace
* corresponding #ifdefs from the previous version;
* - I chose to let the redundant switch cases when
* sizeof(RC4_CHUNK)!=8 be (were also #ifdefed
* before);
* - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in
* [LB]ESHFT guards against "shift is out of range"
* warnings when sizeof(RC4_CHUNK)!=8
*
* <appro@fy.chalmers.se>
*/
#if BYTE_ORDER == BIG_ENDIAN
# define BESHFT(c) (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1))
for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) {
ichunk = *(RC4_CHUNK *)indata;
otp = RC4_STEP << BESHFT(0);
otp |= RC4_STEP << BESHFT(1);
otp |= RC4_STEP << BESHFT(2);
otp |= RC4_STEP << BESHFT(3);
if (sizeof(RC4_CHUNK) == 8) {
otp |= RC4_STEP << BESHFT(4);
otp |= RC4_STEP << BESHFT(5);
otp |= RC4_STEP << BESHFT(6);
otp |= RC4_STEP << BESHFT(7);
}
*(RC4_CHUNK *)outdata = otp ^ ichunk;
indata += sizeof(RC4_CHUNK);
outdata += sizeof(RC4_CHUNK);
}
#else
# define LESHFT(c) (((c)*8)&(sizeof(RC4_CHUNK)*8-1))
for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) {
ichunk = *(RC4_CHUNK *)indata;
otp = RC4_STEP;
otp |= RC4_STEP << 8;
otp |= RC4_STEP << 16;
otp |= RC4_STEP << 24;
if (sizeof(RC4_CHUNK) == 8) {
otp |= RC4_STEP << LESHFT(4);
otp |= RC4_STEP << LESHFT(5);
otp |= RC4_STEP << LESHFT(6);
otp |= RC4_STEP << LESHFT(7);
}
*(RC4_CHUNK *)outdata = otp ^ ichunk;
indata += sizeof(RC4_CHUNK);
outdata += sizeof(RC4_CHUNK);
}
#endif
}
#endif
#define LOOP(in,out) \
x=((x+1)&0xff); \
tx=d[x]; \
y=(tx+y)&0xff; \
d[x]=ty=d[y]; \
d[y]=tx; \
(out) = d[(tx+ty)&0xff] ^ (in);
#ifndef RC4_INDEX
#define RC4_LOOP(a,b,i) LOOP(*((a)++),*((b)++))
#else
#define RC4_LOOP(a,b,i) LOOP(a[i],b[i])
#endif
i = len >> 3;
if (i) {
for (;;) {
RC4_LOOP(indata, outdata, 0);
RC4_LOOP(indata, outdata, 1);
RC4_LOOP(indata, outdata, 2);
RC4_LOOP(indata, outdata, 3);
RC4_LOOP(indata, outdata, 4);
RC4_LOOP(indata, outdata, 5);
RC4_LOOP(indata, outdata, 6);
RC4_LOOP(indata, outdata, 7);
#ifdef RC4_INDEX
indata += 8;
outdata += 8;
#endif
if (--i == 0)
break;
}
}
i = len&0x07;
if (i) {
for (;;) {
RC4_LOOP(indata, outdata, 0);
if (--i == 0)
break;
RC4_LOOP(indata, outdata, 1);
if (--i == 0)
break;
RC4_LOOP(indata, outdata, 2);
if (--i == 0)
break;
RC4_LOOP(indata, outdata, 3);
if (--i == 0)
break;
RC4_LOOP(indata, outdata, 4);
if (--i == 0)
break;
RC4_LOOP(indata, outdata, 5);
if (--i == 0)
break;
RC4_LOOP(indata, outdata, 6);
if (--i == 0)
break;
}
}
key->x = x;
key->y = y;
}
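
For reference, the LOOP macro above is the standard RC4 pseudo-random generation algorithm, producing one keystream byte per iteration. Below is an illustrative plain-C rendering of that byte-at-a-time path, leaving out the RC4_CHUNK and RC4_INDEX variants; the rc4_ref names are hypothetical and not part of the library:

#include <stddef.h>

struct rc4_ref {
	unsigned char s[256];	/* permutation, i.e. key->data */
	unsigned char x, y;	/* stream indices, i.e. key->x, key->y */
};

static void
rc4_ref_crypt(struct rc4_ref *k, size_t len, const unsigned char *in,
    unsigned char *out)
{
	unsigned char x = k->x, y = k->y, tx, ty;
	size_t i;

	for (i = 0; i < len; i++) {
		x = (x + 1) & 0xff;		/* x=((x+1)&0xff) */
		tx = k->s[x];			/* tx=d[x]        */
		y = (tx + y) & 0xff;		/* y=(tx+y)&0xff  */
		ty = k->s[y];
		k->s[x] = ty;			/* d[x]=ty=d[y]   */
		k->s[y] = tx;			/* d[y]=tx        */
		out[i] = k->s[(tx + ty) & 0xff] ^ in[i];
	}
	k->x = x;
	k->y = y;
}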

crypto/rc4/rc4_local.h Normal file

@@ -0,0 +1,5 @@
/* $OpenBSD: rc4_local.h,v 1.1 2022/11/26 16:08:54 tb Exp $ */
#ifndef HEADER_RC4_LOCL_H
#define HEADER_RC4_LOCL_H
#endif

crypto/rc4/rc4_skey.c Normal file

@@ -0,0 +1,115 @@
/* $OpenBSD: rc4_skey.c,v 1.15 2022/11/26 16:08:54 tb Exp $ */
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
#include <openssl/rc4.h>
#include "rc4_local.h"
#include <openssl/opensslv.h>
const char *
RC4_options(void)
{
#ifdef RC4_INDEX
if (sizeof(RC4_INT) == 1)
return("rc4(idx,char)");
else
return("rc4(idx,int)");
#else
if (sizeof(RC4_INT) == 1)
return("rc4(ptr,char)");
else
return("rc4(ptr,int)");
#endif
}
/* RC4 as implemented from a posting from
* Newsgroups: sci.crypt
* From: sterndark@netcom.com (David Sterndark)
* Subject: RC4 Algorithm revealed.
* Message-ID: <sternCvKL4B.Hyy@netcom.com>
* Date: Wed, 14 Sep 1994 06:35:31 GMT
*/
void
RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
{
RC4_INT tmp;
int id1, id2;
RC4_INT *d;
unsigned int i;
d = &(key->data[0]);
key->x = 0;
key->y = 0;
id1 = id2 = 0;
#define SK_LOOP(d,n) { \
tmp=d[(n)]; \
id2 = (data[id1] + tmp + id2) & 0xff; \
if (++id1 == len) id1=0; \
d[(n)]=d[id2]; \
d[id2]=tmp; }
for (i = 0; i < 256; i++)
d[i] = i;
for (i = 0; i < 256; i += 4) {
SK_LOOP(d, i + 0);
SK_LOOP(d, i + 1);
SK_LOOP(d, i + 2);
SK_LOOP(d, i + 3);
}
}
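
Since RC4 is a stream cipher, decryption is the same operation as encryption: XORing with the keystream a second time restores the plaintext. A minimal usage sketch of the two entry points defined above, RC4_set_key() and RC4(); the key and message bytes are made up for illustration:

#include <assert.h>
#include <string.h>
#include <openssl/rc4.h>

int
main(void)
{
	static const unsigned char key_bytes[] = "illustrative key";
	unsigned char msg[] = "attack at dawn";
	unsigned char ct[sizeof(msg)], pt[sizeof(msg)];
	RC4_KEY enc, dec;

	RC4_set_key(&enc, (int)(sizeof(key_bytes) - 1), key_bytes);
	RC4_set_key(&dec, (int)(sizeof(key_bytes) - 1), key_bytes);

	RC4(&enc, sizeof(msg), msg, ct);	/* encrypt */
	RC4(&dec, sizeof(msg), ct, pt);		/* decrypt: same operation */

	assert(memcmp(pt, msg, sizeof(msg)) == 0);
	return 0;
}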