提交 6c83629b 编写于 作者: A Andy Polyakov

AESNI engine: add counter mode.

上级 fead2539
......@@ -23,7 +23,8 @@ require "x86asm.pl";
&asm_init($ARGV[0],$0);
# Pick the instruction used through $movekey: the aligned movaps form
# when generating the "aesni" flavour, the unaligned movups form for
# every other $PREFIX.
# A stale assignment used to precede this test; it compared the misspelled
# variable $RREFIX against the misspelled string "aseni" inside a string
# eval, so it could only ever choose movups, and its result was always
# overwritten by the test below. It has been dropped.
if ($PREFIX eq "aesni") { $movekey=*movaps; }
else { $movekey=*movups; }
$len="eax";
$rounds="ecx";
......@@ -41,7 +42,7 @@ $rndkey1="xmm4";
$ivec="xmm5";
$in0="xmm6";
$in1="xmm7"; $inout3="xmm7";
# Inline version of internal aesni_[en|de]crypt1
sub aesni_inline_generate1
{ my $p=shift;
......@@ -104,7 +105,7 @@ sub aesni_generate1 # fully unrolled loop
&ret();
&function_end_B("_aesni_${p}rypt1");
}
# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
&aesni_generate1("enc") if (!$inline);
&function_begin_B("${PREFIX}_encrypt");
......@@ -136,7 +137,7 @@ sub aesni_generate1 # fully unrolled loop
&movups (&QWP(0,"eax"),$inout0);
&ret ();
&function_end_B("${PREFIX}_decrypt");
# _aesni_[en|de]crypt[34] are private interfaces, N denotes interleave
# factor. Why is the 3x subroutine used in loops? Even though aes[enc|dec]
# latency is 6, it turned out that it can be scheduled only every
......@@ -229,8 +230,9 @@ sub aesni_generate4
&aesni_generate3("dec");
&aesni_generate4("enc") if ($PREFIX eq "aesni");
&aesni_generate4("dec");
if ($PREFIX eq "aesni") {
######################################################################
# void aesni_ecb_encrypt (const void *in, void *out,
# size_t length, const AES_KEY *key,
# int enc);
......@@ -249,8 +251,9 @@ if ($PREFIX eq "aesni") {
&mov ($rounds_,$rounds); # backup $rounds
&jz (&label("ecb_decrypt"));
&sub ($len,0x40);
&cmp ($len,0x40);
&jbe (&label("ecb_enc_tail"));
&sub ($len,0x40);
&jmp (&label("ecb_enc_loop3"));
&set_label("ecb_enc_loop3",16);
......@@ -268,14 +271,13 @@ if ($PREFIX eq "aesni") {
&movups (&QWP(-0x10,$out),$inout2);
&ja (&label("ecb_enc_loop3"));
&set_label("ecb_enc_tail");
&add ($len,0x40);
&jz (&label("ecb_ret"));
&cmp ($len,0x10);
&movups ($inout0,&QWP(0,$inp));
&je (&label("ecb_enc_one"));
&set_label("ecb_enc_tail");
&cmp ($len,0x20);
&movups ($inout0,&QWP(0,$inp));
&jb (&label("ecb_enc_one"));
&movups ($inout1,&QWP(0x10,$inp));
&je (&label("ecb_enc_two"));
&cmp ($len,0x30);
......@@ -309,10 +311,11 @@ if ($PREFIX eq "aesni") {
&movups (&QWP(0x10,$out),$inout1);
&movups (&QWP(0x20,$out),$inout2);
&jmp (&label("ecb_ret"));
######################################################################
&set_label("ecb_decrypt",16);
&sub ($len,0x40);
&cmp ($len,0x40);
&jbe (&label("ecb_dec_tail"));
&sub ($len,0x40);
&jmp (&label("ecb_dec_loop3"));
&set_label("ecb_dec_loop3",16);
......@@ -330,14 +333,13 @@ if ($PREFIX eq "aesni") {
&movups (&QWP(-0x10,$out),$inout2);
&ja (&label("ecb_dec_loop3"));
&set_label("ecb_dec_tail");
&add ($len,0x40);
&jz (&label("ecb_ret"));
&cmp ($len,0x10);
&movups ($inout0,&QWP(0,$inp));
&je (&label("ecb_dec_one"));
&set_label("ecb_dec_tail");
&cmp ($len,0x20);
&movups ($inout0,&QWP(0,$inp));
&jb (&label("ecb_dec_one"));
&movups ($inout1,&QWP(0x10,$inp));
&je (&label("ecb_dec_two"));
&cmp ($len,0x30);
......@@ -373,8 +375,173 @@ if ($PREFIX eq "aesni") {
&set_label("ecb_ret");
&function_end("aesni_ecb_encrypt");
}
######################################################################
# CTR-mode bulk routine: encrypts successive counter blocks and XORs
# the result with the input.
# Handles only complete blocks, operates on 32-bit counter and
# does not update *ivec! (see engine/eng_aesni.c for details)
#
# void aesni_ctr32_encrypt_blocks (const void *in, void *out,
# size_t blocks, const AES_KEY *key,
# const char *ivec);
#
# Stack frame after alignment (offsets from the aligned esp):
#    0..15  byte-swap control mask for pshufb (reverses all 16 bytes)
#   16..31  counter increment vector, dwords {3,3,3,0}
#   32..47  ivec with its 32-bit counter dword zeroed
#   48      caller's esp, restored at ctr32_ret
#
# NOTE: elsewhere in this file $in1 and $inout3 both name xmm7, so any
# load into $in1 destroys $inout3; the code below is ordered around that.
&function_begin("aesni_ctr32_encrypt_blocks");
# fetch the five arguments; $rounds_ temporarily holds the ivec pointer
&mov ($inp,&wparam(0));
&mov ($out,&wparam(1));
&mov ($len,&wparam(2));
&mov ($key,&wparam(3));
&mov ($rounds_,&wparam(4));
# carve out a 16-byte aligned scratch frame, remembering the old esp
&mov ($key_,"esp");
&sub ("esp",60);
&and ("esp",-16); # align stack
&mov (&DWP(48,"esp"),$key_);
&movups ($inout3,&QWP(0,$rounds_)); # load ivec
# compose byte-swap control mask for pshufb on stack
# (in memory the bytes read 15,14,...,1,0, i.e. a full 16-byte reversal)
&mov (&DWP(0,"esp"),0x0c0d0e0f);
&mov (&DWP(4,"esp"),0x08090a0b);
&mov (&DWP(8,"esp"),0x04050607);
&mov (&DWP(12,"esp"),0x00010203);
# compose counter increment vector on stack
# (three lanes advance by 3 per loop iteration, fourth lane stays 0)
&mov ($rounds,3);
&xor ($key_,$key_);
&mov (&DWP(16,"esp"),$rounds);
&mov (&DWP(20,"esp"),$rounds);
&mov (&DWP(24,"esp"),$rounds);
&mov (&DWP(28,"esp"),$key_);
# split ivec: counter dword goes to $rounds_, $inout3 keeps the rest
# with a zero in the counter position ($key_ is zero at this point)
&pextrd ($rounds_,$inout3,3); # pull 32-bit counter
&pinsrd ($inout3,$key_,3); # wipe 32-bit counter
&mov ($rounds,&DWP(240,$key)); # key->rounds
&movaps ($rndkey0,&QWP(0,"esp")); # load byte-swap mask
# $ivec is vector of 3 32-bit counters
# built in host byte order as {n, n+1, n+2, 0}, then byte-reversed as a
# whole, which leaves n in dword 3, n+1 in dword 2, n+2 in dword 1 and
# zero in dword 0
&pxor ($ivec,$ivec);
&bswap ($rounds_);
&pinsrd ($ivec,$rounds_,0);
&inc ($rounds_);
&pinsrd ($ivec,$rounds_,1);
&inc ($rounds_);
&pinsrd ($ivec,$rounds_,2);
# four or fewer blocks are handled entirely by the tail code;
# pshufb does not disturb the flags set by cmp
&cmp ($len,4);
&pshufb ($ivec,$rndkey0); # byte swap
&jbe (&label("ctr32_tail"));
&movaps (&QWP(32,"esp"),$inout3); # save counter-less ivec
# keep copies of $rounds/$key so they can be restored after each call
&mov ($rounds_,$rounds);
&mov ($key_,$key);
# bias $len by 4 so the loop exits with 2..4 blocks left for the tail
&sub ($len,4);
&jmp (&label("ctr32_loop3"));
&set_label("ctr32_loop3",16);
# each pshufd moves one counter to the top dword and fills the other
# three dwords from $ivec's zero dword 0
&pshufd ($inout0,$ivec,3<<6); # place counter to upper dword
&pshufd ($inout1,$ivec,2<<6);
&pshufd ($inout2,$ivec,1<<6);
&por ($inout0,$inout3); # merge counter-less ivec
&por ($inout1,$inout3);
&por ($inout2,$inout3);
&call ("_aesni_encrypt3");
# mask is reloaded after the call (presumably the call clobbers $rndkey0
# and $rndkey1 -- TODO confirm against _aesni_encrypt3)
&movaps($rndkey0,&QWP(0,"esp")); # load byte-swap mask
# loading $in1 overwrites $inout3 (same register); it is reloaded below
&movups ($in0,&QWP(0,$inp));
&movups ($in1,&QWP(0x10,$inp));
&movups ($rndkey1,&QWP(0x20,$inp));
# swap counters to host order, add 3 to each, swap back further down
&pshufb($ivec,$rndkey0); # byte swap
&paddd ($ivec,&QWP(16,"esp")); # counter increment
# XOR key stream with input and store three output blocks
&pxor ($in0,$inout0);
&pxor ($in1,$inout1);
&pxor ($rndkey1,$inout2);
&movups (&QWP(0,$out),$in0);
&movups (&QWP(0x10,$out),$in1);
&movups (&QWP(0x20,$out),$rndkey1);
&movaps ($inout3,&QWP(32,"esp")); # load counter-less ivec
&pshufb($ivec,$rndkey0); # byte swap
# sub sets the flags tested by ja below; lea and mov leave them intact
&sub ($len,3);
&lea ($inp,&DWP(0x30,$inp));
&lea ($out,&DWP(0x30,$out));
&mov ($key,$key_);
&mov ($rounds,$rounds_);
&ja (&label("ctr32_loop3"));
# undo the bias applied before the loop
&add ($len,4);
# dword 1 of $ivec holds the highest of the three pending counters
&pextrd ($rounds_,$ivec,1); # might need last counter value
&jz (&label("ctr32_ret"));
&bswap ($rounds_);
# tail: 1 to 4 blocks remain; $rounds_ holds the third pending counter
# in host byte order and $inout3 the counter-less ivec
&set_label("ctr32_tail");
# the SSE instructions between cmp and the branches do not touch flags
&cmp ($len,2);
&pshufd ($inout0,$ivec,3<<6);
&pshufd ($inout1,$ivec,2<<6);
&pshufd ($inout2,$ivec,1<<6);
&por ($inout0,$inout3);
&jb (&label("ctr32_one"));
&por ($inout1,$inout3);
&je (&label("ctr32_two"));
&cmp ($len,3);
&por ($inout2,$inout3);
&je (&label("ctr32_three"));
# four blocks: turn $inout3 itself into the fourth counter block
&inc ($rounds_); # compose last counter value
&bswap ($rounds_);
&pinsrd ($inout3,$rounds_,3);
&call ("_aesni_encrypt4");
# $in1 cannot be used for input here because it aliases $inout3, so
# $rndkey1, $rndkey0 and $ivec serve as scratch for blocks 2..4
&movups ($in0,&QWP(0,$inp));
&movups ($rndkey1,&QWP(0x10,$inp));
&movups ($rndkey0,&QWP(0x20,$inp));
&movups ($ivec,&QWP(0x30,$inp));
&pxor ($in0,$inout0);
&pxor ($rndkey1,$inout1);
&pxor ($rndkey0,$inout2);
&pxor ($ivec,$inout3);
&movups (&QWP(0,$out),$in0);
&movups (&QWP(0x10,$out),$rndkey1);
&movups (&QWP(0x20,$out),$rndkey0);
&movups (&QWP(0x30,$out),$ivec);
&jmp (&label("ctr32_ret"));
# one block: encrypt $inout0 only, inline or via the subroutine
&set_label("ctr32_one",16);
if ($inline)
{ &aesni_inline_generate1("enc"); }
else
{ &call ("_aesni_encrypt1"); }
&movups ($in0,&QWP(0,$inp));
&pxor ($in0,$inout0);
&movups (&QWP(0,$out),$in0);
&jmp (&label("ctr32_ret"));
# two blocks: the 3-block routine is reused, only $inout0/$inout1 are
# consumed afterwards
&set_label("ctr32_two",16);
&call ("_aesni_encrypt3");
&movups ($in0,&QWP(0,$inp));
&movups ($in1,&QWP(0x10,$inp));
&pxor ($in0,$inout0);
&pxor ($in1,$inout1);
&movups (&QWP(0,$out),$in0);
&movups (&QWP(0x10,$out),$in1);
&jmp (&label("ctr32_ret"));
# three blocks: same shape as one loop iteration, without counter update
&set_label("ctr32_three",16);
&call ("_aesni_encrypt3");
&movups ($in0,&QWP(0,$inp));
&movups ($in1,&QWP(0x10,$inp));
&movups ($rndkey1,&QWP(0x20,$inp));
&pxor ($in0,$inout0);
&pxor ($in1,$inout1);
&pxor ($rndkey1,$inout2);
&movups (&QWP(0,$out),$in0);
&movups (&QWP(0x10,$out),$in1);
&movups (&QWP(0x20,$out),$rndkey1);
# common exit: restore the caller's esp saved at 48(esp)
&set_label("ctr32_ret");
&mov ("esp",&DWP(48,"esp"));
&function_end("aesni_ctr32_encrypt_blocks");
}
######################################################################
# void $PREFIX_cbc_encrypt (const void *inp, void *out,
# size_t length, const AES_KEY *key,
# unsigned char *ivp,const int enc);
......@@ -431,10 +598,11 @@ if ($PREFIX eq "aesni") {
&mov ($inp,$out); # $inp and $out are the same
&mov ($key,$key_); # restore $key
&jmp (&label("cbc_enc_loop"));
######################################################################
&set_label("cbc_decrypt",16);
&sub ($len,0x40);
&cmp ($len,0x40);
&jbe (&label("cbc_dec_tail"));
&sub ($len,0x40);
&jmp (&label("cbc_dec_loop3"));
&set_label("cbc_dec_loop3",16);
......@@ -458,10 +626,10 @@ if ($PREFIX eq "aesni") {
&movups (&QWP(-0x10,$out),$inout2);
&ja (&label("cbc_dec_loop3"));
&set_label("cbc_dec_tail");
&add ($len,0x40);
&jz (&label("cbc_ret"));
&set_label("cbc_dec_tail");
&movups ($inout0,&QWP(0,$inp));
&cmp ($len,0x10);
&movaps ($in0,$inout0);
......@@ -539,7 +707,8 @@ if ($PREFIX eq "aesni") {
&mov ($key_,&wparam(4));
&movups (&QWP(0,$key_),$ivec); # output IV
&function_end("${PREFIX}_cbc_encrypt");
######################################################################
# Mechanical port from aesni-x86_64.pl.
#
# _aesni_set_encrypt_key is private interface,
......
......@@ -41,7 +41,7 @@ $inp="%rdi";
$out="%rsi";
$len="%rdx";
$key="%rcx"; # input to and changed by aesni_[en|de]cryptN !!!
$ivp="%r8"; # cbc
$ivp="%r8"; # cbc, ctr
$rnds_="%r10d"; # backup copy for $rounds
$key_="%r11"; # backup copy for $key
......@@ -51,7 +51,7 @@ $inout0="%xmm0"; $inout1="%xmm1";
$inout2="%xmm2"; $inout3="%xmm3";
$rndkey0="%xmm4"; $rndkey1="%xmm5";
$iv="%xmm6"; $in0="%xmm7"; # used in CBC decrypt
$iv="%xmm6"; $in0="%xmm7"; # used in CBC decrypt, CTR
$in1="%xmm8"; $in2="%xmm9";
# Inline version of internal aesni_[en|de]crypt1.
......@@ -214,6 +214,7 @@ ___
&aesni_generate4("dec");
if ($PREFIX eq "aesni") {
########################################################################
# void aesni_ecb_encrypt (const void *in, void *out,
# size_t length, const AES_KEY *key,
# int enc);
......@@ -232,8 +233,9 @@ aesni_ecb_encrypt:
mov $rounds,$rnds_ # backup $rounds
jz .Lecb_decrypt
#--------------------------- ECB ENCRYPT ------------------------------#
sub \$0x40,$len
cmp \$0x40,$len
jbe .Lecb_enc_tail
sub \$0x40,$len
jmp .Lecb_enc_loop3
.align 16
.Lecb_enc_loop3:
......@@ -251,14 +253,13 @@ aesni_ecb_encrypt:
movups $inout2,-0x10($out)
ja .Lecb_enc_loop3
.Lecb_enc_tail:
add \$0x40,$len
jz .Lecb_ret
cmp \$0x10,$len
movups ($inp),$inout0
je .Lecb_enc_one
.Lecb_enc_tail:
cmp \$0x20,$len
movups ($inp),$inout0
jb .Lecb_enc_one
movups 0x10($inp),$inout1
je .Lecb_enc_two
cmp \$0x30,$len
......@@ -294,8 +295,9 @@ $code.=<<___;
#--------------------------- ECB DECRYPT ------------------------------#
.align 16
.Lecb_decrypt:
sub \$0x40,$len
cmp \$0x40,$len
jbe .Lecb_dec_tail
sub \$0x40,$len
jmp .Lecb_dec_loop3
.align 16
.Lecb_dec_loop3:
......@@ -313,14 +315,13 @@ $code.=<<___;
movups $inout2,-0x10($out)
ja .Lecb_dec_loop3
.Lecb_dec_tail:
add \$0x40,$len
jz .Lecb_ret
cmp \$0x10,$len
movups ($inp),$inout0
je .Lecb_dec_one
.Lecb_dec_tail:
cmp \$0x20,$len
movups ($inp),$inout0
jb .Lecb_dec_one
movups 0x10($inp),$inout1
je .Lecb_dec_two
cmp \$0x30,$len
......@@ -357,8 +358,175 @@ $code.=<<___;
ret
.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
___
######################################################################
# CTR-mode bulk routine (x86_64 flavour), appended to $code as text.
# Handles only complete blocks, operates on 32-bit counter and
# does not update *ivec! (see engine/eng_aesni.c for details)
#
# void aesni_ctr32_encrypt_blocks (const void *in, void *out,
# size_t blocks, const AES_KEY *key,
# const char *ivec);
#
# Two extra XMM registers are reserved for this routine: $increment is
# loaded from .Lincrement and $bswap_mask from .Lbswap_mask.
$increment="%xmm10";
$bswap_mask="%xmm11";
# symbol declaration and entry label
$code.=<<___;
.globl aesni_ctr32_encrypt_blocks
.type aesni_ctr32_encrypt_blocks,\@function,5
.align 16
aesni_ctr32_encrypt_blocks:
___
# Win64-only prologue: reserve 0x68 bytes of stack and spill
# %xmm6-%xmm11 there; .Lctr32_body marks the end of the prologue.
$code.=<<___ if ($win64);
	lea -0x68(%rsp),%rsp
	movaps %xmm6,(%rsp)
	movaps %xmm7,0x10(%rsp)
	movaps %xmm8,0x20(%rsp)
	movaps %xmm9,0x30(%rsp)
	movaps %xmm10,0x40(%rsp)
	movaps %xmm11,0x50(%rsp)
.Lctr32_body:
___
# Main body: split ivec into a counter-less block ($inout3) and a vector
# of three counters ($iv), run the 3-blocks-per-iteration loop, then
# dispatch on the 1..4 remaining blocks. The emitted text runs up to
# the .Lctr32_one label.
$code.=<<___;
movups ($ivp),$inout3
movaps .Lincrement(%rip),$increment
movaps .Lbswap_mask(%rip),$bswap_mask
xor $rounds,$rounds
pextrd \$3,$inout3,$rnds_ # pull 32-bit counter
pinsrd \$3,$rounds,$inout3 # wipe 32-bit counter
mov 240($key),$rounds # key->rounds
pxor $iv,$iv # vector of 3 32-bit counters
bswap $rnds_
pinsrd \$0,$rnds_,$iv
inc $rnds_
pinsrd \$1,$rnds_,$iv
inc $rnds_
pinsrd \$2,$rnds_,$iv
cmp \$4,$len
pshufb $bswap_mask,$iv
jbe .Lctr32_tail
mov $rounds,$rnds_
mov $key,$key_
sub \$4,$len
jmp .Lctr32_loop3
.align 16
.Lctr32_loop3:
pshufd \$`3<<6`,$iv,$inout0 # place counter to upper dword
pshufd \$`2<<6`,$iv,$inout1
pshufd \$`1<<6`,$iv,$inout2
movups ($inp),$in0
movups 0x10($inp),$in1
movups 0x20($inp),$in2
por $inout3,$inout0 # merge counter-less ivec
por $inout3,$inout1
por $inout3,$inout2
pshufb $bswap_mask,$iv
call _aesni_encrypt3
paddd $increment,$iv
pxor $inout0,$in0
pxor $inout1,$in1
pxor $inout2,$in2
pshufb $bswap_mask,$iv
movups $in0,($out)
movups $in1,0x10($out)
movups $in2,0x20($out)
sub \$3,$len
lea 0x30($inp),$inp
lea 0x30($out),$out
mov $key_,$key
mov $rnds_,$rounds
ja .Lctr32_loop3
add \$4,$len
pextrd \$1,$iv,$rnds_ # migh need last counter value
jz .Lctr32_done
bswap $rnds_
.Lctr32_tail:
cmp \$2,$len
pshufd \$`3<<6`,$iv,$inout0
pshufd \$`2<<6`,$iv,$inout1
pshufd \$`1<<6`,$iv,$inout2
por $inout3,$inout0
movups ($inp),$in0
jb .Lctr32_one
por $inout3,$inout1
movups 0x10($inp),$in1
je .Lctr32_two
cmp \$3,$len
por $inout3,$inout2
movups 0x20($inp),$in2
je .Lctr32_three
inc $rnds_ # compose last counter value
bswap $rnds_
pinsrd \$3,$rnds_,$inout3
movups 0x30($inp),$iv
call _aesni_encrypt4
pxor $inout0,$in0
pxor $inout1,$in1
pxor $inout2,$in2
pxor $inout3,$iv
movups $in0,($out)
movups $in1,0x10($out)
movups $in2,0x20($out)
movups $iv,0x30($out)
jmp .Lctr32_done
.align 16
.Lctr32_one:
___
# Single-block path: the one-block encryption is produced by
# aesni_generate1 at this point rather than written out in the heredoc
# (presumably it appends inline code operating on $inout0 -- TODO
# confirm against its definition).
&aesni_generate1("enc",$key,$rounds);
# Remainder of the single-block path, followed by the two- and
# three-block paths, which both call _aesni_encrypt3.
$code.=<<___;
pxor $inout0,$in0
movups $in0,($out)
jmp .Lctr32_done
.align 16
.Lctr32_two:
call _aesni_encrypt3
pxor $inout0,$in0
pxor $inout1,$in1
movups $in0,($out)
movups $in1,0x10($out)
jmp .Lctr32_done
.align 16
.Lctr32_three:
call _aesni_encrypt3
pxor $inout0,$in0
pxor $inout1,$in1
pxor $inout2,$in2
movups $in0,($out)
movups $in1,0x10($out)
movups $in2,0x20($out)
.Lctr32_done:
___
# Win64-only epilogue: reload %xmm6-%xmm11 and release the 0x68 bytes
# reserved in the prologue.
$code.=<<___ if ($win64);
	movaps (%rsp),%xmm6
	movaps 0x10(%rsp),%xmm7
	movaps 0x20(%rsp),%xmm8
	movaps 0x30(%rsp),%xmm9
	movaps 0x40(%rsp),%xmm10
	movaps 0x50(%rsp),%xmm11
	lea 0x68(%rsp),%rsp
___
# Return and symbol size; .Lctr32_ret marks the start of the epilogue
# proper (it is the label the ctr32 SEH handler compares Rip against).
$code.=<<___;
.Lctr32_ret:
ret
.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
___
}
########################################################################
# void $PREFIX_cbc_encrypt (const void *inp, void *out,
# size_t length, const AES_KEY *key,
# unsigned char *ivp,const int enc);
......@@ -429,9 +597,10 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
movups ($ivp),$iv
sub \$0x40,$len
cmp \$0x40,$len
mov $rnds_,$rounds
jbe .Lcbc_dec_tail
sub \$0x40,$len
jmp .Lcbc_dec_loop3
.align 16
.Lcbc_dec_loop3:
......@@ -456,11 +625,11 @@ $code.=<<___;
movups $inout2,-0x10($out)
ja .Lcbc_dec_loop3
.Lcbc_dec_tail:
add \$0x40,$len
movups $iv,($ivp)
jz .Lcbc_dec_ret
.Lcbc_dec_tail:
movups ($inp),$inout0
cmp \$0x10,$len
movaps $inout0,$in0
......@@ -796,6 +965,11 @@ ___
}
$code.=<<___;
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lincrement:
.long 3,3,3,0
.asciz "AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>"
.align 64
___
......@@ -810,9 +984,11 @@ $disp="%r9";
$code.=<<___;
.extern __imp_RtlVirtualUnwind
.type cbc_se_handler,\@abi-omnipotent
___
$code.=<<___ if ($PREFIX eq "aesni");
.type ecb_se_handler,\@abi-omnipotent
.align 16
cbc_se_handler:
ecb_se_handler:
push %rsi
push %rdi
push %rbx
......@@ -825,30 +1001,48 @@ cbc_se_handler:
sub \$64,%rsp
mov 152($context),%rax # pull context->Rsp
mov 8(%rax),%rdi
mov 16(%rax),%rsi
mov %rsi,168($context) # restore context->Rsi
mov %rdi,176($context) # restore context->Rdi
jmp .Lcommon_seh_exit
.size ecb_se_handler,.-ecb_se_handler
.type ctr32_se_handler,\@abi-omnipotent
.align 16
ctr32_se_handler:
push %rsi
push %rdi
push %rbx
push %rbp
push %r12
push %r13
push %r14
push %r15
pushfq
sub \$64,%rsp
mov 120($context),%rax # pull context->Rax
mov 248($context),%rbx # pull context->Rip
lea .Lcbc_decrypt(%rip),%r10
lea .Lctr32_body(%rip),%r10
cmp %r10,%rbx # context->Rip<"prologue" label
jb .Lin_prologue
jb .Lin_ctr32_prologue
lea .Lcbc_decrypt_body(%rip),%r10
cmp %r10,%rbx # context->Rip<cbc_decrypt_body
jb .Lrestore_rax
mov 152($context),%rax # pull context->Rsp
lea .Lcbc_ret(%rip),%r10
cmp %r10,%rbx # context->Rip>="epilogue" label
jae .Lin_prologue
lea .Lctr32_ret(%rip),%r10
cmp %r10,%rbx
jae .Lin_ctr32_prologue
lea 0(%rax),%rsi # top of stack
lea 512($context),%rdi # &context.Xmm6
mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax)
mov \$12,%ecx # 6*sizeof(%xmm0)/sizeof(%rax)
.long 0xa548f3fc # cld; rep movsq
lea 0x58(%rax),%rax # adjust stack pointer
jmp .Lin_prologue
lea 0x68(%rax),%rax # adjust stack pointer
.Lrestore_rax:
mov 120($context),%rax
.Lin_prologue:
.Lin_ctr32_prologue:
mov 8(%rax),%rdi
mov 16(%rax),%rsi
mov %rax,152($context) # restore context->Rsp
......@@ -856,11 +1050,12 @@ cbc_se_handler:
mov %rdi,176($context) # restore context->Rdi
jmp .Lcommon_seh_exit
.size cbc_se_handler,.-cbc_se_handler
.type ecb_se_handler,\@abi-omnipotent
.size ctr32_se_handler,.-ctr32_se_handler
___
$code.=<<___;
.type cbc_se_handler,\@abi-omnipotent
.align 16
ecb_se_handler:
cbc_se_handler:
push %rsi
push %rdi
push %rbx
......@@ -873,8 +1068,33 @@ ecb_se_handler:
sub \$64,%rsp
mov 152($context),%rax # pull context->Rsp
mov 248($context),%rbx # pull context->Rip
lea .Lcbc_decrypt(%rip),%r10
cmp %r10,%rbx # context->Rip<"prologue" label
jb .Lin_cbc_prologue
lea .Lcbc_decrypt_body(%rip),%r10
cmp %r10,%rbx # context->Rip<cbc_decrypt_body
jb .Lrestore_cbc_rax
lea .Lcbc_ret(%rip),%r10
cmp %r10,%rbx # context->Rip>="epilogue" label
jae .Lin_cbc_prologue
lea 0(%rax),%rsi # top of stack
lea 512($context),%rdi # &context.Xmm6
mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax)
.long 0xa548f3fc # cld; rep movsq
lea 0x58(%rax),%rax # adjust stack pointer
jmp .Lin_cbc_prologue
.Lrestore_cbc_rax:
mov 120($context),%rax
.Lin_cbc_prologue:
mov 8(%rax),%rdi
mov 16(%rax),%rsi
mov %rax,152($context) # restore context->Rsp
mov %rsi,168($context) # restore context->Rsi
mov %rdi,176($context) # restore context->Rdi
......@@ -915,10 +1135,17 @@ ecb_se_handler:
.section .pdata
.align 4
.rva .LSEH_begin_${PREFIX}_ecb_encrypt
.rva .LSEH_end_${PREFIX}_ecb_encrypt
___
$code.=<<___ if ($PREFIX eq "aesni");
.rva .LSEH_begin_aesni_ecb_encrypt
.rva .LSEH_end_aesni_ecb_encrypt
.rva .LSEH_info_ecb
.rva .LSEH_begin_aesni_ctr32_encrypt_blocks
.rva .LSEH_end_aesni_ctr32_encrypt_blocks
.rva .LSEH_info_ctr32
___
$code.=<<___;
.rva .LSEH_begin_${PREFIX}_cbc_encrypt
.rva .LSEH_end_${PREFIX}_cbc_encrypt
.rva .LSEH_info_cbc
......@@ -932,9 +1159,16 @@ ecb_se_handler:
.rva .LSEH_info_key
.section .xdata
.align 8
___
$code.=<<___ if ($PREFIX eq "aesni");
.LSEH_info_ecb:
.byte 9,0,0,0
.rva ecb_se_handler
.LSEH_info_ctr32:
.byte 9,0,0,0
.rva ctr32_se_handler
___
$code.=<<___;
.LSEH_info_cbc:
.byte 9,0,0,0
.rva cbc_se_handler
......
......@@ -111,6 +111,35 @@ void ENGINE_load_aesni (void)
}
#ifdef COMPILE_HW_AESNI
typedef unsigned int u32;
typedef unsigned char u8;
#if defined(__GNUC__) && __GNUC__>=2
# define BSWAP4(x) ({ u32 ret=(x); \
asm volatile ("bswapl %0" \
: "+r"(ret)); ret; })
#elif defined(_MSC_VER)
# if _MSC_VER>=1300
# pragma intrinsic(_byteswap_ulong)
# define BSWAP4(x) _byteswap_ulong((u32)(x))
# elif defined(_M_IX86)
__inline u32 _bswap4(u32 val) {
_asm mov eax,val
_asm bswap eax
}
# define BSWAP4(x) _bswap4(x)
# endif
#endif
#ifdef BSWAP4
#define GETU32(p) BSWAP4(*(const u32 *)(p))
#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
#else
#define GETU32(p) ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3])
#define PUTU32(p,v) ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v))
#endif
int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
AES_KEY *key);
int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
......@@ -132,6 +161,12 @@ void aesni_cbc_encrypt(const unsigned char *in,
const AES_KEY *key,
unsigned char *ivec, int enc);
/*
 * Assembly CTR-mode bulk routine.
 *
 * in, out - input and output buffers
 * blocks  - number of complete blocks to process (a block count, not a
 *           byte count)
 * key     - expanded AES key schedule
 * ivec    - counter block; declared const because the routine does not
 *           write the advanced counter back
 *
 * According to the comments on the assembly implementation, it handles
 * only complete blocks and operates on a 32-bit counter; the caller has
 * to deal with counter overflow and update the counter itself.
 */
void aesni_ctr32_encrypt_blocks(const unsigned char *in,
unsigned char *out,
size_t blocks,
const AES_KEY *key,
const unsigned char *ivec);
/* Function for ENGINE detection and control */
static int aesni_init(ENGINE *e);
......@@ -224,16 +259,19 @@ static int aesni_cipher_nids[] = {
NID_aes_128_cbc,
NID_aes_128_cfb,
NID_aes_128_ofb,
NID_aes_128_ctr,
NID_aes_192_ecb,
NID_aes_192_cbc,
NID_aes_192_cfb,
NID_aes_192_ofb,
NID_aes_192_ctr,
NID_aes_256_ecb,
NID_aes_256_cbc,
NID_aes_256_cfb,
NID_aes_256_ofb,
NID_aes_256_ctr,
};
static int aesni_cipher_nids_num =
(sizeof(aesni_cipher_nids)/sizeof(aesni_cipher_nids[0]));
......@@ -251,18 +289,28 @@ aesni_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *user_key,
int ret;
AES_KEY *key = AESNI_ALIGN(ctx->cipher_data);
if ((ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_CFB_MODE
|| (ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_OFB_MODE
|| enc)
ret=aesni_set_encrypt_key(user_key, ctx->key_len * 8, key);
else
if (((ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_ECB_MODE
|| (ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_CBC_MODE)
&& !enc)
ret=aesni_set_decrypt_key(user_key, ctx->key_len * 8, key);
else
ret=aesni_set_encrypt_key(user_key, ctx->key_len * 8, key);
if(ret < 0) {
EVPerr(EVP_F_AESNI_INIT_KEY,EVP_R_AES_KEY_SETUP_FAILED);
return 0;
}
if (ctx->cipher->flags&EVP_CIPH_CUSTOM_IV)
{
if (iv!=NULL)
memcpy (ctx->iv,iv,ctx->cipher->iv_len);
else {
EVPerr(EVP_F_AESNI_INIT_KEY,EVP_R_AES_IV_SETUP_FAILED);
return 0;
}
}
return 1;
}
......@@ -336,6 +384,117 @@ DECLARE_AES_EVP(256,cbc,CBC);
DECLARE_AES_EVP(256,cfb,CFB);
DECLARE_AES_EVP(256,ofb,OFB);
static void ctr96_inc(unsigned char *counter) {
u32 n=12;
u8 c;
do {
--n;
c = counter[n];
++c;
counter[n] = c;
if (c) return;
} while (n);
}
/*
 * CTR-mode cipher callback.
 *
 * ctx - cipher context; ctx->iv is the running counter block, ctx->buf
 *       holds the most recently generated key-stream block, and ctx->num
 *       is the offset of the first unused key-stream byte in ctx->buf
 *       (0 means nothing is buffered)
 * out - destination buffer
 * in  - source buffer
 * len - number of bytes to process
 *
 * Always returns 1.
 */
static int aesni_counter(EVP_CIPHER_CTX *ctx, unsigned char *out,
		const unsigned char *in, size_t len)
{
	AES_KEY *ks = AESNI_ALIGN(ctx->cipher_data);
	u32 used = ctx->num;
	u32 counter;

	/* Step 1: consume key stream left over from the previous call. */
	for (; used != 0 && len != 0; --len) {
		*out++ = *in++ ^ ctx->buf[used];
		used = (used + 1) % 16;
	}

	/* Low 32 bits of the counter block, in host byte order. */
	counter = GETU32(ctx->iv+12);

	/* Step 2: whole blocks go to the assembly routine in bounded runs. */
	while (len >= 16) {
		size_t run = len / 16;
		size_t nbytes;

		/* 1<<24 is an arbitrary, moderately sized cap per call. */
		if (run > (1U<<24))
			run = (1U<<24);

		/*
		 * The assembly routine works on a 32-bit counter only, so
		 * wrap-around is handled here: when adding the run length
		 * wraps the counter, the run is shortened to end exactly
		 * at the wrap point.
		 */
		counter += (u32)run;
		if (counter < run) {
			run -= counter;
			counter = 0;
		}

		aesni_ctr32_encrypt_blocks(in, out, run, ks, ctx->iv);

		/* The routine leaves ctx->iv untouched; advance it here... */
		PUTU32(ctx->iv+12, counter);
		/* ...and propagate the carry into the upper 96 bits on wrap. */
		if (counter == 0)
			ctr96_inc(ctx->iv);

		nbytes = run * 16;
		len -= nbytes;
		out += nbytes;
		in += nbytes;
	}

	/*
	 * Step 3: a final partial block. Generate one key-stream block
	 * into ctx->buf, advance the counter, and use only the first
	 * len bytes; the rest stays buffered for the next call.
	 */
	if (len != 0) {
		aesni_encrypt(ctx->iv, ctx->buf, ks);
		++counter;
		PUTU32(ctx->iv+12, counter);
		if (counter == 0)
			ctr96_inc(ctx->iv);
		for (; len != 0; --len, ++used)
			out[used] = in[used] ^ ctx->buf[used];
	}

	ctx->num = used;
	return 1;
}
/*
 * Cipher descriptors for AES-128/192/256 in counter mode.
 *
 * The initialisers are positional, so their meaning depends on the
 * EVP_CIPHER layout, which is declared outside this file. The notes on
 * the first table give the presumed meaning of each slot -- TODO confirm
 * against the EVP_CIPHER declaration. The three tables differ only in
 * the NID and in the third number (16, 24 or 32, matching a 128-, 192-
 * or 256-bit key in bytes).
 */
static const EVP_CIPHER aesni_128_ctr=
{
/* NID, then presumably block size 1, key length 16, IV length 16 */
NID_aes_128_ctr,1,16,16,
/* flags: aesni_init_key copies the IV into ctx->iv itself when this is set */
EVP_CIPH_CUSTOM_IV,
/* key/IV setup callback */
aesni_init_key,
/* data-processing callback */
aesni_counter,
/* presumably the cleanup callback; none */
NULL,
/* presumably the size of the per-context cipher data */
sizeof(AESNI_KEY),
/* remaining slots unused */
NULL,
NULL,
NULL,
NULL
};
/* AES-192 counter mode; same slot layout as aesni_128_ctr. */
static const EVP_CIPHER aesni_192_ctr=
{
NID_aes_192_ctr,1,24,16,
EVP_CIPH_CUSTOM_IV,
aesni_init_key,
aesni_counter,
NULL,
sizeof(AESNI_KEY),
NULL,
NULL,
NULL,
NULL
};
/* AES-256 counter mode; same slot layout as aesni_128_ctr. */
static const EVP_CIPHER aesni_256_ctr=
{
NID_aes_256_ctr,1,32,16,
EVP_CIPH_CUSTOM_IV,
aesni_init_key,
aesni_counter,
NULL,
sizeof(AESNI_KEY),
NULL,
NULL,
NULL,
NULL
};
static int
aesni_ciphers (ENGINE *e, const EVP_CIPHER **cipher,
const int **nids, int nid)
......@@ -360,6 +519,9 @@ aesni_ciphers (ENGINE *e, const EVP_CIPHER **cipher,
case NID_aes_128_ofb:
*cipher = &aesni_128_ofb;
break;
case NID_aes_128_ctr:
*cipher = &aesni_128_ctr;
break;
case NID_aes_192_ecb:
*cipher = &aesni_192_ecb;
......@@ -373,6 +535,9 @@ aesni_ciphers (ENGINE *e, const EVP_CIPHER **cipher,
case NID_aes_192_ofb:
*cipher = &aesni_192_ofb;
break;
case NID_aes_192_ctr:
*cipher = &aesni_192_ctr;
break;
case NID_aes_256_ecb:
*cipher = &aesni_256_ecb;
......@@ -386,6 +551,9 @@ aesni_ciphers (ENGINE *e, const EVP_CIPHER **cipher,
case NID_aes_256_ofb:
*cipher = &aesni_256_ofb;
break;
case NID_aes_256_ctr:
*cipher = &aesni_256_ctr;
break;
default:
/* Sorry, we don't support this NID */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册