提交 c3cddeae 作者: Andy Polyakov

aes-s390x.pl: harmonize software-only code path [and minor optimization].

上级 df316fd4
...@@ -1684,11 +1684,9 @@ $code.=<<___; ...@@ -1684,11 +1684,9 @@ $code.=<<___;
lghi $i1,0x87 lghi $i1,0x87
srag $i2,$s1,63 # broadcast upper bit srag $i2,$s1,63 # broadcast upper bit
ngr $i1,$i2 # rem ngr $i1,$i2 # rem
srlg $i2,$s0,63 # carry bit from lower half algr $s0,$s0
sllg $s0,$s0,1 alcgr $s1,$s1
sllg $s1,$s1,1
xgr $s0,$i1 xgr $s0,$i1
ogr $s1,$i2
.Lxts_km_start: .Lxts_km_start:
lrvgr $i1,$s0 # flip byte order lrvgr $i1,$s0 # flip byte order
lrvgr $i2,$s1 lrvgr $i2,$s1
...@@ -1745,11 +1743,9 @@ $code.=<<___; ...@@ -1745,11 +1743,9 @@ $code.=<<___;
lghi $i1,0x87 lghi $i1,0x87
srag $i2,$s1,63 # broadcast upper bit srag $i2,$s1,63 # broadcast upper bit
ngr $i1,$i2 # rem ngr $i1,$i2 # rem
srlg $i2,$s0,63 # carry bit from lower half algr $s0,$s0
sllg $s0,$s0,1 alcgr $s1,$s1
sllg $s1,$s1,1
xgr $s0,$i1 xgr $s0,$i1
ogr $s1,$i2
ltr $len,$len # clear zero flag ltr $len,$len # clear zero flag
br $ra br $ra
...@@ -1843,12 +1839,11 @@ $code.=<<___; ...@@ -1843,12 +1839,11 @@ $code.=<<___;
slgr $out,$inp slgr $out,$inp
xgr $s0,$s0 # clear upper half l${g} $s3,$stdframe($sp) # ivp
xgr $s1,$s1 llgf $s0,0($s3) # load iv
lrv $s0,$stdframe+4($sp) # load secno llgf $s1,4($s3)
lrv $s1,$stdframe+0($sp) llgf $s2,8($s3)
xgr $s2,$s2 llgf $s3,12($s3)
xgr $s3,$s3
stm${g} %r2,%r5,2*$SIZE_T($sp) stm${g} %r2,%r5,2*$SIZE_T($sp)
la $key,0($key2) la $key,0($key2)
larl $tbl,AES_Te larl $tbl,AES_Te
...@@ -1864,11 +1859,9 @@ $code.=<<___; ...@@ -1864,11 +1859,9 @@ $code.=<<___;
lghi %r1,0x87 lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem ngr %r1,%r0 # rem
srlg %r0,$s1,63 # carry bit from lower half algr $s1,$s1
sllg $s1,$s1,1 alcgr $s3,$s3
sllg $s3,$s3,1
xgr $s1,%r1 xgr $s1,%r1
ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3 lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits srlg $s0,$s1,32 # smash the tweak to 4x32-bits
...@@ -1917,11 +1910,9 @@ $code.=<<___; ...@@ -1917,11 +1910,9 @@ $code.=<<___;
lghi %r1,0x87 lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem ngr %r1,%r0 # rem
srlg %r0,$s1,63 # carry bit from lower half algr $s1,$s1
sllg $s1,$s1,1 alcgr $s3,$s3
sllg $s3,$s3,1
xgr $s1,%r1 xgr $s1,%r1
ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3 lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits srlg $s0,$s1,32 # smash the tweak to 4x32-bits
...@@ -1956,7 +1947,8 @@ $code.=<<___; ...@@ -1956,7 +1947,8 @@ $code.=<<___;
.size AES_xts_encrypt,.-AES_xts_encrypt .size AES_xts_encrypt,.-AES_xts_encrypt
___ ___
# void AES_xts_decrypt(const char *inp,char *out,size_t len, # void AES_xts_decrypt(const char *inp,char *out,size_t len,
# const AES_KEY *key1, const AES_KEY *key2,u64 secno); # const AES_KEY *key1, const AES_KEY *key2,
# const unsigned char iv[16]);
# #
$code.=<<___; $code.=<<___;
.globl AES_xts_decrypt .globl AES_xts_decrypt
...@@ -2028,11 +2020,9 @@ $code.=<<___ if (!$softonly); ...@@ -2028,11 +2020,9 @@ $code.=<<___ if (!$softonly);
lghi $i1,0x87 lghi $i1,0x87
srag $i2,$s1,63 # broadcast upper bit srag $i2,$s1,63 # broadcast upper bit
ngr $i1,$i2 # rem ngr $i1,$i2 # rem
srlg $i2,$s0,63 # carry bit from lower half algr $s0,$s0
sllg $s0,$s0,1 alcgr $s1,$s1
sllg $s1,$s1,1
xgr $s0,$i1 xgr $s0,$i1
ogr $s1,$i2
lrvgr $i1,$s0 # flip byte order lrvgr $i1,$s0 # flip byte order
lrvgr $i2,$s1 lrvgr $i2,$s1
...@@ -2089,12 +2079,11 @@ $code.=<<___; ...@@ -2089,12 +2079,11 @@ $code.=<<___;
srlg $len,$len,4 srlg $len,$len,4
slgr $out,$inp slgr $out,$inp
xgr $s0,$s0 # clear upper half l${g} $s3,$stdframe($sp) # ivp
xgr $s1,$s1 llgf $s0,0($s3) # load iv
lrv $s0,$stdframe+4($sp) # load secno llgf $s1,4($s3)
lrv $s1,$stdframe+0($sp) llgf $s2,8($s3)
xgr $s2,$s2 llgf $s3,12($s3)
xgr $s3,$s3
stm${g} %r2,%r5,2*$SIZE_T($sp) stm${g} %r2,%r5,2*$SIZE_T($sp)
la $key,0($key2) la $key,0($key2)
larl $tbl,AES_Te larl $tbl,AES_Te
...@@ -2113,11 +2102,9 @@ $code.=<<___; ...@@ -2113,11 +2102,9 @@ $code.=<<___;
lghi %r1,0x87 lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem ngr %r1,%r0 # rem
srlg %r0,$s1,63 # carry bit from lower half algr $s1,$s1
sllg $s1,$s1,1 alcgr $s3,$s3
sllg $s3,$s3,1
xgr $s1,%r1 xgr $s1,%r1
ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3 lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits srlg $s0,$s1,32 # smash the tweak to 4x32-bits
...@@ -2156,11 +2143,9 @@ $code.=<<___; ...@@ -2156,11 +2143,9 @@ $code.=<<___;
lghi %r1,0x87 lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem ngr %r1,%r0 # rem
srlg %r0,$s1,63 # carry bit from lower half algr $s1,$s1
sllg $s1,$s1,1 alcgr $s3,$s3
sllg $s3,$s3,1
xgr $s1,%r1 xgr $s1,%r1
ogr $s3,%r0
lrvgr $i2,$s1 # flip byte order lrvgr $i2,$s1 # flip byte order
lrvgr $i3,$s3 lrvgr $i3,$s3
stmg $i2,$i3,$tweak($sp) # save the 1st tweak stmg $i2,$i3,$tweak($sp) # save the 1st tweak
...@@ -2176,11 +2161,9 @@ $code.=<<___; ...@@ -2176,11 +2161,9 @@ $code.=<<___;
lghi %r1,0x87 lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem ngr %r1,%r0 # rem
srlg %r0,$s1,63 # carry bit from lower half algr $s1,$s1
sllg $s1,$s1,1 alcgr $s3,$s3
sllg $s3,$s3,1
xgr $s1,%r1 xgr $s1,%r1
ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3 lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits srlg $s0,$s1,32 # smash the tweak to 4x32-bits
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册