OpenHarmony / Third Party Openssl
Commit 0ab8fd58, authored Mar 04, 2011 by Andy Polyakov
Parent: 8aa6cff4

    s390x assembler pack: tune-up and support for new z196 hardware.

Showing 3 changed files with 779 additions and 31 deletions:

    crypto/aes/asm/aes-s390x.pl       +764  -25
    crypto/bn/asm/s390x-mont.pl         +4   -4
    crypto/modes/asm/ghash-s390x.pl    +11   -2
crypto/aes/asm/aes-s390x.pl
...
...
@@ -70,6 +70,18 @@
# remains z/Architecture specific. On z990 it was measured to perform
# 2x better than code generated by gcc 4.3.
# December 2010.
#
# Add support for z196 "cipher message with counter" instruction.
# Note however that it's disengaged, because it was measured to
# perform ~12% worse than vanilla km-based code...
# February 2011.
#
# Add AES_xts_[en|de]crypt. This includes support for z196
# km-xts-aes instructions, which deliver ~70% improvement at 8KB
# block size over vanilla km-based code.
$flavour = shift;
if ($flavour =~ /3[12]/) {
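For orientation, the C-level signatures of the routines this patch works with are the ones quoted in the comments of this file: AES_ctr32_encrypt(in, out, blocks, key, ivec) and the new AES_xts_encrypt/AES_xts_decrypt(inp, out, len, key1, key2, secno). The sketch below only illustrates how a caller might drive the new XTS entry points; the sector-encryption wrapper and its names are assumptions of this note, not part of the patch.

    #include <stddef.h>
    #include <stdint.h>
    #include <openssl/aes.h>

    /* Prototypes as given in the aes-s390x.pl comments; u64 secno is the
     * XTS sector (tweak) number, key2 the tweak key, key1 the data key. */
    void AES_xts_encrypt(const char *inp, char *out, size_t len,
                         const AES_KEY *key1, const AES_KEY *key2, uint64_t secno);
    void AES_xts_decrypt(const char *inp, char *out, size_t len,
                         const AES_KEY *key1, const AES_KEY *key2, uint64_t secno);

    /* Hypothetical caller: encrypt one disk sector in place. */
    static void xts_encrypt_sector(unsigned char *sector, size_t len,
                                   const unsigned char k1[16],
                                   const unsigned char k2[16], uint64_t sector_no)
    {
        AES_KEY data_key, tweak_key;

        AES_set_encrypt_key(k1, 128, &data_key);
        AES_set_encrypt_key(k2, 128, &tweak_key); /* tweak key always encrypts */
        AES_xts_encrypt((const char *)sector, (char *)sector, len,
                        &data_key, &tweak_key, sector_no);
    }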
...
...
@@ -268,7 +280,7 @@ $code.=<<___;
.type _s390x_AES_encrypt,\@function
.align 16
_s390x_AES_encrypt:
-	st${g}	$ra,`$stdframe-$SIZE_T`($sp)
+	st${g}	$ra,15*$SIZE_T($sp)
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
...
...
@@ -432,7 +444,7 @@ _s390x_AES_encrypt:
or $s2,$i3
or $s3,$t3
-	l${g}	$ra,`$stdframe-$SIZE_T`($sp)
+	l${g}	$ra,15*$SIZE_T($sp)
xr $s0,$t0
xr $s1,$t2
x $s2,24($key)
...
...
@@ -594,7 +606,7 @@ $code.=<<___;
.type _s390x_AES_decrypt,\@function
.align 16
_s390x_AES_decrypt:
-	st${g}	$ra,`$stdframe-$SIZE_T`($sp)
+	st${g}	$ra,15*$SIZE_T($sp)
x $s0,0($key)
x $s1,4($key)
x $s2,8($key)
...
...
@@ -738,7 +750,7 @@ _s390x_AES_decrypt:
nr $i1,$mask
nr $i2,$mask
-	l${g}	$ra,`$stdframe-$SIZE_T`($sp)
+	l${g}	$ra,15*$SIZE_T($sp)
or $s1,$t1
l $t0,16($key)
l $t1,20($key)
...
...
@@ -1164,7 +1176,8 @@ $code.=<<___;
.size AES_set_decrypt_key,.-AES_set_decrypt_key
___
-#void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+########################################################################
+# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
#			size_t length, const AES_KEY *key,
#			unsigned char *ivec, const int enc)
{
...
...
@@ -1365,13 +1378,14 @@ $code.=<<___;
.size AES_cbc_encrypt,.-AES_cbc_encrypt
___
}
-#void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
+########################################################################
+# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
#			size_t blocks, const AES_KEY *key,
#			const unsigned char *ivec)
{
my $inp="%r2";
-my $out="%r3";
-my $len="%r4";
+my $out="%r4";	# blocks and out are swapped
+my $len="%r3";
my $key="%r5";
my $iv0="%r5";
my $ivp="%r6";
my $fp ="%r7";
...
...
@@ -1381,6 +1395,9 @@ $code.=<<___;
.type AES_ctr32_encrypt,\@function
.align 16
AES_ctr32_encrypt:
xgr %r3,%r4 # flip %r3 and %r4, $out and $len
xgr %r4,%r3
xgr %r3,%r4
llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case
___
$code.=<<___ if (!$softonly);
...
...
@@ -1415,20 +1432,75 @@ $code.=<<___ if (!$softonly);
	st${g}	$fp,$SIZE_T($sp)

	slgr	$len,$fp
-	brc	1,.Lctr32_hw_loop	# not zero, no borrow
+	brc	1,.Lctr32_hw_switch	# not zero, no borrow
	algr	$fp,$len	# input is shorter than allocated buffer
	lghi	$len,0
	st${g}	$fp,$SIZE_T($sp)

-.Lctr32_hw_loop:
+.Lctr32_hw_switch:
___
$code.=<<___ if (0);	######### kmctr code was measured to be ~12% slower
	larl	$s0,OPENSSL_s390xcap_P
	lg	$s0,8($s0)
	tmhh	$s0,0x0004	# check for message_security-assist-4
	jz	.Lctr32_km_loop

	llgfr	$s0,%r0
	lgr	$s1,%r1
	lghi	%r0,0
	la	%r1,16($sp)
	.long	0xb92d2042	# kmctr %r4,%r2,%r2

	llihh	%r0,0x8000	# check if kmctr supports the function code
	srlg	%r0,%r0,0($s0)
	ng	%r0,16($sp)
	lgr	%r0,$s0
	lgr	%r1,$s1
	jz	.Lctr32_km_loop

####### kmctr code
	algr	$out,$inp	# restore $out
	lgr	$s1,$len	# $s1 undertakes $len
	j	.Lctr32_kmctr_loop
.align	16
.Lctr32_kmctr_loop:
	la	$s2,16($sp)
	lgr	$s3,$fp
.Lctr32_kmctr_prepare:
	stg	$iv0,0($s2)
	stg	$ivp,8($s2)
	la	$s2,16($s2)
	ahi	$ivp,1		# 32-bit increment, preserves upper half
	brct	$s3,.Lctr32_kmctr_prepare

	#la	$inp,0($inp)	# inp
	sllg	$len,$fp,4	# len
	#la	$out,0($out)	# out
	la	$s2,16($sp)	# iv
	.long	0xb92da042	# kmctr $out,$s2,$inp
	brc	1,.-4		# pay attention to "partial completion"

	slgr	$s1,$fp
	brc	1,.Lctr32_kmctr_loop	# not zero, no borrow
	algr	$fp,$s1
	lghi	$s1,0
	brc	4+1,.Lctr32_kmctr_loop	# not zero

	l${g}	$sp,0($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
___
$code.=<<___;
.Lctr32_km_loop:
la $s2,16($sp)
lgr $s3,$fp
-.Lctr32_hw_prepare:
+.Lctr32_km_prepare:
stg $iv0,0($s2)
stg $ivp,8($s2)
la $s2,16($s2)
ahi $ivp,1 # 32-bit increment, preserves upper half
-	brct	$s3,.Lctr32_hw_prepare
+	brct	$s3,.Lctr32_km_prepare
la $s0,16($sp) # inp
sllg $s1,$fp,4 # len
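The km-based CTR path shown above first writes a vector of ascending 16-byte counter blocks into the stack buffer (the stg/stg/ahi/brct loop) and only then runs the cipher over the whole vector. A rough C rendering of that preparation step, with hypothetical names, and with the IV handled as two 64-bit halves the way the stg stores lay it out on this big-endian target:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Illustrative equivalent of .Lctr32_km_prepare: emit `blocks` counter
     * values.  Only the low 32 bits of the second half are incremented
     * ("32-bit increment, preserves upper half", like ahi on a GPR). */
    static void ctr32_prepare(uint64_t iv_hi, uint64_t iv_lo,
                              unsigned char *buf, size_t blocks)
    {
        for (size_t i = 0; i < blocks; i++) {
            uint64_t block[2] = { iv_hi, iv_lo }; /* stg/stg: two 8-byte stores */
            memcpy(buf + 16 * i, block, 16);      /* matches the asm layout on big-endian s390x */
            iv_lo = (iv_lo & 0xffffffff00000000ULL) | ((iv_lo + 1) & 0xffffffffULL);
        }
    }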
...
...
@@ -1439,7 +1511,7 @@ $code.=<<___ if (!$softonly);
la $s2,16($sp)
lgr $s3,$fp
slgr $s2,$inp
-.Lctr32_hw_xor:
+.Lctr32_km_xor:
lg $s0,0($inp)
lg $s1,8($inp)
xg $s0,0($s2,$inp)
...
...
@@ -1447,22 +1519,22 @@ $code.=<<___ if (!$softonly);
stg $s0,0($out,$inp)
stg $s1,8($out,$inp)
la $inp,16($inp)
-	brct	$s3,.Lctr32_hw_xor
+	brct	$s3,.Lctr32_km_xor
	slgr	$len,$fp
-	brc	1,.Lctr32_hw_loop	# not zero, no borrow
+	brc	1,.Lctr32_km_loop	# not zero, no borrow
	algr	$fp,$len
	lghi	$len,0
-	brc	4+1,.Lctr32_hw_loop	# not zero
+	brc	4+1,.Lctr32_km_loop	# not zero
	l${g}	$s0,0($sp)
	l${g}	$s1,$SIZE_T($sp)
	la	$s2,16($sp)
-.Lctr32_hw_zap:
+.Lctr32_km_zap:
	stg	$s0,0($s2)
	stg	$s0,8($s2)
	la	$s2,16($s2)
-	brct	$s1,.Lctr32_hw_zap
+	brct	$s1,.Lctr32_km_zap
la $sp,0($s0)
lm${g} %r6,$s3,6*$SIZE_T($sp)
...
...
@@ -1472,12 +1544,12 @@ $code.=<<___ if (!$softonly);
___
$code.=<<___;
stm${g} $key,$ra,5*$SIZE_T($sp)
-	sl${g}r	$out,$inp
+	sl${g}r	$inp,$out
	larl	$tbl,AES_Te
	llgf	$t1,12($ivp)
.Lctr32_loop:
-	stm${g}	$inp,$len,2*$SIZE_T($sp)
+	stm${g}	$inp,$out,2*$SIZE_T($sp)
llgf $s0,0($ivp)
llgf $s1,4($ivp)
llgf $s2,8($ivp)
...
...
@@ -1489,27 +1561,694 @@ $code.=<<___;
lm${g} $inp,$ivp,2*$SIZE_T($sp)
llgf $t1,16*$SIZE_T($sp)
-	x	$s0,0($inp)
+	x	$s0,0($inp,$out)
x $s1,4($inp,$out)
x $s2,8($inp,$out)
x $s3,12($inp,$out)
stm $s0,$s3,0($out)
la $out,16($out)
ahi $t1,1 # 32-bit increment
brct $len,.Lctr32_loop
lm${g} %r6,$ra,6*$SIZE_T($sp)
br $ra
.size AES_ctr32_encrypt,.-AES_ctr32_encrypt
___
}
########################################################################
# void AES_xts_encrypt(const char *inp,char *out,size_t len,
# const AES_KEY *key1, const AES_KEY *key2,u64 secno);
#
{
my $inp="%r2";
my $out="%r4";	# len and out are swapped
my $len="%r3";
my $key1="%r5";	# $i1
my $key2="%r6";	# $i2
my $fp ="%r7";	# $i3
my $tweak=16*$SIZE_T+16;	# or $stdframe-16, bottom of the frame...

$code.=<<___;
.type _s390x_xts_km,\@function
.align 16
_s390x_xts_km:
___
$code.=<<___ if (0);
	llgfr	$s0,%r0			# put aside the function code
	lghi	$s1,0x7f
	nr	$s1,%r0
	lghi	%r0,0			# query capability vector
	la	%r1,2*$SIZE_T($sp)
	.long	0xb92e0042		# km %r4,%r2
	llihh	%r1,0x8000
	srlg	%r1,%r1,32($s1)		# check for 32+function code
	ng	%r1,2*$SIZE_T($sp)
	lgr	%r0,$s0			# restore the function code
	la	%r1,0($key1)		# restore $key1
	jz	.Lxts_km_vanilla

	lmg	$i2,$i3,$tweak($sp)	# put aside the tweak value
	algr	$out,$inp

	oill	%r0,32			# switch to xts function code
	aghi	$s1,-18			#
	sllg	$s1,$s1,3		# (function code - 18)*8, 0 or 16
	la	%r1,$tweak-16($sp)
	slgr	%r1,$s1			# parameter block position
	lmg	$s0,$s3,0($key1)	# load 256 bits of key material,
	stmg	$s0,$s3,0(%r1)		# and copy it to parameter block.
					# yes, it contains junk and overlaps
					# with the tweak in 128-bit case.
					# it's done to avoid conditional
					# branch.
	stmg	$i2,$i3,$tweak($sp)	# "re-seat" the tweak value

	.long	0xb92e0042		# km %r4,%r2
	brc	1,.-4			# pay attention to "partial completion"

	lrvg	$s0,$tweak+0($sp)	# load the last tweak
	lrvg	$s1,$tweak+8($sp)
	stmg	%r0,%r3,$tweak-32(%r1)	# wipe copy of the key

	nill	%r0,0xffdf		# switch back to original function code
	la	%r1,0($key1)		# restore pointer to $key1
	slgr	$out,$inp
	llgc	$len,2*$SIZE_T-1($sp)
	nill	$len,0x0f		# $len%=16
	br	$ra
.align	16
.Lxts_km_vanilla:
___
$code.=<<___;
# prepare and allocate stack frame at the top of 4K page
# with 1K reserved for eventual signal handling
lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer
lghi $s1,-4096
algr $s0,$sp
lgr $fp,$sp
ngr $s0,$s1 # align at page boundary
slgr $fp,$s0 # total buffer size
lgr $s2,$sp
lghi $s1,1024+16 # sl[g]fi is extended-immediate facility
slgr $fp,$s1 # deduct reservation to get usable buffer size
# buffer size is at lest 256 and at most 3072+256-16
la $sp,1024($s0) # alloca
nill $fp,0xfff0 # round to 16*n
st${g} $s2,0($sp) # back-chain
nill $len,0xfff0 # redundant
st${g} $fp,$SIZE_T($sp)
slgr $len,$fp
brc 1,.Lxts_km_go # not zero, no borrow
algr $fp,$len # input is shorter than allocated buffer
lghi $len,0
st${g} $fp,$SIZE_T($sp)
.Lxts_km_go:
lrvg $s0,$tweak+0($s2) # load the tweak value in little-endian
lrvg $s1,$tweak+8($s2)
la $s2,16($sp) # vector of ascending tweak values
slgr $s2,$inp
srlg $s3,$fp,4
j .Lxts_km_start
.Lxts_km_loop:
la $s2,16($sp)
slgr $s2,$inp
srlg $s3,$fp,4
.Lxts_km_prepare:
lghi $i1,0x87
srag $i2,$s1,63 # broadcast upper bit
ngr $i1,$i2 # rem
srlg $i2,$s0,63 # carry bit from lower half
sllg $s0,$s0,1
sllg $s1,$s1,1
xgr $s0,$i1
ogr $s1,$i2
.Lxts_km_start:
lrvgr $i1,$s0 # flip byte order
lrvgr $i2,$s1
stg $i1,0($s2,$inp)
stg $i2,8($s2,$inp)
xg $i1,0($inp)
xg $i2,8($inp)
stg $i1,0($out,$inp)
stg $i2,8($out,$inp)
la $inp,16($inp)
brct $s3,.Lxts_km_prepare
slgr $inp,$fp # rewind $inp
la $s2,0($out,$inp)
lgr $s3,$fp
.long 0xb92e00aa # km $s2,$s2
brc 1,.-4 # pay attention to "partial completion"
la $s2,16($sp)
slgr $s2,$inp
srlg $s3,$fp,4
.Lxts_km_xor:
lg $i1,0($out,$inp)
lg $i2,8($out,$inp)
xg $i1,0($s2,$inp)
xg $i2,8($s2,$inp)
stg $i1,0($out,$inp)
stg $i2,8($out,$inp)
la $inp,16($inp)
brct $s3,.Lxts_km_xor
slgr $len,$fp
brc 1,.Lxts_km_loop # not zero, no borrow
algr $fp,$len
lghi $len,0
brc 4+1,.Lxts_km_loop # not zero
l${g} $i1,0($sp) # back-chain
llgf $fp,`2*$SIZE_T-4`($sp) # bytes used
la $i2,16($sp)
srlg $fp,$fp,4
.Lxts_km_zap:
stg $i1,0($i2)
stg $i1,8($i2)
la $i2,16($i2)
brct $fp,.Lxts_km_zap
la $sp,0($i1)
llgc $len,2*$SIZE_T-1($i1)
nill $len,0x0f # $len%=16
bzr $ra
# generate one more tweak...
lghi $i1,0x87
srag $i2,$s1,63 # broadcast upper bit
ngr $i1,$i2 # rem
srlg $i2,$s0,63 # carry bit from lower half
sllg $s0,$s0,1
sllg $s1,$s1,1
xgr $s0,$i1
ogr $s1,$i2
ltr $len,$len # clear zero flag
br $ra
.size _s390x_xts_km,.-_s390x_xts_km
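As its comments note, _s390x_xts_km carves a bounce buffer out of the top of the current 4KB stack page, keeping 1KB plus 16 bytes in reserve for eventual signal handling. The pointer arithmetic of that lghi/algr/ngr/slgr sequence, rendered as a hedged C sketch with made-up names:

    #include <stddef.h>
    #include <stdint.h>

    /* Illustration only: how the usable buffer size and the new stack
     * pointer are derived in _s390x_xts_km. */
    static void size_xts_buffer(uintptr_t sp, uintptr_t *new_sp, size_t *usable)
    {
        uintptr_t base  = (sp - 1024 - 256 - 16) & ~(uintptr_t)4095; /* align at page boundary */
        size_t    total = (size_t)(sp - base);                       /* total buffer size */

        *usable = (total - (1024 + 16)) & ~(size_t)15; /* deduct reservation, round to 16*n */
        *new_sp = base + 1024;                         /* la $sp,1024($s0): the "alloca" */
    }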
.globl AES_xts_encrypt
.type AES_xts_encrypt,\@function
.align 16
AES_xts_encrypt:
xgr %r3,%r4 # flip %r3 and %r4, $out and $len
xgr %r4,%r3
xgr %r3,%r4
___
$code.=<<___ if ($SIZE_T==4);
	llgfr	$len,$len
___
$code.=<<___;
st${g} $len,1*$SIZE_T($sp) # save copy of $len
srag $len,$len,4 # formally wrong, because it expands
# sign byte, but who can afford asking
# to process more than 2^63-1 bytes?
# I use it, because it sets condition
# code...
bcr 8,$ra # abort if zero (i.e. less than 16)
___
$code.=<<___ if (!$softonly);
	llgf	%r0,240($key2)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_enc_software

	stm${g}	%r6,$s3,6*$SIZE_T($sp)
	st${g}	$ra,14*$SIZE_T($sp)

	sllg	$len,$len,4		# $len&=~15
	slgr	$out,$inp

	lrvg	$s0,$stdframe($sp)	# load secno
	lghi	$s1,0
	la	$s2,$tweak($sp)
	lghi	$s3,16
	stmg	$s0,$s1,0($s2)
	la	%r1,0($key2)		# $key2 is not needed anymore
	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
	brc	1,.-4			# can this happen?

	l	%r0,240($key1)
	la	%r1,0($key1)		# $key1 is not needed anymore
	bras	$ra,_s390x_xts_km
	jz	.Lxts_enc_km_done

	aghi	$inp,-16		# take one step back
	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_enc_km_steal:
	llgc	$i1,16($inp)
	llgc	$i2,0($out,$inp)
	stc	$i1,0($out,$inp)
	stc	$i2,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_enc_km_steal

	la	$s2,0($i3)
	lghi	$s3,16
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	xg	$i1,0($s2)
	xg	$i2,8($s2)
	stg	$i1,0($s2)
	stg	$i2,8($s2)
	.long	0xb92e00aa		# km $s2,$s2
	brc	1,.-4			# can this happen?

	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1
	xg	$i1,0($i3)
	xg	$i2,8($i3)
	stg	$i1,0($i3)
	stg	$i2,8($i3)

.Lxts_enc_km_done:
	l${g}	$ra,14*$SIZE_T($sp)
	st${g}	$sp,$tweak($sp)		# wipe tweak
	st${g}	$sp,$tweak($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lxts_enc_software:
___
$code.=<<___;
stm${g} %r6,$ra,6*$SIZE_T($sp)
slgr $out,$inp
xgr $s0,$s0 # clear upper half
xgr $s1,$s1
lrv $s0,$stdframe+4($sp) # load secno
lrv $s1,$stdframe+0($sp)
xgr $s2,$s2
xgr $s3,$s3
stm${g} %r2,%r5,2*$SIZE_T($sp)
la $key,0($key2)
larl $tbl,AES_Te
bras $ra,_s390x_AES_encrypt # generate the tweak
lm${g} %r2,%r5,2*$SIZE_T($sp)
stm $s0,$s3,$tweak($sp) # save the tweak
j .Lxts_enc_enter
.align 16
.Lxts_enc_loop:
lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
lrvg $s3,$tweak+8($sp)
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
srlg %r0,$s1,63 # carry bit from lower half
sllg $s1,$s1,1
sllg $s3,$s3,1
xgr $s1,%r1
ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits
stg $s1,$tweak+0($sp) # save the tweak
llgfr $s1,$s1
srlg $s2,$s3,32
stg $s3,$tweak+8($sp)
llgfr $s3,$s3
la $inp,16($inp) # $inp+=16
.Lxts_enc_enter:
x $s0,0($inp) # ^=*($inp)
x $s1,4($inp)
x $s2,8($inp)
x $s3,12($inp)
stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing
la $key,0($key1)
bras $ra,_s390x_AES_encrypt
lm${g} %r2,%r5,2*$SIZE_T($sp)
x $s0,$tweak+0($sp) # ^=tweak
x $s1,$tweak+4($sp)
x $s2,$tweak+8($sp)
x $s3,$tweak+12($sp)
st $s0,0($out,$inp)
st $s1,4($out,$inp)
st $s2,8($out,$inp)
st $s3,12($out,$inp)
brct${g} $len,.Lxts_enc_loop
llgc $len,`2*$SIZE_T-1`($sp)
nill $len,0x0f # $len%16
jz .Lxts_enc_done
la $i3,0($inp,$out) # put aside real $out
.Lxts_enc_steal:
llgc %r0,16($inp)
llgc %r1,0($out,$inp)
stc %r0,0($out,$inp)
stc %r1,16($out,$inp)
la $inp,1($inp)
brct $len,.Lxts_enc_steal
la $out,0($i3) # restore real $out
# generate last tweak...
lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
lrvg $s3,$tweak+8($sp)
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
srlg %r0,$s1,63 # carry bit from lower half
sllg $s1,$s1,1
sllg $s3,$s3,1
xgr $s1,%r1
ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits
stg $s1,$tweak+0($sp) # save the tweak
llgfr $s1,$s1
srlg $s2,$s3,32
stg $s3,$tweak+8($sp)
llgfr $s3,$s3
x $s0,0($out) # ^=*(inp)|stolen cipther-text
x $s1,4($out)
x $s2,8($out)
x $s3,12($out)
st${g} $out,4*$SIZE_T($sp)
la $key,0($key1)
bras $ra,_s390x_AES_encrypt
l${g} $out,4*$SIZE_T($sp)
x $s0,`$tweak+0`($sp) # ^=tweak
x $s1,`$tweak+4`($sp)
x $s2,`$tweak+8`($sp)
x $s3,`$tweak+12`($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
.Lxts_enc_done:
stg $sp,$tweak+0($sp) # wipe tweak
stg $sp,$twesk+8($sp)
lm${g} %r6,$ra,6*$SIZE_T($sp)
br $ra
.size AES_xts_encrypt,.-AES_xts_encrypt
___
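Both the km-based and the software XTS paths update the tweak between blocks with the same pattern (lghi 0x87 / srag 63 / ngr / srlg / sllg / xgr / ogr): the 128-bit tweak is multiplied by x in GF(2^128), folding any carry out of the top back in with the reduction constant 0x87. A plain C sketch of that step, operating on the two 64-bit halves exactly as the lrvg loads present them; names are illustrative, not from the patch:

    #include <stdint.h>

    /* Multiply a 128-bit XTS tweak by x in GF(2^128): shift the whole value
     * left by one bit and, if a bit fell off the top, xor 0x87 into the
     * low byte (x^128 = x^7 + x^2 + x + 1). */
    static void xts_tweak_times_x(uint64_t *lo, uint64_t *hi)
    {
        uint64_t rem   = (*hi >> 63) ? 0x87 : 0; /* srag 63 + ngr: broadcast top bit, mask rem */
        uint64_t carry = *lo >> 63;              /* carry bit from lower half */

        *lo = (*lo << 1) ^ rem;
        *hi = (*hi << 1) | carry;
    }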
# void AES_xts_decrypt(const char *inp,char *out,size_t len,
# const AES_KEY *key1, const AES_KEY *key2,u64 secno);
#
$code.=<<___;
.globl AES_xts_decrypt
.type AES_xts_decrypt,\@function
.align 16
AES_xts_decrypt:
xgr %r3,%r4 # flip %r3 and %r4, $out and $len
xgr %r4,%r3
xgr %r3,%r4
___
$code.=<<___ if ($SIZE_T==4);
	llgfr	$len,$len
___
$code.=<<___;
st${g} $len,1*$SIZE_T($sp) # save copy of $len
aghi $len,-16
bcr 4,$ra # abort if less than zero. formally
# wrong, because $len is unsigned,
# but who can afford asking to
# process more than 2^63-1 bytes?
tmll $len,0x0f
jnz .Lxts_dec_proceed
aghi $len,16
.Lxts_dec_proceed:
___
$code.=<<___ if (!$softonly);
	llgf	%r0,240($key2)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_dec_software

	stm${g}	%r6,$s3,6*$SIZE_T($sp)
	st${g}	$ra,14*$SIZE_T($sp)

	nill	$len,0xfff0		# $len&=~15
	slgr	$out,$inp

	# generate the tweak value
	lrvg	$s0,$stdframe($sp)	# load secno
	lghi	$s1,0
	la	$s2,$tweak($sp)
	lghi	$s3,16
	stg	$s0,0($s2)
	stg	$s1,8($s2)
	la	%r1,0($key2)		# $key2 is not needed past this point
	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
	brc	1,.-4			# can this happen?

	l	%r0,240($key1)
	la	%r1,0($key1)		# $key1 is not needed anymore

	ltgr	$len,$len
	jz	.Lxts_dec_km_short
	bras	$ra,_s390x_xts_km
	jz	.Lxts_dec_km_done

	lrvgr	$s2,$s0			# make copy in reverse byte order
	lrvgr	$s3,$s1
	j	.Lxts_dec_km_2ndtweak

.Lxts_dec_km_short:
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%=16
	lrvg	$s0,$tweak+0($sp)	# load the tweak
	lrvg	$s1,$tweak+8($sp)
	lrvgr	$s2,$s0			# make copy in reverse byte order
	lrvgr	$s3,$s1

.Lxts_dec_km_2ndtweak:
	lghi	$i1,0x87
	srag	$i2,$s1,63		# broadcast upper bit
	ngr	$i1,$i2			# rem
	srlg	$i2,$s0,63		# carry bit from lower half
	sllg	$s0,$s0,1
	sllg	$s1,$s1,1
	xgr	$s0,$i1
	ogr	$s1,$i2
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1

	xg	$i1,0($inp)
	xg	$i2,8($inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$i2,0($out,$inp)
	lghi	$i3,16
	.long	0xb92e0066		# km $i2,$i2
	brc	1,.-4			# can this happen?

	lrvgr	$i1,$s0
	lrvgr	$i2,$s1
	xg	$i1,0($out,$inp)
	xg	$i2,8($out,$inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)

	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_dec_km_steal:
	llgc	$i1,16($inp)
	llgc	$i2,0($out,$inp)
	stc	$i1,0($out,$inp)
	stc	$i2,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_dec_km_steal

	lgr	$s0,$s2
	lgr	$s1,$s3
	xg	$s0,0($i3)
	xg	$s1,8($i3)
	stg	$s0,0($i3)
	stg	$s1,8($i3)
	la	$s0,0($i3)
	lghi	$s1,16
	.long	0xb92e0088		# km $s0,$s0
	brc	1,.-4			# can this happen?

	xg	$s2,0($i3)
	xg	$s3,8($i3)
	stg	$s2,0($i3)
	stg	$s3,8($i3)

.Lxts_dec_km_done:
	l${g}	$ra,14*$SIZE_T($sp)
	st${g}	$sp,$tweak($sp)		# wipe tweak
	st${g}	$sp,$tweak($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lxts_dec_software:
___
$code.=<<___;
stm${g} %r6,$ra,6*$SIZE_T($sp)
srlg $len,$len,4
slgr $out,$inp
xgr $s0,$s0 # clear upper half
xgr $s1,$s1
lrv $s0,$stdframe+4($sp) # load secno
lrv $s1,$stdframe+0($sp)
xgr $s2,$s2
xgr $s3,$s3
stm${g} %r2,%r5,2*$SIZE_T($sp)
la $key,0($key2)
larl $tbl,AES_Te
bras $ra,_s390x_AES_encrypt # generate the tweak
lm${g} %r2,%r5,2*$SIZE_T($sp)
larl $tbl,AES_Td
lt${g}r $len,$len
stm $s0,$s3,$tweak($sp) # save the tweak
jz .Lxts_dec_short
j .Lxts_dec_enter
.align 16
.Lxts_dec_loop:
lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
lrvg $s3,$tweak+8($sp)
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
srlg %r0,$s1,63 # carry bit from lower half
sllg $s1,$s1,1
sllg $s3,$s3,1
xgr $s1,%r1
ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits
stg $s1,$tweak+0($sp) # save the tweak
llgfr $s1,$s1
srlg $s2,$s3,32
stg $s3,$tweak+8($sp)
llgfr $s3,$s3
.Lxts_dec_enter:
x $s0,0($inp) # tweak^=*(inp)
x $s1,4($inp)
x $s2,8($inp)
x $s3,12($inp)
stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing
la $key,0($key1)
bras $ra,_s390x_AES_decrypt
lm${g} %r2,%r5,2*$SIZE_T($sp)
x $s0,$tweak+0($sp) # ^=tweak
x $s1,$tweak+4($sp)
x $s2,$tweak+8($sp)
x $s3,$tweak+12($sp)
st $s0,0($out,$inp)
st $s1,4($out,$inp)
st $s2,8($out,$inp)
st $s3,12($out,$inp)
la $inp,16($inp)
-	ahi	$t1,1			# 32-bit increment
-	brct	$len,.Lctr32_loop
+	brct${g}	$len,.Lxts_dec_loop
llgc $len,`2*$SIZE_T-1`($sp)
nill $len,0x0f # $len%16
jz .Lxts_dec_done
# generate pair of tweaks...
lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
lrvg $s3,$tweak+8($sp)
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
srlg %r0,$s1,63 # carry bit from lower half
sllg $s1,$s1,1
sllg $s3,$s3,1
xgr $s1,%r1
ogr $s3,%r0
lrvgr $i2,$s1 # flip byte order
lrvgr $i3,$s3
stmg $i2,$i3,$tweak($sp) # save the 1st tweak
j .Lxts_dec_2ndtweak
.align 16
.Lxts_dec_short:
llgc $len,`2*$SIZE_T-1`($sp)
nill $len,0x0f # $len%16
lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
lrvg $s3,$tweak+8($sp)
.Lxts_dec_2ndtweak:
lghi %r1,0x87
srag %r0,$s3,63 # broadcast upper bit
ngr %r1,%r0 # rem
srlg %r0,$s1,63 # carry bit from lower half
sllg $s1,$s1,1
sllg $s3,$s3,1
xgr $s1,%r1
ogr $s3,%r0
lrvgr $s1,$s1 # flip byte order
lrvgr $s3,$s3
srlg $s0,$s1,32 # smash the tweak to 4x32-bits
stg $s1,$tweak-16+0($sp) # save the 2nd tweak
llgfr $s1,$s1
srlg $s2,$s3,32
stg $s3,$tweak-16+8($sp)
llgfr $s3,$s3
x $s0,0($inp) # tweak_the_2nd^=*(inp)
x $s1,4($inp)
x $s2,8($inp)
x $s3,12($inp)
stm${g} %r2,%r3,2*$SIZE_T($sp)
la $key,0($key1)
bras $ra,_s390x_AES_decrypt
lm${g} %r2,%r5,2*$SIZE_T($sp)
x $s0,$tweak-16+0($sp) # ^=tweak_the_2nd
x $s1,$tweak-16+4($sp)
x $s2,$tweak-16+8($sp)
x $s3,$tweak-16+12($sp)
st $s0,0($out,$inp)
st $s1,4($out,$inp)
st $s2,8($out,$inp)
st $s3,12($out,$inp)
la $i3,0($out,$inp) # put aside real $out
.Lxts_dec_steal:
llgc %r0,16($inp)
llgc %r1,0($out,$inp)
stc %r0,0($out,$inp)
stc %r1,16($out,$inp)
la $inp,1($inp)
brct $len,.Lxts_dec_steal
la $out,0($i3) # restore real $out
lm $s0,$s3,$tweak($sp) # load the 1st tweak
x $s0,0($out) # tweak^=*(inp)|stolen cipher-text
x $s1,4($out)
x $s2,8($out)
x $s3,12($out)
st${g} $out,4*$SIZE_T($sp)
la $key,0($key1)
bras $ra,_s390x_AES_decrypt
l${g} $out,4*$SIZE_T($sp)
x $s0,$tweak+0($sp) # ^=tweak
x $s1,$tweak+4($sp)
x $s2,$tweak+8($sp)
x $s3,$tweak+12($sp)
st $s0,0($out)
st $s1,4($out)
st $s2,8($out)
st $s3,12($out)
stg $sp,$tweak-16+0($sp) # wipe 2nd tweak
stg $sp,$tweak-16+8($sp)
.Lxts_dec_done:
stg $sp,$tweak+0($sp) # wipe tweak
stg $sp,$twesk+8($sp)
lm${g} %r6,$ra,6*$SIZE_T($sp)
br $ra
-.size	AES_ctr32_encrypt,.-AES_ctr32_encrypt
+.size	AES_xts_decrypt,.-AES_xts_decrypt
___
}
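When the input is not a multiple of 16 bytes, both XTS routines finish with a ciphertext-stealing step (the .Lxts_enc_steal / .Lxts_dec_steal loops): the tail bytes of the last full ciphertext block are swapped with the remaining plaintext bytes before one final block is processed. A hedged C sketch of the byte-shuffling loop on the encrypt side, with illustrative names only:

    #include <stddef.h>

    /* Illustrative rendering of .Lxts_enc_steal: `out` points at the last
     * full ciphertext block already written, `in` at the leftover plaintext
     * bytes, `tail` is len%16.  Afterwards the 16 bytes at `out` are
     * encrypted once more with the next tweak. */
    static void xts_steal_bytes(unsigned char *out, const unsigned char *in, size_t tail)
    {
        for (size_t i = 0; i < tail; i++) {
            unsigned char c = out[i];   /* ciphertext byte being stolen */
            out[i] = in[i];             /* move plaintext tail into the last block */
            out[16 + i] = c;            /* stolen ciphertext forms the short final block */
        }
    }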
$code.=<<___;
.comm OPENSSL_s390xcap_P,16,8
.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
.comm OPENSSL_s390xcap_P,16,8
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
...
...
crypto/bn/asm/s390x-mont.pl
...
...
@@ -41,8 +41,8 @@
# processor, as long as it's "z-CPU". Latter implies that the code
# remains z/Architecture specific. Compatibility with 32-bit BN_ULONG
# is achieved by swapping words after 64-bit loads, follow _dswap-s.
-# On z990 it was measured to perform 2.6-2.2 times better, less for
-# longer keys...
+# On z990 it was measured to perform 2.6-2.2 times better than
+# compiler-generated code, less for longer keys...

$flavour = shift;
...
...
@@ -102,8 +102,8 @@ $code.=<<___ if ($flavour =~ /3[12]/);
	bnzr	%r14			# if ($num&1) return 0;
___
$code.=<<___ if ($flavour !~ /3[12]/);
-	cghi	$num,128		#
-	bhr	%r14			# if($num>128) return 0;
+	cghi	$num,96			#
+	bhr	%r14			# if($num>96) return 0;
___
$code.=<<___;
stm${g} %r3,%r15,3*$SIZE_T($sp)
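The guards above make the assembler bn_mul_mont bail out (return 0) for sizes it does not want to handle, leaving the caller to fall back to a generic path. A hedged sketch of how such a guard is typically consumed on the C side; the fallback name is invented for illustration, only the bn_mul_mont prototype matches OpenSSL's convention:

    typedef unsigned long BN_ULONG;   /* 64-bit limb on s390x */

    /* Assembler routine from s390x-mont.pl: returns 0 when it refuses the size. */
    int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                    const BN_ULONG *np, const BN_ULONG *n0, int num);

    /* Hypothetical stand-in for the portable Montgomery multiplication. */
    int bn_mul_mont_generic(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                            const BN_ULONG *np, const BN_ULONG *n0, int num);

    static int mont_mul(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                        const BN_ULONG *np, const BN_ULONG *n0, int num)
    {
        if (bn_mul_mont(rp, ap, bp, np, n0, num))   /* asm refuses odd num or num>96 */
            return 1;
        return bn_mul_mont_generic(rp, ap, bp, np, n0, num);
    }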
...
...
crypto/modes/asm/ghash-s390x.pl
...
...
@@ -28,6 +28,15 @@
# remains z/Architecture specific. On z990 it was measured to perform
# 2.8x better than 32-bit code generated by gcc 4.3.
# March 2011.
#
# Support for hardware KIMD-GHASH is verified to produce correct
# result and therefore is engaged. On z196 it was measured to process
# 8KB buffer ~7 faster than software implementation. It's not as
# impressive for smaller buffer sizes and for smallest 16-bytes buffer
# it's actually almost 2 times slower. Which is the reason why
# KIMD-GHASH is not used in gcm_gmult_4bit.
$flavour = shift;
if ($flavour =~ /3[12]/) {
...
...
@@ -41,7 +50,7 @@ if ($flavour =~ /3[12]/) {
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";

-$softonly=1;	# disable hardware support for now
+$softonly=0;

$Zhi="%r0";
$Zlo="%r1";
...
...
@@ -70,7 +79,7 @@ $code.=<<___;
.align 32
gcm_gmult_4bit:
___
-$code.=<<___ if (!$softonly);
+$code.=<<___ if (!$softonly && 0);	# hardware is slow for single block...
	larl	%r1,OPENSSL_s390xcap_P
	lg	%r0,0(%r1)
	tmhl	%r0,0x4000	# check for message-security-assist
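The lines above show how the assembler consults OPENSSL_s390xcap_P before taking a hardware path. A hedged C sketch of the same test; the two-word layout and the exact bit position are assumptions read off the .comm declaration and the tmhl mask, not a documented API:

    #include <stdint.h>

    /* 16-byte capability vector filled in at startup on s390x; assumed here
     * to be two 64-bit words, matching ".comm OPENSSL_s390xcap_P,16,8". */
    extern uint64_t OPENSSL_s390xcap_P[2];

    /* Rough equivalent of "lg %r0,0(%r1); tmhl %r0,0x4000": test the
     * message-security-assist bit before using KM/KIMD instructions. */
    static int have_msa(void)
    {
        return (OPENSSL_s390xcap_P[0] & 0x0000400000000000ULL) != 0;
    }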
...
...