Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenHarmony
Third Party Openssl
提交
592eef5c
T
Third Party Openssl
项目概览
OpenHarmony
/
Third Party Openssl
大约 1 年 前同步成功
通知
9
Star
18
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Third Party Openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
592eef5c
编写于
12月 14, 2015
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
s390x assembly pack: add ChaCha20 and Poly1305 modules.
Reviewed-by:
N
Tim Hudson
<
tjh@openssl.org
>
上级
5e355e0c
变更
4
隐藏空白更改
内联
并排
Showing 4 changed files with 537 additions and 0 deletions
+537
-0
crypto/chacha/Makefile.in
crypto/chacha/Makefile.in
+2
-0
crypto/chacha/asm/chacha-s390x.pl
crypto/chacha/asm/chacha-s390x.pl
+317
-0
crypto/poly1305/Makefile.in
crypto/poly1305/Makefile.in
+2
-0
crypto/poly1305/asm/poly1305-s390x.pl
crypto/poly1305/asm/poly1305-s390x.pl
+216
-0
未找到文件。
crypto/chacha/Makefile.in
浏览文件 @
592eef5c
...
...
@@ -36,6 +36,8 @@ lib: $(LIBOBJ)
$(RANLIB)
$(LIB)
||
echo
Never mind.
@
touch
lib
# Generate chacha-<name>.S by running asm/chacha-<name>.pl with the
# perlasm scheme and the target file name as arguments.
chacha-%.S:	asm/chacha-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@

# Append the output of util/files.pl for this Makefile to $(TOP)/MINFO.
files:
	$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
...
...
crypto/chacha/asm/chacha-s390x.pl
0 → 100755
浏览文件 @
592eef5c
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# December 2015
#
# ChaCha20 for s390x.
#
# 3 times faster than compiler-generated code.
# Command line: <flavour> [...] <output-file>
#
# A flavour matching /3[12]/ selects 4-byte pointers and the plain
# instruction forms ($g is empty); anything else selects 8-byte
# pointers and the "g"-suffixed instruction forms.
$flavour = shift;

if ($flavour =~ /3[12]/) {
	$SIZE_T = 4;
	$g = "";
} else {
	$SIZE_T = 8;
	$g = "g";
}

# Skip arguments until one looks like "name.ext"; that is the output file.
while (($output = shift) && ($output !~ /^\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when an output name was actually supplied, and
# fail loudly instead of silently generating nothing.
if (defined $output && $output ne "") {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}
# thunk [simplified] x86-style perlasm
#
# Any call to an otherwise undefined sub lands here. The sub name is
# taken as the instruction mnemonic and the arguments as its operands,
# e.g. &alr("%r0","%r4") appends "\talr\t%r0,%r4\n" to $code.
sub AUTOLOAD()
{
	my $opcode = $AUTOLOAD;

	$opcode =~ s/.*:://;		# strip the package qualifier
	$code .= "\t$opcode\t" . join(',', @_) . "\n";
}
# Stack pointer and frame layout: $stdframe is 16 pointer-sized slots
# plus 4*8 bytes, and $frame adds room for 20 32-bit words on top of it.
my $sp = "%r15";
my $stdframe = 16*$SIZE_T + 4*8;
my $frame = $stdframe + 4*20;

# Function arguments arrive in %r2..%r6.
my ($out, $inp, $len, $key, $counter) = map("%r$_", (2..6));

# @x maps a ChaCha state word index to the register holding it.
# Indices 8..11 map to the placeholder "%rx" because those words are
# kept in memory and handled through the @t pair (%r8, %r9) instead.
my @x = map("%r$_", (0..7, "x", "x", "x", "x", (10..13)));
my @t = map("%r$_", (8, 9));
# ROUND(a0, b0, c0, d0)
#
# Returns the list of perlasm call strings (to be eval'ed one by one)
# for four quarter-rounds. The arguments are the state word indices of
# the first quarter-round; the indices of the other three are derived
# by stepping each index to the next position within its group of four.
# The quarter-rounds are emitted as two interleaved pairs, with a
# store/load of the in-register 'c' pair between them.
sub ROUND {
	my ($a0, $b0, $c0, $d0) = @_;

	# Advance every index to the next slot of its 4-word row.
	my $step = sub { map { ($_ & ~3) + (($_ + 1) & 3) } @_ };

	my ($a1, $b1, $c1, $d1) = $step->($a0, $b0, $c0, $d0);
	my ($a2, $b2, $c2, $d2) = $step->($a1, $b1, $c1, $d1);
	my ($a3, $b3, $c3, $d3) = $step->($a2, $b2, $c2, $d2);

	# Register names wrapped in double quotes, so that they are string
	# literals once the returned text is eval'ed.
	my ($xc, $xc_) = map { "\"$_\"" } @t;
	my @reg = map { "\"$_\"" } @x;

	# Consider order in which variables are addressed by their
	# index:
	#
	#	a   b   c   d
	#
	#	0   4   8  12 < even round
	#	1   5   9  13
	#	2   6  10  14
	#	3   7  11  15
	#	0   5  10  15 < odd round
	#	1   6  11  12
	#	2   7   8  13
	#	3   4   9  14
	#
	# 'a', 'b' and 'd's are permanently allocated in registers,
	# @x[0..7,12..15], while 'c's are maintained in memory. If
	# you observe 'c' column, you'll notice that pair of 'c's is
	# invariant between rounds. This means that we have to reload
	# them once per round, in the middle. This is why you'll see
	# 'c' stores and loads in the middle, but none in the beginning
	# or end.

	# Two interleaved quarter-rounds (i and j) that share the 'c' pair
	# currently held in $xc/$xc_. The rotate amounts are 16 and 12 in
	# the first pass, 8 and 7 in the second.
	my $quarter_pair = sub {
		my ($ai, $bi, $di, $aj, $bj, $dj) = @_;
		my @ops;

		foreach my $rot ([16, 12], [8, 7]) {
			my ($rot_d, $rot_b) = @$rot;

			push @ops,
			    "&alr ($reg[$ai],$reg[$bi])",
			    "&alr ($reg[$aj],$reg[$bj])",
			    "&xr ($reg[$di],$reg[$ai])",
			    "&xr ($reg[$dj],$reg[$aj])",
			    "&rll ($reg[$di],$reg[$di],$rot_d)",
			    "&rll ($reg[$dj],$reg[$dj],$rot_d)",

			    "&alr ($xc,$reg[$di])",
			    "&alr ($xc_,$reg[$dj])",
			    "&xr ($reg[$bi],$xc)",
			    "&xr ($reg[$bj],$xc_)",
			    "&rll ($reg[$bi],$reg[$bi],$rot_b)",
			    "&rll ($reg[$bj],$reg[$bj],$rot_b)";
		}

		return @ops;
	};

	return (
	    $quarter_pair->($a0, $b0, $d0, $a1, $b1, $d1),	# Q1, Q2

	    "&stm ($xc,$xc_,'$stdframe+4*8+4*$c0($sp)')",	# reload pair of 'c's
	    "&lm ($xc,$xc_,'$stdframe+4*8+4*$c2($sp)')",

	    $quarter_pair->($a2, $b2, $d2, $a3, $b3, $d3),	# Q3, Q4
	);
}
# ChaCha20_ctr32 prologue and the head of the per-block outer loop.
#
# A zero length now returns immediately. Without that check the code
# falls through to .Ltail with a remaining byte count of 0, and the
# count-down branch there would wrap around instead of terminating.
$code.=<<___;
.text
.globl ChaCha20_ctr32
.type ChaCha20_ctr32,\@function
.align 32
ChaCha20_ctr32:
lt${g}r $len,$len # zero length? nothing to do
bzr %r14
a${g}hi $len,-64
l${g}hi %r1,-$frame
stm${g} %r6,%r15,`6*$SIZE_T`($sp)
sl${g}r $out,$inp # difference
la $len,0($inp,$len) # end of input minus 64
larl %r7,.Lsigma
lgr %r0,$sp
la $sp,0(%r1,$sp)
st${g} %r0,0($sp)
lmg %r8,%r11,0($key) # load key
lmg %r12,%r13,0($counter) # load counter
lmg %r6,%r7,0(%r7) # load sigma constant
la %r14,0($inp)
st${g} $out,$frame+3*$SIZE_T($sp)
st${g} $len,$frame+4*$SIZE_T($sp)
stmg %r6,%r13,$stdframe($sp) # copy key schedule to stack
srlg @x[12],%r12,32 # 32-bit counter value
j .Loop_outer
.align 16
.Loop_outer:
lm @x[0],@x[7],$stdframe+4*0($sp) # load x[0]-x[7]
lm @t[0],@t[1],$stdframe+4*10($sp) # load x[10]-x[11]
lm @x[13],@x[15],$stdframe+4*13($sp) # load x[13]-x[15]
stm @t[0],@t[1],$stdframe+4*8+4*10($sp) # offload x[10]-x[11]
lm @t[0],@t[1],$stdframe+4*8($sp) # load x[8]-x[9]
st @x[12],$stdframe+4*12($sp) # save counter
st${g} %r14,$frame+2*$SIZE_T($sp) # save input pointer
lhi %r14,10
j .Loop
.align 4
.Loop:
___
# Body of .Loop: one "even" round (0,4,8,12) followed by one "odd"
# round (0,5,10,15). Each string returned by ROUND is a perlasm call
# that appends one instruction to $code when eval'ed. A string that
# fails to evaluate now aborts generation instead of being dropped.
foreach my $first_quarter ([0, 4, 8, 12], [0, 5, 10, 15]) {
	foreach (ROUND(@$first_quarter)) {
		eval;
		die $@ if $@;
	}
}
# Remainder of ChaCha20_ctr32, appended after the generated round code:
#  - brct closes .Loop (the counter register %r14 was loaded with 10
#    just before the loop label);
#  - the key schedule copy on the stack is added to the state, words
#    are byte-reversed, xor-ed with the input and stored to the output,
#    then the counter is incremented and .Loop_outer is re-entered
#    while full 64-byte blocks remain;
#  - .Ldone wipes the stack copy and restores %r6-%r15;
#  - .Ltail writes the finished key stream block to the stack and
#    xors the remaining input one byte at a time;
#  - .Lsigma holds the four constant words.
# NOTE(review): the first .Ltail instruction spells the register as
# $t[1] where every other line uses @t[1]; both forms interpolate the
# same array element here, so this looks purely cosmetic.
$code
.=
<<___;
brct %r14,.Loop
l${g} %r14,$frame+2*$SIZE_T($sp) # pull input pointer
stm @t[0],@t[1],$stdframe+4*8+4*8($sp) # offload x[8]-x[9]
lm${g} @t[0],@t[1],$frame+3*$SIZE_T($sp)
al @x[0],$stdframe+4*0($sp) # accumulate key schedule
al @x[1],$stdframe+4*1($sp)
al @x[2],$stdframe+4*2($sp)
al @x[3],$stdframe+4*3($sp)
al @x[4],$stdframe+4*4($sp)
al @x[5],$stdframe+4*5($sp)
al @x[6],$stdframe+4*6($sp)
al @x[7],$stdframe+4*7($sp)
lrvr @x[0],@x[0]
lrvr @x[1],@x[1]
lrvr @x[2],@x[2]
lrvr @x[3],@x[3]
lrvr @x[4],@x[4]
lrvr @x[5],@x[5]
lrvr @x[6],@x[6]
lrvr @x[7],@x[7]
al @x[12],$stdframe+4*12($sp)
al @x[13],$stdframe+4*13($sp)
al @x[14],$stdframe+4*14($sp)
al @x[15],$stdframe+4*15($sp)
lrvr @x[12],@x[12]
lrvr @x[13],@x[13]
lrvr @x[14],@x[14]
lrvr @x[15],@x[15]
la @t[0],0(@t[0],%r14) # reconstruct output pointer
cl${g}r %r14,@t[1]
jh .Ltail
x @x[0],4*0(%r14) # xor with input
x @x[1],4*1(%r14)
st @x[0],4*0(@t[0]) # store output
x @x[2],4*2(%r14)
st @x[1],4*1(@t[0])
x @x[3],4*3(%r14)
st @x[2],4*2(@t[0])
x @x[4],4*4(%r14)
st @x[3],4*3(@t[0])
lm @x[0],@x[3],$stdframe+4*8+4*8($sp) # load x[8]-x[11]
x @x[5],4*5(%r14)
st @x[4],4*4(@t[0])
x @x[6],4*6(%r14)
al @x[0],$stdframe+4*8($sp)
st @x[5],4*5(@t[0])
x @x[7],4*7(%r14)
al @x[1],$stdframe+4*9($sp)
st @x[6],4*6(@t[0])
x @x[12],4*12(%r14)
al @x[2],$stdframe+4*10($sp)
st @x[7],4*7(@t[0])
x @x[13],4*13(%r14)
al @x[3],$stdframe+4*11($sp)
st @x[12],4*12(@t[0])
x @x[14],4*14(%r14)
st @x[13],4*13(@t[0])
x @x[15],4*15(%r14)
st @x[14],4*14(@t[0])
lrvr @x[0],@x[0]
st @x[15],4*15(@t[0])
lrvr @x[1],@x[1]
lrvr @x[2],@x[2]
lrvr @x[3],@x[3]
lhi @x[12],1
x @x[0],4*8(%r14)
al @x[12],$stdframe+4*12($sp) # increment counter
x @x[1],4*9(%r14)
st @x[0],4*8(@t[0])
x @x[2],4*10(%r14)
st @x[1],4*9(@t[0])
x @x[3],4*11(%r14)
st @x[2],4*10(@t[0])
la %r14,64(%r14)
st @x[3],4*11(@t[0])
cl${g}r %r14,@t[1] # done yet?
jle .Loop_outer
.Ldone:
xgr %r0,%r0
xgr %r1,%r1
xgr %r2,%r2
xgr %r3,%r3
stmg %r0,%r3,$stdframe+4*4($sp) # wipe key copy
stmg %r0,%r3,$stdframe+4*12($sp)
lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp)
br %r14
.align 16
.Ltail:
la @t[1],64($t[1])
stm @x[0],@x[7],$stdframe+4*0($sp)
sl${g}r @t[1],%r14
lm @x[0],@x[3],$stdframe+4*8+4*8($sp)
l${g}hi @x[6],0
stm @x[12],@x[15],$stdframe+4*12($sp)
al @x[0],$stdframe+4*8($sp)
al @x[1],$stdframe+4*9($sp)
al @x[2],$stdframe+4*10($sp)
al @x[3],$stdframe+4*11($sp)
lrvr @x[0],@x[0]
lrvr @x[1],@x[1]
lrvr @x[2],@x[2]
lrvr @x[3],@x[3]
stm @x[0],@x[3],$stdframe+4*8+4*8($sp)
.Loop_tail:
llgc @x[4],0(@x[6],%r14)
llgc @x[5],$stdframe(@x[6],$sp)
xr @x[5],@x[4]
stc @x[5],0(@x[6],@t[0])
la @x[6],1(@x[6])
brct @t[1],.Loop_tail
j .Ldone
.size ChaCha20_ctr32,.-ChaCha20_ctr32
.align 32
.Lsigma:
.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
.asciz "ChaCha20 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
.align 4
___
# Emit the generated text line by line, replacing every `...` span
# with the result of evaluating it as a Perl expression.
foreach my $line (split("\n", $code)) {
	$line =~ s/\`([^\`]*)\`/eval $1/ge;
	print $line, "\n";
}

# Buffered write errors only surface at close, so check it.
close STDOUT or die "error closing STDOUT: $!";
crypto/poly1305/Makefile.in
浏览文件 @
592eef5c
...
...
@@ -39,6 +39,8 @@ lib: $(LIBOBJ)
# The sparcv9 script takes no arguments and writes to standard output.
poly1305-sparcv9.S:	asm/poly1305-sparcv9.pl
	$(PERL) asm/poly1305-sparcv9.pl > $@

# Generate poly1305-<name>.S by running asm/poly1305-<name>.pl with the
# perlasm scheme and the target file name as arguments.
poly1305-%.S:	asm/poly1305-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@

# Append the output of util/files.pl for this Makefile to $(TOP)/MINFO.
files:
	$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
...
...
crypto/poly1305/asm/poly1305-s390x.pl
0 → 100755
浏览文件 @
592eef5c
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements Poly1305 hash for s390x.
#
# June 2015
#
# ~6.4/2.2 cpb on z10/z196+, >2x improvement over compiler-generated
# code. For older compiler improvement coefficient is >3x, because
# then base 2^64 and base 2^32 implementations are compared.
#
# On side note, z13 enables vector base 2^26 implementation...
# Command line: <flavour> [...] <output-file>
#
# A flavour matching /3[12]/ selects 4-byte pointers and the plain
# instruction forms ($g is empty); anything else selects 8-byte
# pointers and the "g"-suffixed instruction forms.
$flavour = shift;

if ($flavour =~ /3[12]/) {
	$SIZE_T = 4;
	$g = "";
} else {
	$SIZE_T = 8;
	$g = "g";
}

# Skip arguments until one looks like "name.ext"; that is the output file.
while (($output = shift) && ($output !~ /^\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when an output name was actually supplied, and
# fail loudly instead of silently generating nothing.
if (defined $output && $output ne "") {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}
# Stack pointer, and the four function arguments in %r2..%r5.
$sp = "%r15";

my ($ctx, $inp, $len, $padbit) = map("%r$_", (2..5));
# poly1305_init: zeroes the three 64-bit hash words at 0/8/16($ctx).
# If the second argument ($inp, the key pointer) is non-zero, the two
# 64-bit key halves are loaded little-endian, masked with
# 0x0ffffffc0fffffff and 0x0ffffffc0ffffffc respectively and stored at
# 32/40($ctx). %r2 is set to 0 before returning in either case.
$code
.=
<<___;
.text
.globl poly1305_init
.type poly1305_init,\@function
.align 16
poly1305_init:
lghi %r0,0
lghi %r1,-1
stg %r0,0($ctx) # zero hash value
stg %r0,8($ctx)
stg %r0,16($ctx)
cl${g}r $inp,%r0
je .Lno_key
lrvg %r4,0($inp) # load little-endian key
lrvg %r5,8($inp)
nihl %r1,0xffc0 # 0xffffffc0ffffffff
srlg %r0,%r1,4 # 0x0ffffffc0fffffff
srlg %r1,%r1,4
nill %r1,0xfffc # 0x0ffffffc0ffffffc
ngr %r4,%r0
ngr %r5,%r1
stg %r4,32($ctx)
stg %r5,40($ctx)
.Lno_key:
lghi %r2,0
br %r14
.size poly1305_init,.-poly1305_init
___
{
# Register roles for poly1305_blocks: products and temporaries live in
# %r6..%r14, the key words in %r0/%r1. $s1 occupies %r2, the register
# that carries $ctx on entry, which is why $ctx is off-loaded to the
# stack before the loop and restored after it.
my ($d0hi, $d0lo, $d1hi, $d1lo, $t0, $h0, $t1, $h1, $h2) = map("%r$_", (6..14));
my ($r0, $r1, $s1) = map("%r$_", (0..2));

# Convert the byte length into a count of 16-byte blocks. SRLG takes
# (target, source, shift) but the 32-bit SRL only takes (register,
# shift), so the plain flavour needs the two-operand spelling.
my $len_to_blocks = $g ? "srlg $len,$len,4" : "srl $len,4";

$code.=<<___;
.globl poly1305_blocks
.type poly1305_blocks,\@function
.align 16
poly1305_blocks:
$len_to_blocks
lghi %r0,0
cl${g}r $len,%r0
je .Lno_data
stm${g} %r6,%r14,`6*$SIZE_T`($sp)
lg $r0,32($ctx) # load key
lg $r1,40($ctx)
lg $h0,0($ctx) # load hash value
lg $h1,8($ctx)
lg $h2,16($ctx)
st$g $ctx,`2*$SIZE_T`($sp) # off-load $ctx
srlg $s1,$r1,2
algr $s1,$r1 # s1 = r1 + r1>>2
j .Loop
.align 16
.Loop:
lrvg $d0lo,0($inp) # load little-endian input
lrvg $d1lo,8($inp)
la $inp,16($inp)
algr $d0lo,$h0 # accumulate input
alcgr $d1lo,$h1
lgr $h0,$d0lo
mlgr $d0hi,$r0 # h0*r0 -> $d0hi:$d0lo
lgr $h1,$d1lo
mlgr $d1hi,$s1 # h1*5*r1 -> $d1hi:$d1lo
mlgr $t0,$r1 # h0*r1 -> $t0:$h0
mlgr $t1,$r0 # h1*r0 -> $t1:$h1
alcgr $h2,$padbit
algr $d0lo,$d1lo
lgr $d1lo,$h2
alcgr $d0hi,$d1hi
lghi $d1hi,0
algr $h1,$h0
alcgr $t1,$t0
msgr $d1lo,$s1 # h2*s1
msgr $h2,$r0 # h2*r0
algr $h1,$d1lo
alcgr $t1,$d1hi # $d1hi is zero
algr $h1,$d0hi
alcgr $h2,$t1
lghi $h0,-4 # final reduction step
ngr $h0,$h2
srlg $t0,$h2,2
algr $h0,$t0
algr $h0,$d0lo
lghi $t1,3
alcgr $h1,$d1hi # $d1hi is still zero
ngr $h2,$t1
brct$g $len,.Loop
l$g $ctx,`2*$SIZE_T`($sp) # restore $ctx
stg $h0,0($ctx) # store hash value
stg $h1,8($ctx)
stg $h2,16($ctx)
lm${g} %r6,%r14,`6*$SIZE_T`($sp)
.Lno_data:
br %r14
.size poly1305_blocks,.-poly1305_blocks
___
}
{
# poly1305_emit reuses the second and third argument registers under
# new names: $mac is the output pointer, $nonce the nonce pointer.
# The hash words and two scratch values live in %r5..%r9.
my ($mac, $nonce) = ($inp, $len);
my ($h0, $h1, $h2, $d0, $d1) = map("%r$_", (5..9));

$code.=<<___;
.globl poly1305_emit
.type poly1305_emit,\@function
.align 16
poly1305_emit:
stm${g} %r6,%r9,`6*$SIZE_T`($sp)
lg $h0,0($ctx)
lg $h1,8($ctx)
lg $h2,16($ctx)
lghi %r0,5
lghi %r1,0
lgr $d0,$h0
lgr $d1,$h1
algr $h0,%r0 # compare to modulus
alcgr $h1,%r1
alcgr $h2,%r1
srlg $h2,$h2,2 # did it borrow/carry?
slgr %r1,$h2 # 0-$h2>>2
lg $h2,0($nonce) # load nonce
lghi %r0,-1
lg $ctx,8($nonce)
xgr %r0,%r1 # ~%r1
ngr $h0,%r1
ngr $d0,%r0
ngr $h1,%r1
ngr $d1,%r0
ogr $h0,$d0
rllg $d0,$h2,32 # flip nonce words
ogr $h1,$d1
rllg $d1,$ctx,32
algr $h0,$d0 # accumulate nonce
alcgr $h1,$d1
strvg $h0,0($mac) # write little-endian result
strvg $h1,8($mac)
lm${g} %r6,%r9,`6*$SIZE_T`($sp)
br %r14
.size poly1305_emit,.-poly1305_emit
.string "Poly1305 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___
}
# Replace every `...` span in the generated text with the result of
# evaluating it as a Perl expression, then emit the whole module.
$code =~ s/\`([^\`]*)\`/eval $1/gem;

print $code;

# Buffered write errors only surface at close, so check it.
close STDOUT or die "error closing STDOUT: $!";
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录