Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenHarmony
Third Party Openssl
提交
cd686946
T
Third Party Openssl
项目概览
OpenHarmony
/
Third Party Openssl
大约 1 年 前同步成功
通知
9
Star
18
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Third Party Openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
cd686946
编写于
11月 24, 2012
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
AES for SPARC T4: add XTS, reorder subroutines to improve TLB locality.
上级
1c16fd1f
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
624 addition
and
132 deletion
+624
-132
crypto/aes/asm/aest4-sparcv9.pl
crypto/aes/asm/aest4-sparcv9.pl
+116
-99
crypto/evp/e_aes.c
crypto/evp/e_aes.c
+17
-4
crypto/perlasm/sparcv9_modes.pl
crypto/perlasm/sparcv9_modes.pl
+491
-29
未找到文件。
crypto/aes/asm/aest4-sparcv9.pl
浏览文件 @
cd686946
...
@@ -83,7 +83,12 @@ $::evp=1; # if $evp is set to 0, script generates module with
...
@@ -83,7 +83,12 @@ $::evp=1; # if $evp is set to 0, script generates module with
{
{
my
(
$inp
,
$out
,
$key
,
$rounds
,
$tmp
,
$mask
)
=
map
("
%o
$_
",(
0
..
5
));
my
(
$inp
,
$out
,
$key
,
$rounds
,
$tmp
,
$mask
)
=
map
("
%o
$_
",(
0
..
5
));
$code
=
<<___;
$code
.=<<
___
if
(
$
::
abibits
==
64
);
.
register
%g2
,
#scratch
.
register
%g3
,
#scratch
___
$code
.=
<<___;
.text
.text
.globl aes_t4_encrypt
.globl aes_t4_encrypt
...
@@ -411,24 +416,6 @@ my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
...
@@ -411,24 +416,6 @@ my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
my
(
$ileft
,
$iright
,
$ooff
,
$omask
,
$ivoff
)
=
map
("
%l
$_
",(
1
..
7
));
my
(
$ileft
,
$iright
,
$ooff
,
$omask
,
$ivoff
)
=
map
("
%l
$_
",(
1
..
7
));
$code
.=
<<___;
$code
.=
<<___;
.align 32
_aes128_loadkey:
ldx [$key + 0], %g4
ldx [$key + 8], %g5
___
for
(
$i
=
2
;
$i
<
22
;
$i
++
)
{
# load key schedule
$code
.=
<<___;
ldd [$key + `8*$i`], %f`12+2*$i`
___
}
$code
.=
<<___;
retl
nop
.type _aes128_loadkey,#function
.size _aes128_loadkey,.-_aes128_loadkey
_aes128_load_enckey=_aes128_loadkey
_aes128_load_deckey=_aes128_loadkey
.align 32
.align 32
_aes128_encrypt_1x:
_aes128_encrypt_1x:
___
___
...
@@ -477,6 +464,35 @@ $code.=<<___;
...
@@ -477,6 +464,35 @@ $code.=<<___;
.type _aes128_encrypt_2x,#function
.type _aes128_encrypt_2x,#function
.size _aes128_encrypt_2x,.-_aes128_encrypt_2x
.size _aes128_encrypt_2x,.-_aes128_encrypt_2x
.align 32
_aes128_loadkey:
ldx [$key + 0], %g4
ldx [$key + 8], %g5
___
for
(
$i
=
2
;
$i
<
22
;
$i
++
)
{
# load key schedule
$code
.=
<<___;
ldd [$key + `8*$i`], %f`12+2*$i`
___
}
$code
.=
<<___;
retl
nop
.type _aes128_loadkey,#function
.size _aes128_loadkey,.-_aes128_loadkey
_aes128_load_enckey=_aes128_loadkey
_aes128_load_deckey=_aes128_loadkey
___
&alg_cbc_encrypt_implement
("
aes
",
128
);
if
(
$
::
evp
)
{
&alg_ctr32_implement
("
aes
",
128
);
&alg_xts_implement
("
aes
",
128
,"
en
");
&alg_xts_implement
("
aes
",
128
,"
de
");
}
&alg_cbc_decrypt_implement
("
aes
",
128
);
$code
.=
<<___;
.align 32
.align 32
_aes128_decrypt_1x:
_aes128_decrypt_1x:
___
___
...
@@ -524,28 +540,9 @@ $code.=<<___;
...
@@ -524,28 +540,9 @@ $code.=<<___;
aes_dround23_l %f54, %f10, %f6, %f6
aes_dround23_l %f54, %f10, %f6, %f6
.type _aes128_decrypt_2x,#function
.type _aes128_decrypt_2x,#function
.size _aes128_decrypt_2x,.-_aes128_decrypt_2x
.size _aes128_decrypt_2x,.-_aes128_decrypt_2x
.align 32
_aes192_loadkey:
_aes256_loadkey:
ldx [$key + 0], %g4
ldx [$key + 8], %g5
___
___
for
(
$i
=
2
;
$i
<
26
;
$i
++
)
{
# load key schedule
$code
.=
<<___;
ldd [$key + `8*$i`], %f`12+2*$i`
___
}
$code
.=
<<___;
retl
nop
.type _aes192_loadkey,#function
.size _aes192_loadkey,.-_aes192_loadkey
_aes192_load_enckey=_aes192_loadkey
_aes192_load_deckey=_aes192_loadkey
_aes256_load_enckey=_aes192_loadkey
_aes256_load_deckey=_aes192_loadkey
$code
.=
<<___;
.align 32
.align 32
_aes192_encrypt_1x:
_aes192_encrypt_1x:
___
___
...
@@ -594,54 +591,6 @@ $code.=<<___;
...
@@ -594,54 +591,6 @@ $code.=<<___;
.type _aes192_encrypt_2x,#function
.type _aes192_encrypt_2x,#function
.size _aes192_encrypt_2x,.-_aes192_encrypt_2x
.size _aes192_encrypt_2x,.-_aes192_encrypt_2x
.align 32
_aes192_decrypt_1x:
___
for
(
$i
=
0
;
$i
<
5
;
$i
++
)
{
$code
.=
<<___;
aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4
aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0
aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code
.=
<<___;
aes_dround01 %f56, %f0, %f2, %f4
aes_dround23 %f58, %f0, %f2, %f2
aes_dround01_l %f60, %f4, %f2, %f0
retl
aes_dround23_l %f62, %f4, %f2, %f2
.type _aes192_decrypt_1x,#function
.size _aes192_decrypt_1x,.-_aes192_decrypt_1x
.align 32
_aes192_decrypt_2x:
___
for
(
$i
=
0
;
$i
<
5
;
$i
++
)
{
$code
.=
<<___;
aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8
aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10
aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6
aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0
aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2
aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4
aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code
.=
<<___;
aes_dround01 %f56, %f0, %f2, %f8
aes_dround23 %f58, %f0, %f2, %f2
aes_dround01 %f56, %f4, %f6, %f10
aes_dround23 %f58, %f4, %f6, %f6
aes_dround01_l %f60, %f8, %f2, %f0
aes_dround23_l %f62, %f8, %f2, %f2
aes_dround01_l %f60, %f10, %f6, %f4
retl
aes_dround23_l %f62, %f10, %f6, %f6
.type _aes192_decrypt_2x,#function
.size _aes192_decrypt_2x,.-_aes192_decrypt_2x
.align 32
.align 32
_aes256_encrypt_1x:
_aes256_encrypt_1x:
aes_eround01 %f16, %f0, %f2, %f4
aes_eround01 %f16, %f0, %f2, %f4
...
@@ -718,6 +667,40 @@ $code.=<<___;
...
@@ -718,6 +667,40 @@ $code.=<<___;
.type _aes256_encrypt_2x,#function
.type _aes256_encrypt_2x,#function
.size _aes256_encrypt_2x,.-_aes256_encrypt_2x
.size _aes256_encrypt_2x,.-_aes256_encrypt_2x
.align 32
_aes192_loadkey:
ldx [$key + 0], %g4
ldx [$key + 8], %g5
___
for
(
$i
=
2
;
$i
<
26
;
$i
++
)
{
# load key schedule
$code
.=
<<___;
ldd [$key + `8*$i`], %f`12+2*$i`
___
}
$code
.=
<<___;
retl
nop
.type _aes192_loadkey,#function
.size _aes192_loadkey,.-_aes192_loadkey
_aes256_loadkey=_aes192_loadkey
_aes192_load_enckey=_aes192_loadkey
_aes192_load_deckey=_aes192_loadkey
_aes256_load_enckey=_aes192_loadkey
_aes256_load_deckey=_aes192_loadkey
___
&alg_cbc_encrypt_implement
("
aes
",
256
);
&alg_cbc_encrypt_implement
("
aes
",
192
);
if
(
$
::
evp
)
{
&alg_ctr32_implement
("
aes
",
256
);
&alg_xts_implement
("
aes
",
256
,"
en
");
&alg_xts_implement
("
aes
",
256
,"
de
");
&alg_ctr32_implement
("
aes
",
192
);
}
&alg_cbc_decrypt_implement
("
aes
",
192
);
&alg_cbc_decrypt_implement
("
aes
",
256
);
$code
.=
<<___;
.align 32
.align 32
_aes256_decrypt_1x:
_aes256_decrypt_1x:
aes_dround01 %f16, %f0, %f2, %f4
aes_dround01 %f16, %f0, %f2, %f4
...
@@ -793,21 +776,55 @@ $code.=<<___;
...
@@ -793,21 +776,55 @@ $code.=<<___;
ldd [$key + 40], %f22
ldd [$key + 40], %f22
.type _aes256_decrypt_2x,#function
.type _aes256_decrypt_2x,#function
.size _aes256_decrypt_2x,.-_aes256_decrypt_2x
.size _aes256_decrypt_2x,.-_aes256_decrypt_2x
___
&alg_cbc_encrypt_implement
("
aes
",
128
);
.align 32
&alg_cbc_encrypt_implement
("
aes
",
192
);
_aes192_decrypt_1x:
&alg_cbc_encrypt_implement
("
aes
",
256
);
___
for
(
$i
=
0
;
$i
<
5
;
$i
++
)
{
&alg_cbc_decrypt_implement
("
aes
",
128
);
$code
.=
<<___;
&alg_cbc_decrypt_implement
("
aes
",
192
);
aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4
&alg_cbc_decrypt_implement
("
aes
",
256
);
aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0
aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code
.=
<<___;
aes_dround01 %f56, %f0, %f2, %f4
aes_dround23 %f58, %f0, %f2, %f2
aes_dround01_l %f60, %f4, %f2, %f0
retl
aes_dround23_l %f62, %f4, %f2, %f2
.type _aes192_decrypt_1x,#function
.size _aes192_decrypt_1x,.-_aes192_decrypt_1x
if
(
$
::
evp
)
{
.align 32
&alg_ctr32_implement
("
aes
",
128
);
_aes192_decrypt_2x:
&alg_ctr32_implement
("
aes
",
192
);
___
&alg_ctr32_implement
("
aes
",
256
);
for
(
$i
=
0
;
$i
<
5
;
$i
++
)
{
$code
.=
<<___;
aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8
aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10
aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6
aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0
aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2
aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4
aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6
___
}
}
$code
.=
<<___;
aes_dround01 %f56, %f0, %f2, %f8
aes_dround23 %f58, %f0, %f2, %f2
aes_dround01 %f56, %f4, %f6, %f10
aes_dround23 %f58, %f4, %f6, %f6
aes_dround01_l %f60, %f8, %f2, %f0
aes_dround23_l %f62, %f8, %f2, %f2
aes_dround01_l %f60, %f10, %f6, %f4
retl
aes_dround23_l %f62, %f10, %f6, %f6
.type _aes192_decrypt_2x,#function
.size _aes192_decrypt_2x,.-_aes192_decrypt_2x
___
}}}
}}}
if
(
!
$
::
evp
)
{
if
(
!
$
::
evp
)
{
...
...
crypto/evp/e_aes.c
浏览文件 @
cd686946
...
@@ -511,6 +511,18 @@ void aes192_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out,
...
@@ -511,6 +511,18 @@ void aes192_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out,
void
aes256_t4_ctr32_encrypt
(
const
unsigned
char
*
in
,
unsigned
char
*
out
,
void
aes256_t4_ctr32_encrypt
(
const
unsigned
char
*
in
,
unsigned
char
*
out
,
size_t
blocks
,
const
AES_KEY
*
key
,
size_t
blocks
,
const
AES_KEY
*
key
,
unsigned
char
*
ivec
);
unsigned
char
*
ivec
);
void
aes128_t4_xts_encrypt
(
const
unsigned
char
*
in
,
unsigned
char
*
out
,
size_t
blocks
,
const
AES_KEY
*
key1
,
const
AES_KEY
*
key2
,
const
unsigned
char
*
ivec
);
void
aes128_t4_xts_decrypt
(
const
unsigned
char
*
in
,
unsigned
char
*
out
,
size_t
blocks
,
const
AES_KEY
*
key1
,
const
AES_KEY
*
key2
,
const
unsigned
char
*
ivec
);
void
aes256_t4_xts_encrypt
(
const
unsigned
char
*
in
,
unsigned
char
*
out
,
size_t
blocks
,
const
AES_KEY
*
key1
,
const
AES_KEY
*
key2
,
const
unsigned
char
*
ivec
);
void
aes256_t4_xts_decrypt
(
const
unsigned
char
*
in
,
unsigned
char
*
out
,
size_t
blocks
,
const
AES_KEY
*
key1
,
const
AES_KEY
*
key2
,
const
unsigned
char
*
ivec
);
static
int
aes_t4_init_key
(
EVP_CIPHER_CTX
*
ctx
,
const
unsigned
char
*
key
,
static
int
aes_t4_init_key
(
EVP_CIPHER_CTX
*
ctx
,
const
unsigned
char
*
key
,
const
unsigned
char
*
iv
,
int
enc
)
const
unsigned
char
*
iv
,
int
enc
)
...
@@ -681,46 +693,47 @@ static int aes_t4_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
...
@@ -681,46 +693,47 @@ static int aes_t4_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
if
(
key
)
if
(
key
)
{
{
int
bits
=
ctx
->
key_len
*
4
;
int
bits
=
ctx
->
key_len
*
4
;
xctx
->
stream
=
NULL
;
/* key_len is two AES keys */
/* key_len is two AES keys */
if
(
enc
)
if
(
enc
)
{
{
aes_t4_set_encrypt_key
(
key
,
bits
,
&
xctx
->
ks1
.
ks
);
aes_t4_set_encrypt_key
(
key
,
bits
,
&
xctx
->
ks1
.
ks
);
xctx
->
xts
.
block1
=
(
block128_f
)
aes_t4_encrypt
;
xctx
->
xts
.
block1
=
(
block128_f
)
aes_t4_encrypt
;
#if 0 /* not yet */
switch
(
bits
)
{
switch
(
bits
)
{
case
128
:
case
128
:
xctx
->
stream
=
aes128_t4_xts_encrypt
;
xctx
->
stream
=
aes128_t4_xts_encrypt
;
break
;
break
;
#if 0 /* not yet */
case 192:
case 192:
xctx->stream = aes192_t4_xts_encrypt;
xctx->stream = aes192_t4_xts_encrypt;
break;
break;
#endif
case
256
:
case
256
:
xctx
->
stream
=
aes256_t4_xts_encrypt
;
xctx
->
stream
=
aes256_t4_xts_encrypt
;
break
;
break
;
default:
default:
return
0
;
return
0
;
}
}
#endif
}
}
else
else
{
{
aes_t4_set_decrypt_key
(
key
,
ctx
->
key_len
*
4
,
&
xctx
->
ks1
.
ks
);
aes_t4_set_decrypt_key
(
key
,
ctx
->
key_len
*
4
,
&
xctx
->
ks1
.
ks
);
xctx
->
xts
.
block1
=
(
block128_f
)
aes_t4_decrypt
;
xctx
->
xts
.
block1
=
(
block128_f
)
aes_t4_decrypt
;
#if 0 /* not yet */
switch
(
bits
)
{
switch
(
bits
)
{
case
128
:
case
128
:
xctx
->
stream
=
aes128_t4_xts_decrypt
;
xctx
->
stream
=
aes128_t4_xts_decrypt
;
break
;
break
;
#if 0 /* not yet */
case 192:
case 192:
xctx->stream = aes192_t4_xts_decrypt;
xctx->stream = aes192_t4_xts_decrypt;
break;
break;
#endif
case
256
:
case
256
:
xctx
->
stream
=
aes256_t4_xts_decrypt
;
xctx
->
stream
=
aes256_t4_xts_decrypt
;
break
;
break
;
default:
default:
return
0
;
return
0
;
}
}
#endif
}
}
aes_t4_set_encrypt_key
(
key
+
ctx
->
key_len
/
2
,
aes_t4_set_encrypt_key
(
key
+
ctx
->
key_len
/
2
,
...
...
crypto/perlasm/sparcv9_modes.pl
浏览文件 @
cd686946
...
@@ -900,6 +900,480 @@ $::code.=<<___;
...
@@ -900,6 +900,480 @@ $::code.=<<___;
___
___
}
}
sub
alg_xts_implement
{
my
(
$alg
,
$bits
,
$dir
)
=
@_
;
my
(
$inp
,
$out
,
$len
,
$key1
,
$key2
,
$ivec
)
=
map
("
%i
$_
",(
0
..
5
));
my
$rem
=
$ivec
;
$
::
code
.=
<<___;
.globl ${alg}${bits}_t4_xts_${dir}crypt
.align 32
${alg}${bits}_t4_xts_${dir}crypt:
save %sp, -$::frame-16, %sp
mov $ivec, %o0
add %fp, $::bias-16, %o1
call ${alg}_t4_encrypt
mov $key2, %o2
add %fp, $::bias-16, %l7
ldxa [%l7]0x88, %g2
add %fp, $::bias-8, %l7
ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
sethi %hi(0x76543210), %l7
or %l7, %lo(0x76543210), %l7
bmask %l7, %g0, %g0 ! byte swap mask
prefetch [$inp], 20
prefetch [$inp + 63], 20
call _${alg}${bits}_load_${dir}ckey
and $len, 15, $rem
and $len, -16, $len
___
$code
.=<<
___
if
(
$dir
eq
"
de
");
mov
0
,
%l7
movrnz
$rem
,
16
,
%l7
sub
$
len
,
%
l7
,
$
len
___
$
code
.=<<
___
;
sub
$
inp
,
$
out
,
$
blk_init
!
$
inp!
=$
out
and
$
inp
,
7,
$
ileft
andn
$
inp
,
7,
$
inp
sll
$
ileft
,
3,
$
ileft
mov
64,
$
iright
mov
0
xff
,
$
omask
sub
$
iright
,
$
ileft
,
$
iright
and
$
out
,
7,
$
ooff
cmp
$
len
,
255
movrnz
$
ooff
,
0,
$
blk_init
!
if
(
$
out
&7
||
movleu
$::
size_t_cc
,
0,
$
blk_init
!
$
len
<256
||
brnz
,pn $blk_init, .L${
bits
}
_xts_$
{
dir
}
blk
!
$inp
==
$out
)
srl
$omask
,
$ooff
,
$omask
andcc
$len
,
16
,
%g0
!
is
number
of
blocks
even
?
___
$code
.=<<
___
if
(
$dir
eq
"
de
");
brz
,
pn
$len
,
.
L$
{
bits
}
_xts_$
{
dir
}
steal
___
$code
.=
<<___;
alignaddrl $out, %g0, $out
bz %icc, .L${bits}_xts_${dir}loop2x
srlx $len, 4, $len
.L${bits}_xts_${dir}loop:
ldx [$inp + 0], %o0
brz,pt $ileft, 4f
ldx [$inp + 8], %o1
ldx [$inp + 16], %o2
sllx %o0, $ileft, %o0
srlx %o1, $iright, %g1
sllx %o1, $ileft, %o1
or %g1, %o0, %o0
srlx %o2, $iright, %o2
or %o2, %o1, %o1
4:
movxtod %g2, %f12
movxtod %g3, %f14
bshuffle %f12, %f12, %f12
bshuffle %f14, %f14, %f14
xor %g4, %o0, %o0 ! ^= rk[0]
xor %g5, %o1, %o1
movxtod %o0, %f0
movxtod %o1, %f2
fxor %f12, %f0, %f0 ! ^= tweak[0]
fxor %f14, %f2, %f2
prefetch [$out + 63], 22
prefetch [$inp + 16+63], 20
call _${alg}${bits}_${dir}crypt_1x
add $inp, 16, $inp
fxor %f12, %f0, %f0 ! ^= tweak[0]
fxor %f14, %f2, %f2
srax %g3, 63, %l7 ! next tweak value
addcc %g2, %g2, %g2
and %l7, 0x87, %l7
addxc %g3, %g3, %g3
xor %l7, %g2, %g2
brnz,pn $ooff, 2f
sub $len, 1, $len
std %f0, [$out + 0]
std %f2, [$out + 8]
brnz,pt $len, .L${bits}_xts_${dir}loop2x
add $out, 16, $out
brnz,pn $rem, .L${bits}_xts_${dir}steal
nop
ret
restore
.align 16
2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard
! and ~3x deterioration
! in inp==out case
faligndata %f0, %f0, %f4 ! handle unaligned output
faligndata %f0, %f2, %f6
faligndata %f2, %f2, %f8
stda %f4, [$out + $omask]0xc0 ! partial store
std %f6, [$out + 8]
add $out, 16, $out
orn %g0, $omask, $omask
stda %f8, [$out + $omask]0xc0 ! partial store
brnz,pt $len, .L${bits}_xts_${dir}loop2x+4
orn %g0, $omask, $omask
brnz,pn $rem, .L${bits}_xts_${dir}steal
nop
ret
restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align 32
.L${bits}_xts_${dir}loop2x:
ldx [$inp + 0], %o0
ldx [$inp + 8], %o1
ldx [$inp + 16], %o2
brz,pt $ileft, 4f
ldx [$inp + 24], %o3
ldx [$inp + 32], %o4
sllx %o0, $ileft, %o0
srlx %o1, $iright, %g1
or %g1, %o0, %o0
sllx %o1, $ileft, %o1
srlx %o2, $iright, %g1
or %g1, %o1, %o1
sllx %o2, $ileft, %o2
srlx %o3, $iright, %g1
or %g1, %o2, %o2
sllx %o3, $ileft, %o3
srlx %o4, $iright, %o4
or %o4, %o3, %o3
4:
movxtod %g2, %f12
movxtod %g3, %f14
bshuffle %f12, %f12, %f12
bshuffle %f14, %f14, %f14
srax %g3, 63, %l7 ! next tweak value
addcc %g2, %g2, %g2
and %l7, 0x87, %l7
addxc %g3, %g3, %g3
xor %l7, %g2, %g2
movxtod %g2, %f8
movxtod %g3, %f10
bshuffle %f8, %f8, %f8
bshuffle %f10, %f10, %f10
xor %g4, %o0, %o0 ! ^= rk[0]
xor %g5, %o1, %o1
xor %g4, %o2, %o2 ! ^= rk[0]
xor %g5, %o3, %o3
movxtod %o0, %f0
movxtod %o1, %f2
movxtod %o2, %f4
movxtod %o3, %f6
fxor %f12, %f0, %f0 ! ^= tweak[0]
fxor %f14, %f2, %f2
fxor %f8, %f4, %f4 ! ^= tweak[0]
fxor %f10, %f6, %f6
prefetch [$out + 63], 22
prefetch [$inp + 32+63], 20
call _${alg}${bits}_${dir}crypt_2x
add $inp, 32, $inp
movxtod %g2, %f8
movxtod %g3, %f10
srax %g3, 63, %l7 ! next tweak value
addcc %g2, %g2, %g2
and %l7, 0x87, %l7
addxc %g3, %g3, %g3
xor %l7, %g2, %g2
bshuffle %f8, %f8, %f8
bshuffle %f10, %f10, %f10
fxor %f12, %f0, %f0 ! ^= tweak[0]
fxor %f14, %f2, %f2
fxor %f8, %f4, %f4
fxor %f10, %f6, %f6
brnz,pn $ooff, 2f
sub $len, 2, $len
std %f0, [$out + 0]
std %f2, [$out + 8]
std %f4, [$out + 16]
std %f6, [$out + 24]
brnz,pt $len, .L${bits}_xts_${dir}loop2x
add $out, 32, $out
fsrc2 %f4, %f0
fsrc2 %f6, %f2
brnz,pn $rem, .L${bits}_xts_${dir}steal
nop
ret
restore
.align 16
2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard
! and ~3x deterioration
! in inp==out case
faligndata %f0, %f0, %f8 ! handle unaligned output
faligndata %f0, %f2, %f10
faligndata %f2, %f4, %f12
faligndata %f4, %f6, %f14
faligndata %f6, %f6, %f0
stda %f8, [$out + $omask]0xc0 ! partial store
std %f10, [$out + 8]
std %f12, [$out + 16]
std %f14, [$out + 24]
add $out, 32, $out
orn %g0, $omask, $omask
stda %f0, [$out + $omask]0xc0 ! partial store
brnz,pt $len, .L${bits}_xts_${dir}loop2x+4
orn %g0, $omask, $omask
fsrc2 %f4, %f0
fsrc2 %f6, %f2
brnz,pn $rem, .L${bits}_xts_${dir}steal
nop
ret
restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align 32
.L${bits}_xts_${dir}blk:
add $out, $len, $blk_init
and $blk_init, 63, $blk_init ! tail
sub $len, $blk_init, $len
add $blk_init, 15, $blk_init ! round up to 16n
srlx $len, 4, $len
srl $blk_init, 4, $blk_init
sub $len, 1, $len
add $blk_init, 1, $blk_init
.L${bits}_xts_${dir}blk2x:
ldx [$inp + 0], %o0
ldx [$inp + 8], %o1
ldx [$inp + 16], %o2
brz,pt $ileft, 5f
ldx [$inp + 24], %o3
ldx [$inp + 32], %o4
sllx %o0, $ileft, %o0
srlx %o1, $iright, %g1
or %g1, %o0, %o0
sllx %o1, $ileft, %o1
srlx %o2, $iright, %g1
or %g1, %o1, %o1
sllx %o2, $ileft, %o2
srlx %o3, $iright, %g1
or %g1, %o2, %o2
sllx %o3, $ileft, %o3
srlx %o4, $iright, %o4
or %o4, %o3, %o3
5:
movxtod %g2, %f12
movxtod %g3, %f14
bshuffle %f12, %f12, %f12
bshuffle %f14, %f14, %f14
srax %g3, 63, %l7 ! next tweak value
addcc %g2, %g2, %g2
and %l7, 0x87, %l7
addxc %g3, %g3, %g3
xor %l7, %g2, %g2
movxtod %g2, %f8
movxtod %g3, %f10
bshuffle %f8, %f8, %f8
bshuffle %f10, %f10, %f10
xor %g4, %o0, %o0 ! ^= rk[0]
xor %g5, %o1, %o1
xor %g4, %o2, %o2 ! ^= rk[0]
xor %g5, %o3, %o3
movxtod %o0, %f0
movxtod %o1, %f2
movxtod %o2, %f4
movxtod %o3, %f6
fxor %f12, %f0, %f0 ! ^= tweak[0]
fxor %f14, %f2, %f2
fxor %f8, %f4, %f4 ! ^= tweak[0]
fxor %f10, %f6, %f6
prefetch [$inp + 32+63], 20
call _${alg}${bits}_${dir}crypt_2x
add $inp, 32, $inp
movxtod %g2, %f8
movxtod %g3, %f10
srax %g3, 63, %l7 ! next tweak value
addcc %g2, %g2, %g2
and %l7, 0x87, %l7
addxc %g3, %g3, %g3
xor %l7, %g2, %g2
bshuffle %f8, %f8, %f8
bshuffle %f10, %f10, %f10
fxor %f12, %f0, %f0 ! ^= tweak[0]
fxor %f14, %f2, %f2
fxor %f8, %f4, %f4
fxor %f10, %f6, %f6
stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
add $out, 8, $out
stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
add $out, 8, $out
stda %f4, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
add $out, 8, $out
stda %f6, [$out]0xe2 ! ASI_BLK_INIT, T4-specific
bgu,pt $::size_t_cc, .L${bits}_xts_${dir}blk2x
add $out, 8, $out
add $blk_init, $len, $len
andcc $len, 1, %g0 ! is number of blocks even?
membar #StoreLoad|#StoreStore
bnz,pt %icc, .L${bits}_xts_${dir}loop
srl $len, 0, $len
brnz,pn $len, .L${bits}_xts_${dir}loop2x
nop
fsrc2 %f4, %f0
fsrc2 %f6, %f2
brnz,pn $rem, .L${bits}_xts_${dir}steal
nop
ret
restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
___
$code
.=<<
___
if
(
$dir
eq
"
en
");
.
align
32
.
L$
{
bits
}
_xts_$
{
dir
}
steal:
std
%f0
,
[
%fp
+
$
::
bias
-
16
]
!
copy
of
output
std
%f2
,
[
%fp
+
$
::
bias
-
8
]
srl
$ileft
,
3
,
$ileft
add
%fp
,
$
::
bias
-
16
,
%l7
add
$inp
,
$ileft
,
$inp
!
original
$inp
+
$len
&-
15
add
$out
,
$ooff
,
$out
!
original
$out
+
$len
&-
15
mov
0
,
$ileft
nop
!
align
.
L$
{
bits
}
_xts_$
{
dir
}
stealing:
ldub
[
$inp
+
$ileft
],
%o0
ldub
[
%l7
+
$ileft
],
%o1
dec
$rem
stb
%o0
,
[
%l7
+
$ileft
]
stb
%o1
,
[
$out
+
$ileft
]
brnz
$rem
,
.
L$
{
bits
}
_xts_$
{
dir
}
stealing
inc
$ileft
mov
%l7
,
$inp
sub
$
out
,
16,
$
out
mov
0,
$
ileft
sub
$
out
,
$
ooff
,
$
out
ba
.L${
bits
}
_xts_$
{
dir
}
loop
!
one
more
time
mov
1
,
$len
!
$rem
is
0
___
$code
.=<<
___
if
(
$dir
eq
"
de
");
.
align
32
.
L$
{
bits
}
_xts_$
{
dir
}
steal:
ldx
[
$inp
+
0
],
%o0
brz
,
pt
$ileft
,
8
f
ldx
[
$inp
+
8
],
%o1
ldx
[
$inp
+
16
],
%o2
sllx
%o0
,
$ileft
,
%o0
srlx
%o1
,
$iright
,
%g1
sllx
%o1
,
$ileft
,
%o1
or
%g1
,
%o0
,
%o0
srlx
%o2
,
$iright
,
%o2
or
%o2
,
%o1
,
%o1
8
:
srax
%g3
,
63
,
%l7
!
next
tweak
value
addcc
%g2
,
%g2
,
%o2
and
%l7
,
0x87
,
%l7
addxc
%g3
,
%g3
,
%o3
xor
%l7
,
%o2
,
%o2
movxtod
%o2
,
%f12
movxtod
%o3
,
%f14
bshuffle
%f12
,
%f12
,
%f12
bshuffle
%f14
,
%f14
,
%f14
xor
%g4
,
%o0
,
%o0
!
^=
rk
[
0
]
xor
%g5
,
%o1
,
%o1
movxtod
%o0
,
%f0
movxtod
%o1
,
%f2
fxor
%f12
,
%f0
,
%f0
!
^=
tweak
[
0
]
fxor
%f14
,
%f2
,
%f2
call
_$
{
alg
}
$
{
bits
}
_$
{
dir
}
crypt_1x
add
$inp
,
16
,
$inp
fxor
%f12
,
%f0
,
%f0
!
^=
tweak
[
0
]
fxor
%f14
,
%f2
,
%f2
std
%f0
,
[
%fp
+
$
::
bias
-
16
]
std
%f2
,
[
%fp
+
$
::
bias
-
8
]
srl
$ileft
,
3
,
$ileft
add
%fp
,
$
::
bias
-
16
,
%l7
add
$inp
,
$ileft
,
$inp
!
original
$inp
+
$len
&-
15
add
$out
,
$ooff
,
$out
!
original
$out
+
$len
&-
15
mov
0
,
$ileft
add
$out
,
16
,
$out
nop
!
align
.
L$
{
bits
}
_xts_$
{
dir
}
stealing:
ldub
[
$inp
+
$ileft
],
%o0
ldub
[
%l7
+
$ileft
],
%o1
dec
$rem
stb
%o0
,
[
%l7
+
$ileft
]
stb
%o1
,
[
$out
+
$ileft
]
brnz
$rem
,
.
L$
{
bits
}
_xts_$
{
dir
}
stealing
inc
$ileft
mov
%l7
,
$inp
sub
$
out
,
16,
$
out
mov
0,
$
ileft
sub
$
out
,
$
ooff
,
$
out
ba
.L${
bits
}
_xts_$
{
dir
}
loop
!
one
more
time
mov
1
,
$len
!
$rem
is
0
___
$code
.=
<<___;
ret
restore
.type ${alg}${bits}_t4_xts_${dir}crypt,#function
.size ${alg}${bits}_t4_xts_${dir}crypt,.-${alg}${bits}_t4_xts_${dir}crypt
___
}
# Purpose of these subroutines is to explicitly encode VIS instructions,
# Purpose of these subroutines is to explicitly encode VIS instructions,
# so that one can compile the module without having to specify VIS
# so that one can compile the module without having to specify VIS
# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
...
@@ -909,6 +1383,7 @@ sub unvis {
...
@@ -909,6 +1383,7 @@ sub unvis {
my
(
$mnemonic
,
$rs1
,
$rs2
,
$rd
)
=
@_
;
my
(
$mnemonic
,
$rs1
,
$rs2
,
$rd
)
=
@_
;
my
(
$ref
,
$opf
);
my
(
$ref
,
$opf
);
my
%visopf
=
(
"
faligndata
"
=>
0x048
,
my
%visopf
=
(
"
faligndata
"
=>
0x048
,
"
bshuffle
"
=>
0x04c
,
"
fnot2
"
=>
0x066
,
"
fnot2
"
=>
0x066
,
"
fxor
"
=>
0x06c
,
"
fxor
"
=>
0x06c
,
"
fsrc2
"
=>
0x078
);
"
fsrc2
"
=>
0x078
);
...
@@ -933,13 +1408,17 @@ my %visopf = ( "faligndata" => 0x048,
...
@@ -933,13 +1408,17 @@ my %visopf = ( "faligndata" => 0x048,
return
$ref
;
return
$ref
;
}
}
}
}
sub
unvis3
{
sub
unvis3
{
my
(
$mnemonic
,
$rs1
,
$rs2
,
$rd
)
=
@_
;
my
(
$mnemonic
,
$rs1
,
$rs2
,
$rd
)
=
@_
;
my
%bias
=
(
"
g
"
=>
0
,
"
o
"
=>
8
,
"
l
"
=>
16
,
"
i
"
=>
24
);
my
%bias
=
(
"
g
"
=>
0
,
"
o
"
=>
8
,
"
l
"
=>
16
,
"
i
"
=>
24
);
my
(
$ref
,
$opf
);
my
(
$ref
,
$opf
);
my
%visopf
=
(
"
addxc
"
=>
0x011
,
my
%visopf
=
(
"
addxc
"
=>
0x011
,
"
addxccc
"
=>
0x013
,
"
addxccc
"
=>
0x013
,
"
umulxhi
"
=>
0x016
);
"
umulxhi
"
=>
0x016
,
"
alignaddr
"
=>
0x018
,
"
bmask
"
=>
0x019
,
"
alignaddrl
"
=>
0x01a
);
$ref
=
"
$mnemonic
\t
$rs1
,
$rs2
,
$rd
";
$ref
=
"
$mnemonic
\t
$rs1
,
$rs2
,
$rd
";
...
@@ -956,20 +1435,6 @@ my %visopf = ( "addxc" => 0x011,
...
@@ -956,20 +1435,6 @@ my %visopf = ( "addxc" => 0x011,
return
$ref
;
return
$ref
;
}
}
}
}
sub
unalignaddr
{
my
(
$mnemonic
,
$rs1
,
$rs2
,
$rd
)
=
@_
;
my
%bias
=
(
"
g
"
=>
0
,
"
o
"
=>
8
,
"
l
"
=>
16
,
"
i
"
=>
24
);
my
$ref
=
"
$mnemonic
\t
$rs1
,
$rs2
,
$rd
";
my
$opf
=
$mnemonic
=~
/l$/
?
0x01a
:
0x18
;
foreach
(
$rs1
,
$rs2
,
$rd
)
{
if
(
/%([goli])([0-7])/
)
{
$_
=
$bias
{
$
1
}
+
$
2
;
}
else
{
return
$ref
;
}
}
return
sprintf
"
.word
\t
0x%08x !%s
",
0x81b00000
|
$rd
<<
25
|
$rs1
<<
14
|
$opf
<<
5
|
$rs2
,
$ref
;
}
sub
unaes_round
{
# 4-argument instructions
sub
unaes_round
{
# 4-argument instructions
my
(
$mnemonic
,
$rs1
,
$rs2
,
$rs3
,
$rd
)
=
@_
;
my
(
$mnemonic
,
$rs1
,
$rs2
,
$rs3
,
$rd
)
=
@_
;
...
@@ -1121,35 +1586,32 @@ sub emit_assembler {
...
@@ -1121,35 +1586,32 @@ sub emit_assembler {
foreach
(
split
("
\n
",
$
::
code
))
{
foreach
(
split
("
\n
",
$
::
code
))
{
s/\`([^\`]*)\`/eval $1/g
e
;
s/\`([^\`]*)\`/eval $1/g
e
;
s/\b(f[a-z]+2[sd]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})\s*$/$1\t%f0,$2,$3/g
;
s/\b(f[a-z]+2[sd]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})\s*$/$1\t%f0,$2,$3/g
o
;
s/\b(aes_[edk][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/
s/\b(aes_[edk][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/
&unaes_round($1,$2,$3,$4,$5)
&unaes_round($1,$2,$3,$4,$5)
/g
e
or
/g
e
o
or
s/\b(aes_kexpand[02])\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
s/\b(aes_kexpand[02])\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
&unaes_kexpand($1,$2,$3,$4)
&unaes_kexpand($1,$2,$3,$4)
/g
e
or
/g
e
o
or
s/\b(camellia_f)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/
s/\b(camellia_f)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/
&uncamellia_f($1,$2,$3,$4,$5)
&uncamellia_f($1,$2,$3,$4,$5)
/g
e
or
/g
e
o
or
s/\b(camellia_[^s]+)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
s/\b(camellia_[^s]+)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
&uncamellia3($1,$2,$3,$4)
&uncamellia3($1,$2,$3,$4)
/g
e
or
/g
e
o
or
s/\b(mov[ds]to\w+)\s+(%f[0-9]{1,2}),\s*(%[goli][0-7])/
s/\b(mov[ds]to\w+)\s+(%f[0-9]{1,2}),\s*(%[goli][0-7])/
&unmovxtox($1,$2,$3)
&unmovxtox($1,$2,$3)
/g
e
or
/g
e
o
or
s/\b(mov[xw]to[ds])\s+(%[goli][0-7]),\s*(%f[0-9]{1,2})/
s/\b(mov[xw]to[ds])\s+(%[goli][0-7]),\s*(%f[0-9]{1,2})/
&unmovxtox($1,$2,$3)
&unmovxtox($1,$2,$3)
/g
e
or
/g
e
o
or
s/\b(
f
[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
s/\b(
[fb]
[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
&unvis($1,$2,$3,$4)
&unvis($1,$2,$3,$4)
/g
e
or
/g
eo
or
s/\b(alignaddr[l]*)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
s/\b(umulxhi|bmask|addxc[c]{0,2}|alignaddr[l]*)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
&unalignaddr($1,$2,$3,$4)
/g
e
or
s/\b(umulxhi|addxc[c]{0,2})\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
&unvis3($1,$2,$3,$4)
&unvis3($1,$2,$3,$4)
/g
e
;
/g
e
o
;
print
$_
,"
\n
";
print
$_
,"
\n
";
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录