Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenHarmony
Third Party Openssl
提交
ed998634
T
Third Party Openssl
项目概览
OpenHarmony
/
Third Party Openssl
接近 2 年 前同步成功
通知
12
Star
18
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Third Party Openssl
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ed998634
编写于
3月 19, 2012
作者:
A
Andy Polyakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
e_padlock-x86[_64].pl: better understanding of prefetch errata and proper
workaround.
上级
884c580e
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
204 addition
and
78 deletion
+204
-78
engines/asm/e_padlock-x86.pl
engines/asm/e_padlock-x86.pl
+81
-23
engines/asm/e_padlock-x86_64.pl
engines/asm/e_padlock-x86_64.pl
+123
-55
未找到文件。
engines/asm/e_padlock-x86.pl
浏览文件 @
ed998634
...
...
@@ -37,7 +37,7 @@ require "x86asm.pl";
&asm_init
(
$ARGV
[
0
],
$
0
);
%PADLOCK_
MARGIN
=
(
ecb
=>
128
,
cbc
=>
64
);
# prefetch errata
%PADLOCK_
PREFETCH
=
(
ecb
=>
128
,
cbc
=>
64
);
# prefetch errata
$PADLOCK_CHUNK
=
512
;
# Must be a power of 2 larger than 16
$ctx
=
"
edx
";
...
...
@@ -188,10 +188,6 @@ my ($mode,$opcode) = @_;
&movq
("
mm0
",
&QWP
(
-
16
,
$ctx
));
# load [upper part of] counter
}
else
{
&xor
("
ebx
","
ebx
");
if
(
$PADLOCK_MARGIN
{
$mode
})
{
&cmp
(
$len
,
$PADLOCK_MARGIN
{
$mode
});
&jbe
(
&label
("
${mode}
_short
"));
}
&test
(
&DWP
(
0
,
$ctx
),
1
<<
5
);
# align bit in control word
&jnz
(
&label
("
${mode}
_aligned
"));
&test
(
$out
,
0x0f
);
...
...
@@ -212,7 +208,27 @@ my ($mode,$opcode) = @_;
&neg
("
eax
");
&and
(
$chunk
,
$PADLOCK_CHUNK
-
1
);
# chunk=len%PADLOCK_CHUNK
&lea
("
esp
",
&DWP
(
0
,"
eax
","
ebp
"));
# alloca
&mov
("
eax
",
$PADLOCK_CHUNK
);
&cmovz
(
$chunk
,"
eax
");
# chunk=chunk?:PADLOCK_CHUNK
&mov
("
eax
","
ebp
");
&and
("
ebp
",
-
16
);
&and
("
esp
",
-
16
);
&mov
(
&DWP
(
16
,"
ebp
"),"
eax
");
if
(
$PADLOCK_PREFETCH
{
$mode
})
{
&cmp
(
$len
,
$chunk
);
&ja
(
&label
("
${mode}
_loop
"));
&mov
("
eax
",
$inp
);
# check if prefetch crosses page
&cmp
("
ebp
","
esp
");
&cmove
("
eax
",
$out
);
&add
("
eax
",
$len
);
&neg
("
eax
");
&and
("
eax
",
0xfff
);
# distance to page boundary
&cmp
("
eax
",
$PADLOCK_PREFETCH
{
$mode
});
&mov
("
eax
",
-
$PADLOCK_PREFETCH
{
$mode
});
&cmovae
("
eax
",
$chunk
);
# mask=distance<prefetch?-prefetch:-1
&and
(
$chunk
,"
eax
");
&jz
(
&label
("
${mode}
_unaligned_tail
"));
}
&jmp
(
&label
("
${mode}
_loop
"));
&set_label
("
${mode}
_loop
",
16
);
...
...
@@ -276,8 +292,8 @@ my ($mode,$opcode) = @_;
&test
(
$out
,
0x0f
);
&jz
(
&label
("
${mode}
_out_aligned
"));
&mov
(
$len
,
$chunk
);
&shr
(
$len
,
2
);
&lea
(
$inp
,
&DWP
(
0
,"
esp
"));
&shr
(
$len
,
2
);
&data_byte
(
0xf3
,
0xa5
);
# rep movsl
&sub
(
$out
,
$chunk
);
&set_label
("
${mode}
_out_aligned
");
...
...
@@ -288,7 +304,30 @@ my ($mode,$opcode) = @_;
&add
(
$inp
,
$chunk
);
&sub
(
$len
,
$chunk
);
&mov
(
$chunk
,
$PADLOCK_CHUNK
);
if
(
!
$PADLOCK_PREFETCH
{
$mode
})
{
&jnz
(
&label
("
${mode}
_loop
"));
}
else
{
&jz
(
&label
("
${mode}
_break
"));
&cmp
(
$len
,
$chunk
);
&jae
(
&label
("
${mode}
_loop
"));
&set_label
("
${mode}
_unaligned_tail
");
&xor
("
eax
","
eax
");
&cmp
("
esp
","
ebp
");
&cmove
("
eax
",
$len
);
&sub
("
esp
","
eax
");
# alloca
&mov
("
eax
",
$out
);
# save parameters
&mov
(
$chunk
,
$len
);
&shr
(
$len
,
2
);
&lea
(
$out
,
&DWP
(
0
,"
esp
"));
&data_byte
(
0xf3
,
0xa5
);
# rep movsl
&mov
(
$inp
,"
esp
");
&mov
(
$out
,"
eax
");
# restore parameters
&mov
(
$len
,
$chunk
);
&jmp
(
&label
("
${mode}
_loop
"));
&set_label
("
${mode}
_break
",
16
);
}
if
(
$mode
ne
"
ctr32
")
{
&cmp
("
esp
","
ebp
");
&je
(
&label
("
${mode}
_done
"));
...
...
@@ -302,28 +341,24 @@ my ($mode,$opcode) = @_;
&ja
(
&label
("
${mode}
_bzero
"));
&set_label
("
${mode}
_done
");
&mov
("
ebp
",
&DWP
(
16
,"
ebp
"));
&lea
("
esp
",
&DWP
(
24
,"
ebp
"));
if
(
$mode
ne
"
ctr32
")
{
&jmp
(
&label
("
${mode}
_exit
"));
&set_label
("
${mode}
_short
",
16
);
&xor
("
eax
","
eax
");
&lea
("
ebp
",
&DWP
(
-
24
,"
esp
"));
&sub
("
eax
",
$len
);
&lea
("
esp
",
&DWP
(
0
,"
eax
","
ebp
"));
&and
("
esp
",
-
16
);
&xor
(
$chunk
,
$chunk
);
&set_label
("
${mode}
_short_copy
");
&movups
("
xmm0
",
&QWP
(
0
,
$inp
,
$chunk
));
&lea
(
$chunk
,
&DWP
(
16
,
$chunk
));
&cmp
(
$len
,
$chunk
);
&movaps
(
&QWP
(
-
16
,"
esp
",
$chunk
),"
xmm0
");
&ja
(
&label
("
${mode}
_short_copy
"));
&mov
(
$inp
,"
esp
");
&mov
(
$chunk
,
$len
);
&jmp
(
&label
("
${mode}
_loop
"));
&set_label
("
${mode}
_aligned
",
16
);
if
(
$PADLOCK_PREFETCH
{
$mode
})
{
&lea
("
ebp
",
&DWP
(
0
,
$inp
,
$len
));
&neg
("
ebp
");
&and
("
ebp
",
0xfff
);
# distance to page boundary
&xor
("
eax
","
eax
");
&cmp
("
ebp
",
$PADLOCK_PREFETCH
{
$mode
});
&mov
("
ebp
",
$PADLOCK_PREFETCH
{
$mode
}
-
1
);
&cmovae
("
ebp
","
eax
");
&and
("
ebp
",
$len
);
# remainder
&sub
(
$len
,"
ebp
");
&jz
(
&label
("
${mode}
_aligned_tail
"));
}
&lea
("
eax
",
&DWP
(
-
16
,
$ctx
));
# ivp
&lea
("
ebx
",
&DWP
(
16
,
$ctx
));
# key
&shr
(
$len
,
4
);
# len/=AES_BLOCK_SIZE
...
...
@@ -332,6 +367,29 @@ my ($mode,$opcode) = @_;
&movaps
("
xmm0
",
&QWP
(
0
,"
eax
"));
&movaps
(
&QWP
(
-
16
,
$ctx
),"
xmm0
");
# copy [or refresh] iv
}
if
(
$PADLOCK_PREFETCH
{
$mode
})
{
&test
("
ebp
","
ebp
");
&jz
(
&label
("
${mode}
_exit
"));
&set_label
("
${mode}
_aligned_tail
");
&mov
(
$len
,"
ebp
");
&lea
("
ebp
",
&DWP
(
-
24
,"
esp
"));
&mov
("
esp
","
ebp
");
&mov
("
eax
","
ebp
");
&sub
("
esp
",
$len
);
&and
("
ebp
",
-
16
);
&and
("
esp
",
-
16
);
&mov
(
&DWP
(
16
,"
ebp
"),"
eax
");
&mov
("
eax
",
$out
);
# save parameters
&mov
(
$chunk
,
$len
);
&shr
(
$len
,
2
);
&lea
(
$out
,
&DWP
(
0
,"
esp
"));
&data_byte
(
0xf3
,
0xa5
);
# rep movsl
&mov
(
$inp
,"
esp
");
&mov
(
$out
,"
eax
");
# restore parameters
&mov
(
$len
,
$chunk
);
&jmp
(
&label
("
${mode}
_loop
"));
}
&set_label
("
${mode}
_exit
");
}
&mov
("
eax
",
1
);
&lea
("
esp
",
&DWP
(
4
,"
esp
"));
# popf
...
...
engines/asm/e_padlock-x86_64.pl
浏览文件 @
ed998634
...
...
@@ -27,7 +27,7 @@ open STDOUT,"| $^X $xlate $flavour $output";
$code
=
"
.text
\n
";
%PADLOCK_
MARGIN
=
(
ecb
=>
128
,
cbc
=>
64
,
ctr32
=>
64
);
# prefetch errata
%PADLOCK_
PREFETCH
=
(
ecb
=>
128
,
cbc
=>
64
,
ctr32
=>
32
);
# prefetch errata
$PADLOCK_CHUNK
=
512
;
# Must be a power of 2 between 32 and 2^20
$ctx
=
"
%rdx
";
...
...
@@ -285,17 +285,6 @@ padlock_${mode}_encrypt:
lea 16($ctx),$ctx # control word
xor %eax,%eax
xor %ebx,%ebx
___
# Formally speaking correct condtion is $len<=$margin and $inp+$margin
# crosses page boundary [and next page is unreadable]. But $inp can
# be unaligned in which case data can be copied to $out if latter is
# aligned, in which case $out+$margin has to be checked. Covering all
# cases appears more complicated than just copying short input...
$code
.=<<
___
if
(
$PADLOCK_MARGIN
{
$mode
});
cmp
\
$$PADLOCK_MARGIN
{
$mode
},
$len
jbe
.
L$
{
mode
}
_short
___
$code
.=
<<___;
testl \$`1<<5`,($ctx) # align bit in control word
jnz .L${mode}_aligned
test \$0x0f,$out
...
...
@@ -315,6 +304,8 @@ $code.=<<___;
neg %rax
and \$$PADLOCK_CHUNK-1,$chunk # chunk%=PADLOCK_CHUNK
lea (%rax,%rbp),%rsp
mov \$$PADLOCK_CHUNK,%rax
cmovz %rax,$chunk # chunk=chunk?:PADLOCK_CHUNK
___
$code
.=<<
___
if
(
$mode
eq
"
ctr32
");
.
L$
{
mode
}
_reenter:
...
...
@@ -322,10 +313,27 @@ $code.=<<___ if ($mode eq "ctr32");
bswap
%eax
neg
%eax
and
\
$`
$PADLOCK_CHUNK
/
16
-
1
`
,%eax
jz .L
${mode}
_loop
mov
\
$
$PADLOCK_CHUNK
,
$chunk
shl
\
$4,%eax
cmovz
$chunk
,%rax
cmp %rax,
$len
cmova %rax,
$chunk
# don't let counter cross PADLOCK_CHUNK
cmovbe
$len
,
$chunk
___
$code
.=<<___ if (
$PADLOCK_PREFETCH
{
$mode
});
cmp
$chunk
,
$len
ja .L
${mode}
_loop
mov
$inp
,%rax # check if prefetch crosses page
cmp %rsp,%rbp
cmove
$out
,%rax
add
$len
,%rax
neg %rax
and
\
$0xfff,%rax # distance to page boundary
cmp
\
$
$PADLOCK_PREFETCH
{
$mode
},%rax
mov
\
$-
$PADLOCK_PREFETCH
{
$mode
},%rax
cmovae
$chunk
,%rax # mask=distance<prefetch?-prefetch:-1
and %rax,
$chunk
jz .L
${mode}
_unaligned_tail
___
$code
.=<<___;
jmp .L
${mode}
_loop
...
...
@@ -360,12 +368,12 @@ ___
$code
.=<<___ if (
$mode
eq "ctr32");
mov -4(
$ctx
),%eax # pull 32-bit counter
test
\
$0xffff0000,%eax
jnz .L
${mode}
_no_c
orr
jnz .L
${mode}
_no_c
arry
bswap %eax
add
\
$0x10000,%eax
bswap %eax
mov %eax,-4(
$ctx
)
.L
${mode}
_no_c
orr
:
.L
${mode}
_no_c
arry
:
___
$code
.=<<___;
mov %r8,
$out
# restore paramters
...
...
@@ -373,8 +381,8 @@ $code.=<<___;
test
\
$0x0f,
$out
jz .L
${mode}
_out_aligned
mov
$chunk
,
$len
shr
\
$3,
$len
lea (%rsp),
$inp
shr
\
$3,
$len
.byte 0xf3,0x48,0xa5 # rep movsq
sub
$chunk
,
$out
.L
${mode}
_out_aligned:
...
...
@@ -384,9 +392,52 @@ $code.=<<___;
add
$chunk
,
$inp
sub
$chunk
,
$len
mov
\
$
$PADLOCK_CHUNK
,
$chunk
___
if (!
$PADLOCK_PREFETCH
{
$mode
}) {
$code
.=<<___;
jnz .L
${mode}
_loop
___
} else {
$code
.=<<___;
jz .L
${mode}
_break
cmp
$chunk
,
$len
jae .L
${mode}
_loop
___
$code
.=<<___ if (
$mode
eq "ctr32");
mov
$len
,
$chunk
mov
$inp
,%rax # check if prefetch crosses page
cmp %rsp,%rbp
cmove
$out
,%rax
add
$len
,%rax
neg %rax
and
\
$0xfff,%rax # distance to page boundary
cmp
\
$
$PADLOCK_PREFETCH
{
$mode
},%rax
mov
\
$-
$PADLOCK_PREFETCH
{
$mode
},%rax
cmovae
$chunk
,%rax
and %rax,
$chunk
jnz .L
${mode}
_loop
___
$code
.=<<___;
.L
${mode}
_unaligned_tail:
xor %eax,%eax
cmp %rsp,%rbp
cmove
$len
,%rax
mov
$out
,%r8 # save parameters
mov
$len
,
$chunk
sub %rax,%rsp # alloca
shr
\
$3,
$len
lea (%rsp),
$out
.byte 0xf3,0x48,0xa5 # rep movsq
mov %rsp,
$inp
mov %r8,
$out
# restore parameters
mov
$chunk
,
$len
jmp .L
${mode}
_loop
.align 16
.L
${mode}
_break:
___
}
$code
.=<<___;
cmp %rbp,%rsp
je .L
${mode}
_done
pxor %xmm0,%xmm0
...
...
@@ -400,70 +451,87 @@ $code.=<<___;
.L
${mode}
_done:
lea (%rbp),%rsp
jmp .L
${mode}
_exit
___
$code
.=<<___ if (
$PADLOCK_MARGIN
{
$mode
});
.align 16
.L
${mode}
_short:
mov %rsp,%rbp
sub
$len
,%rsp
xor
$chunk
,
$chunk
.L
${mode}
_short_copy:
movups (
$inp
,
$chunk
),%xmm0
lea 16(
$chunk
),
$chunk
cmp
$chunk
,
$len
movaps %xmm0,-16(%rsp,
$chunk
)
ja .L
${mode}
_short_copy
mov %rsp,
$inp
mov
$len
,
$chunk
jmp .L
${mode}
_
`
$
{
mode
}
eq
"
ctr32
"?"
reenter
":"
loop
"`
___
$code
.=<<___;
.align 16
.L
${mode}
_aligned:
___
$code
.=<<___ if (
$mode
eq "ctr32");
mov -4(
$ctx
),%eax # pull 32-bit counter
mov
\
$
`
16
*
0x10000
`
,
$chunk
bswap %eax
cmp
$len
,
$chunk
cmova
$len
,
$chunk
neg %eax
and
\
$0xffff,%eax
jz .L
${mode}
_aligned_loop
mov
\
$
`
16
*
0x10000
`
,
$chunk
shl
\
$4,%eax
cmovz
$chunk
,%rax
cmp %rax,
$len
cmova %rax,
$chunk
# don't let counter cross 2^16
jmp .L
${mode}
_aligned_loop
.align 16
cmovbe
$len
,
$chunk
jbe .L
${mode}
_aligned_skip
.L
${mode}
_aligned_loop:
cmp
$len
,
$chunk
cmova
$len
,
$chunk
mov
$len
,%r10 # save parameters
mov
$chunk
,
$len
mov
$chunk
,%r11
___
$code
.=<<___;
lea -16(
$ctx
),%rax # ivp
lea 16(
$ctx
),%rbx # key
shr
\
$4,
$len
# len/=AES_BLOCK_SIZE
.byte 0xf3,0x0f,0xa7,
$opcode
# rep xcrypt*
___
$code
.=<<___ if (
$mode
!~ /ecb|ctr/);
movdqa (%rax),%xmm0
movdqa %xmm0,-16(
$ctx
) # copy [or refresh] iv
___
$code
.=<<___ if (
$mode
eq "ctr32");
mov -4(
$ctx
),%eax # pull 32-bit counter
bswap %eax
add
\
$0x10000,%eax
bswap %eax
mov %eax,-4(
$ctx
)
mov %r11,
$chunk
# restore paramters
mov %r10,
$len
sub
$chunk
,
$len
mov %r10,
$len
# restore paramters
sub %r11,
$len
mov
\
$
`
16
*
0x10000
`
,
$chunk
jnz .L
${mode}
_aligned_loop
jz .L
${mode}
_exit
cmp
$chunk
,
$len
jae .L
${mode}
_aligned_loop
.L
${mode}
_aligned_skip:
___
$code
.=<<___ if (
$PADLOCK_PREFETCH
{
$mode
});
lea (
$inp
,
$len
),%rbp
neg %rbp
and
\
$0xfff,%rbp # distance to page boundary
xor %eax,%eax
cmp
\
$
$PADLOCK_PREFETCH
{
$mode
},%rbp
mov
\
$
$PADLOCK_PREFETCH
{
$mode
}-1,%rbp
cmovae %rax,%rbp
and
$len
,%rbp # remainder
sub %rbp,
$len
jz .L
${mode}
_aligned_tail
___
$code
.=<<___;
lea -16(
$ctx
),%rax # ivp
lea 16(
$ctx
),%rbx # key
shr
\
$4,
$len
# len/=AES_BLOCK_SIZE
.byte 0xf3,0x0f,0xa7,
$opcode
# rep xcrypt*
___
$code
.=<<___ if (
$mode
!~ /ecb|ctr/);
movdqa (%rax),%xmm0
movdqa %xmm0,-16(
$ctx
) # copy [or refresh] iv
___
$code
.=<<___ if (
$PADLOCK_PREFETCH
{
$mode
});
test %rbp,%rbp # check remainder
jz .L
${mode}
_exit
.L
${mode}
_aligned_tail:
mov
$out
,%r8
mov %rbp,
$chunk
mov %rbp,
$len
lea (%rsp),%rbp
sub
$len
,%rsp
shr
\
$3,
$len
lea (%rsp),
$out
.byte 0xf3,0x48,0xa5 # rep movsq
lea (%r8),
$out
lea (%rsp),
$inp
mov
$chunk
,
$len
jmp .L
${mode}
_loop
___
$code
.=<<___;
.L
${mode}
_exit:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录