Commit 2f98abbc

Author: Andy Polyakov
Date:   December 14, 2002
Parent: 270fa8ae

    x86_64 performance patch.

Showing 12 changed files with 685 additions and 81 deletions (+685 -81)
Configure                    +1   -1
crypto/bn/Makefile.ssl       +2   -0
crypto/bn/asm/x86_64-gcc.c   +575 -0
crypto/bn/bn_div.c           +17  -1
crypto/bn/bn_lcl.h           +16  -1
crypto/des/des_locl.h        +10  -0
crypto/md32_common.h         +27  -2
crypto/md4/md4_dgst.c        +4   -32
crypto/md5/md5_dgst.c        +4   -32
crypto/rc5/rc5_locl.h        +17  -0
crypto/ripemd/rmd_dgst.c     +6   -6
crypto/sha/sha_locl.h        +6   -6
Configure

@@ -391,7 +391,7 @@ my %table=(
 "linux-s390",	"gcc:-DB_ENDIAN -DTERMIO -DNO_ASM -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG::::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "linux-s390x",	"gcc:-DB_ENDIAN -DTERMIO -DNO_ASM -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG::::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "linux-ia64",	"gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK RC4_CHAR:asm/ia64.o:::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"linux-x86_64",	"gcc:-DL_ENDIAN -DNO_ASM ::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG::::::::::dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-x86_64",	"gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall -DMD32_REG_T=int::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK RC4_CHAR BF_PTR2 DES_INT DES_UNROLL:asm/x86_64-gcc.o:::::::::dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "NetBSD-sparc",	"gcc:-DTERMIOS -O3 -fomit-frame-pointer -mv8 -Wall -DB_ENDIAN::(unknown):::BN_LLONG MD2_CHAR RC4_INDEX DES_UNROLL::::::::::dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "NetBSD-m68",	"gcc:-DTERMIOS -O3 -fomit-frame-pointer -Wall -DB_ENDIAN::(unknown):::BN_LLONG MD2_CHAR RC4_INDEX DES_UNROLL::::::::::dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "NetBSD-x86",	"gcc:-DTERMIOS -O3 -fomit-frame-pointer -m486 -Wall::(unknown):::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}::::::::::dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
crypto/bn/Makefile.ssl

@@ -138,6 +138,8 @@ asm/ia64-cpp.o: asm/ia64.S
 	$(CC) $(ASFLAGS) -c -o asm/ia64-cpp.o /tmp/ia64.$$$$.s; \
 	rm -f /tmp/ia64.$$$$.s

+asm/x86_64-gcc.o: asm/x86_64-gcc.c
+
 files:
 	$(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO
crypto/bn/asm/x86_64-gcc.c (new file, mode 100644)

/*
 * x86_64 BIGNUM accelerator version 0.1, December 2002.
 *
 * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
 * project.
 *
 * Rights for redistribution and usage in source and binary forms are
 * granted according to the OpenSSL license. Warranty of any kind is
 * disclaimed.
 *
 * Q. Version 0.1? It doesn't sound like Andy, he used to assign real
 *    versions, like 1.0...
 * A. Well, that's because this code is basically a quick-n-dirty
 *    proof-of-concept hack. As you can see it's implemented with
 *    inline assembler, which means that you're bound to GCC and that
 *    there must be a room for fine-tuning.
 *
 * Q. Why inline assembler?
 * A. x86_64 features own ABI I'm not familiar with. Which is why
 *    I decided to let the compiler take care of subroutine
 *    prologue/epilogue as well as register allocation.
 *
 * Q. How much faster does it get?
 * A. Unfortunately people sitting on x86_64 hardware are prohibited
 *    to disclose the performance numbers, so they (SuSE labs to be
 *    specific) wouldn't tell me. However! Very similar coding technique
 *    (reaching out for 128-bit result from 64x64-bit multiplication)
 *    results in >3 times performance improvement on MIPS and I see no
 *    reason why gain on x86_64 would be so much different:-)
 */

#define BN_ULONG unsigned long

/*
 * "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
 * "g"(0) let the compiler to decide where does it
 * want to keep the value of zero;
 */
#define mul_add(r,a,word,carry) do {	\
	register BN_ULONG high,low;	\
	asm ("mulq %3"			\
		: "=a"(low),"=d"(high)	\
		: "a"(word),"m"(a)	\
		: "cc");		\
	asm ("addq %2,%0; adcq %3,%1"	\
		: "+r"(carry),"+d"(high)\
		: "a"(low),"g"(0)	\
		: "cc");		\
	asm ("addq %2,%0; adcq %3,%1"	\
		: "+m"(r),"+d"(high)	\
		: "r"(carry),"g"(0)	\
		: "cc");		\
	carry=high;			\
	} while (0)

#define mul(r,a,word,carry) do {	\
	register BN_ULONG high,low;	\
	asm ("mulq %3"			\
		: "=a"(low),"=d"(high)	\
		: "a"(word),"g"(a)	\
		: "cc");		\
	asm ("addq %2,%0; adcq %3,%1"	\
		: "+r"(carry),"+d"(high)\
		: "a"(low),"g"(0)	\
		: "cc");		\
	(r)=carry, carry=high;		\
	} while (0)

#define sqr(r0,r1,a)			\
	asm ("mulq %2"			\
		: "=a"(r0),"=d"(r1)	\
		: "a"(a)		\
		: "cc");
BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
	{
	BN_ULONG c1=0;

	if (num <= 0) return(c1);

	while (num&~3)
		{
		mul_add(rp[0],ap[0],w,c1);
		mul_add(rp[1],ap[1],w,c1);
		mul_add(rp[2],ap[2],w,c1);
		mul_add(rp[3],ap[3],w,c1);
		ap+=4; rp+=4; num-=4;
		}
	if (num)
		{
		mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
		mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
		mul_add(rp[2],ap[2],w,c1); return c1;
		}

	return(c1);
	}
BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
	{
	BN_ULONG c1=0;

	if (num <= 0) return(c1);

	while (num&~3)
		{
		mul(rp[0],ap[0],w,c1);
		mul(rp[1],ap[1],w,c1);
		mul(rp[2],ap[2],w,c1);
		mul(rp[3],ap[3],w,c1);
		ap+=4; rp+=4; num-=4;
		}
	if (num)
		{
		mul(rp[0],ap[0],w,c1); if (--num==0) return c1;
		mul(rp[1],ap[1],w,c1); if (--num==0) return c1;
		mul(rp[2],ap[2],w,c1);
		}
	return(c1);
	}

void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
	{
	if (n <= 0) return;

	while (n&~3)
		{
		sqr(r[0],r[1],a[0]);
		sqr(r[2],r[3],a[1]);
		sqr(r[4],r[5],a[2]);
		sqr(r[6],r[7],a[3]);
		a+=4; r+=8; n-=4;
		}
	if (n)
		{
		sqr(r[0],r[1],a[0]); if (--n == 0) return;
		sqr(r[2],r[3],a[1]); if (--n == 0) return;
		sqr(r[4],r[5],a[2]);
		}
	}
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
	{
	BN_ULONG ret,waste;

	asm ("divq %3"
		: "=a"(ret),"=d"(waste)
		: "a"(l),"d"(h),"g"(d)
		: "cc");

	return ret;
	}
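A hedged usage note for bn_div_words(): divq divides the 128-bit value h:l (h in rdx, l in rax) by d and leaves the quotient in rax. Callers must keep h < d; otherwise the quotient does not fit in 64 bits and the CPU raises a divide error. A tiny sketch with illustrative values, not from the commit:

/* Hypothetical sketch, not from the commit: calling bn_div_words().
 * The assertion documents the divq precondition. */
#include <assert.h>
#include <stdio.h>

extern unsigned long bn_div_words(unsigned long h, unsigned long l,
                                  unsigned long d);

int main(void)
{
	unsigned long h = 1, l = 2, d = 3;	/* (2^64 + 2) / 3 */

	assert(h < d);	/* quotient must fit in 64 bits, or divq faults */
	printf("q = %lu\n", bn_div_words(h, l, d));
	return 0;
}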
BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp, int n)
	{
	BN_ULONG ret,i;

	if (n <= 0) return 0;

	asm (
	"	subq	%2,%2		\n"
	".align 16			\n"
	"1:	movq	(%4,%2,8),%0	\n"
	"	adcq	(%5,%2,8),%0	\n"
	"	movq	%0,(%3,%2,8)	\n"
	"	leaq	1(%2),%2	\n"
	"	loop	1b		\n"
	"	sbbq	%0,%0		\n"
	: "+a"(ret),"+c"(n),"+r"(i)
	: "r"(rp),"r"(ap),"r"(bp)
	: "cc"
	);

	return ret&1;
	}
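The inline assembler above leans on flag discipline: subq %2,%2 zeroes the index register and clears CF in one instruction, adcq folds the running carry into each word, leaq and loop advance the index and counter without touching CF, and the final sbbq %0,%0 turns the leftover carry into 0 or -1 (masked with &1 on return). A portable model of the same carry chain, not from the commit:

/* Hypothetical sketch, not from the commit: portable model of the
 * adcq carry chain in bn_add_words(). Returns the final carry (0/1). */
static unsigned long add_words_model(unsigned long *rp,
                                     const unsigned long *ap,
                                     const unsigned long *bp, int n)
{
	unsigned long carry = 0;
	int i;

	for (i = 0; i < n; i++) {
		unsigned long t = ap[i] + carry;	/* may wrap to 0     */
		unsigned long c = (t < carry);		/* carry from +carry */

		rp[i] = t + bp[i];
		carry = c | (rp[i] < t);		/* carry from +bp[i] */
	}
	return carry;
}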
#ifndef SIMICS
BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp, int n)
	{
	BN_ULONG ret,i;

	if (n <= 0) return 0;

	asm (
	"	subq	%2,%2		\n"
	".align 16			\n"
	"1:	movq	(%4,%2,8),%0	\n"
	"	sbbq	(%5,%2,8),%0	\n"
	"	movq	%0,(%3,%2,8)	\n"
	"	leaq	1(%2),%2	\n"
	"	loop	1b		\n"
	"	sbbq	%0,%0		\n"
	: "+a"(ret),"+c"(n),"+r"(i)
	: "r"(rp),"r"(ap),"r"(bp)
	: "cc"
	);

	return ret&1;
	}
#else
/* Simics 1.4<7 has buggy sbbq:-( */
#define BN_MASK2 0xffffffffffffffffL
BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
	{
	BN_ULONG t1,t2;
	int c=0;

	if (n <= 0) return((BN_ULONG)0);

	for (;;)
		{
		t1=a[0]; t2=b[0];
		r[0]=(t1-t2-c)&BN_MASK2;
		if (t1 != t2) c=(t1 < t2);
		if (--n <= 0) break;

		t1=a[1]; t2=b[1];
		r[1]=(t1-t2-c)&BN_MASK2;
		if (t1 != t2) c=(t1 < t2);
		if (--n <= 0) break;

		t1=a[2]; t2=b[2];
		r[2]=(t1-t2-c)&BN_MASK2;
		if (t1 != t2) c=(t1 < t2);
		if (--n <= 0) break;

		t1=a[3]; t2=b[3];
		r[3]=(t1-t2-c)&BN_MASK2;
		if (t1 != t2) c=(t1 < t2);
		if (--n <= 0) break;

		a+=4; b+=4; r+=4;
		}
	return(c);
	}
#endif
/* mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0) */
/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
/* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */

#if 0
/* original macros are kept for reference purposes */
#define mul_add_c(a,b,c0,c1,c2) {	\
	BN_ULONG ta=(a),tb=(b);		\
	t1 = ta * tb;			\
	t2 = BN_UMULT_HIGH(ta,tb);	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	}

#define mul_add_c2(a,b,c0,c1,c2) {	\
	BN_ULONG ta=(a),tb=(b),t0;	\
	t1 = BN_UMULT_HIGH(ta,tb);	\
	t0 = ta * tb;			\
	t2 = t1+t1; c2 += (t2<t1)?1:0;	\
	t1 = t0+t0; t2 += (t1<t0)?1:0;	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	}
#else
#define mul_add_c(a,b,c0,c1,c2) do {	\
	asm ("mulq %3"			\
		: "=a"(t1),"=d"(t2)	\
		: "a"(a),"m"(b)		\
		: "cc");		\
	asm ("addq %2,%0; adcq %3,%1"	\
		: "+r"(c0),"+d"(t2)	\
		: "a"(t1),"g"(0)	\
		: "cc");		\
	asm ("addq %2,%0; adcq %3,%1"	\
		: "+r"(c1),"+r"(c2)	\
		: "d"(t2),"g"(0)	\
		: "cc");		\
	} while (0)

#define sqr_add_c(a,i,c0,c1,c2) do {	\
	asm ("mulq %2"			\
		: "=a"(t1),"=d"(t2)	\
		: "a"(a[i])		\
		: "cc");		\
	asm ("addq %2,%0; adcq %3,%1"	\
		: "+r"(c0),"+d"(t2)	\
		: "a"(t1),"g"(0)	\
		: "cc");		\
	asm ("addq %2,%0; adcq %3,%1"	\
		: "+r"(c1),"+r"(c2)	\
		: "d"(t2),"g"(0)	\
		: "cc");		\
	} while (0)

#define mul_add_c2(a,b,c0,c1,c2) do {	\
	asm ("mulq %3"			\
		: "=a"(t1),"=d"(t2)	\
		: "a"(a),"m"(b)		\
		: "cc");		\
	asm ("addq %0,%0; adcq %2,%1"	\
		: "+d"(t2),"+r"(c2)	\
		: "g"(0)		\
		: "cc");		\
	asm ("addq %0,%0; adcq %2,%1"	\
		: "+a"(t1),"+d"(t2)	\
		: "g"(0)		\
		: "cc");		\
	asm ("addq %2,%0; adcq %3,%1"	\
		: "+r"(c0),"+d"(t2)	\
		: "a"(t1),"g"(0)	\
		: "cc");		\
	asm ("addq %2,%0; adcq %3,%1"	\
		: "+r"(c1),"+r"(c2)	\
		: "d"(t2),"g"(0)	\
		: "cc");		\
	} while (0)
#endif

#define sqr_add_c2(a,i,j,c0,c1,c2)	\
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
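The comba routines below accumulate each column of the product into a three-word counter (c2,c1,c0) and emit one result word per column. A minimal model of mul_add_c(), again via unsigned __int128 and not part of the commit:

/* Hypothetical sketch, not from the commit: what mul_add_c() adds to
 * the three-word accumulator (c2,c1,c0). */
static void mul_add_c_model(unsigned long a, unsigned long b,
                            unsigned long *c0, unsigned long *c1,
                            unsigned long *c2)
{
	unsigned __int128 t = (unsigned __int128)a * b;
	unsigned long lo = (unsigned long)t;
	unsigned long hi = (unsigned long)(t >> 64);

	*c0 += lo; hi  += (*c0 < lo);	/* carry out of the low word   */
	*c1 += hi; *c2 += (*c1 < hi);	/* propagate into the top word */
}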
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
	{
	BN_ULONG bl,bh;
	BN_ULONG t1,t2;
	BN_ULONG c1,c2,c3;

	c1=0; c2=0; c3=0;
	mul_add_c(a[0],b[0],c1,c2,c3); r[0]=c1; c1=0;
	mul_add_c(a[0],b[1],c2,c3,c1); mul_add_c(a[1],b[0],c2,c3,c1); r[1]=c2; c2=0;
	mul_add_c(a[2],b[0],c3,c1,c2); mul_add_c(a[1],b[1],c3,c1,c2); mul_add_c(a[0],b[2],c3,c1,c2); r[2]=c3; c3=0;
	mul_add_c(a[0],b[3],c1,c2,c3); mul_add_c(a[1],b[2],c1,c2,c3); mul_add_c(a[2],b[1],c1,c2,c3); mul_add_c(a[3],b[0],c1,c2,c3); r[3]=c1; c1=0;
	mul_add_c(a[4],b[0],c2,c3,c1); mul_add_c(a[3],b[1],c2,c3,c1); mul_add_c(a[2],b[2],c2,c3,c1); mul_add_c(a[1],b[3],c2,c3,c1); mul_add_c(a[0],b[4],c2,c3,c1); r[4]=c2; c2=0;
	mul_add_c(a[0],b[5],c3,c1,c2); mul_add_c(a[1],b[4],c3,c1,c2); mul_add_c(a[2],b[3],c3,c1,c2); mul_add_c(a[3],b[2],c3,c1,c2); mul_add_c(a[4],b[1],c3,c1,c2); mul_add_c(a[5],b[0],c3,c1,c2); r[5]=c3; c3=0;
	mul_add_c(a[6],b[0],c1,c2,c3); mul_add_c(a[5],b[1],c1,c2,c3); mul_add_c(a[4],b[2],c1,c2,c3); mul_add_c(a[3],b[3],c1,c2,c3); mul_add_c(a[2],b[4],c1,c2,c3); mul_add_c(a[1],b[5],c1,c2,c3); mul_add_c(a[0],b[6],c1,c2,c3); r[6]=c1; c1=0;
	mul_add_c(a[0],b[7],c2,c3,c1); mul_add_c(a[1],b[6],c2,c3,c1); mul_add_c(a[2],b[5],c2,c3,c1); mul_add_c(a[3],b[4],c2,c3,c1); mul_add_c(a[4],b[3],c2,c3,c1); mul_add_c(a[5],b[2],c2,c3,c1); mul_add_c(a[6],b[1],c2,c3,c1); mul_add_c(a[7],b[0],c2,c3,c1); r[7]=c2; c2=0;
	mul_add_c(a[7],b[1],c3,c1,c2); mul_add_c(a[6],b[2],c3,c1,c2); mul_add_c(a[5],b[3],c3,c1,c2); mul_add_c(a[4],b[4],c3,c1,c2); mul_add_c(a[3],b[5],c3,c1,c2); mul_add_c(a[2],b[6],c3,c1,c2); mul_add_c(a[1],b[7],c3,c1,c2); r[8]=c3; c3=0;
	mul_add_c(a[2],b[7],c1,c2,c3); mul_add_c(a[3],b[6],c1,c2,c3); mul_add_c(a[4],b[5],c1,c2,c3); mul_add_c(a[5],b[4],c1,c2,c3); mul_add_c(a[6],b[3],c1,c2,c3); mul_add_c(a[7],b[2],c1,c2,c3); r[9]=c1; c1=0;
	mul_add_c(a[7],b[3],c2,c3,c1); mul_add_c(a[6],b[4],c2,c3,c1); mul_add_c(a[5],b[5],c2,c3,c1); mul_add_c(a[4],b[6],c2,c3,c1); mul_add_c(a[3],b[7],c2,c3,c1); r[10]=c2; c2=0;
	mul_add_c(a[4],b[7],c3,c1,c2); mul_add_c(a[5],b[6],c3,c1,c2); mul_add_c(a[6],b[5],c3,c1,c2); mul_add_c(a[7],b[4],c3,c1,c2); r[11]=c3; c3=0;
	mul_add_c(a[7],b[5],c1,c2,c3); mul_add_c(a[6],b[6],c1,c2,c3); mul_add_c(a[5],b[7],c1,c2,c3); r[12]=c1; c1=0;
	mul_add_c(a[6],b[7],c2,c3,c1); mul_add_c(a[7],b[6],c2,c3,c1); r[13]=c2; c2=0;
	mul_add_c(a[7],b[7],c3,c1,c2); r[14]=c3;
	r[15]=c1;
	}

void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
	{
	BN_ULONG bl,bh;
	BN_ULONG t1,t2;
	BN_ULONG c1,c2,c3;

	c1=0; c2=0; c3=0;
	mul_add_c(a[0],b[0],c1,c2,c3); r[0]=c1; c1=0;
	mul_add_c(a[0],b[1],c2,c3,c1); mul_add_c(a[1],b[0],c2,c3,c1); r[1]=c2; c2=0;
	mul_add_c(a[2],b[0],c3,c1,c2); mul_add_c(a[1],b[1],c3,c1,c2); mul_add_c(a[0],b[2],c3,c1,c2); r[2]=c3; c3=0;
	mul_add_c(a[0],b[3],c1,c2,c3); mul_add_c(a[1],b[2],c1,c2,c3); mul_add_c(a[2],b[1],c1,c2,c3); mul_add_c(a[3],b[0],c1,c2,c3); r[3]=c1; c1=0;
	mul_add_c(a[3],b[1],c2,c3,c1); mul_add_c(a[2],b[2],c2,c3,c1); mul_add_c(a[1],b[3],c2,c3,c1); r[4]=c2; c2=0;
	mul_add_c(a[2],b[3],c3,c1,c2); mul_add_c(a[3],b[2],c3,c1,c2); r[5]=c3; c3=0;
	mul_add_c(a[3],b[3],c1,c2,c3); r[6]=c1;
	r[7]=c2;
	}

void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
	{
	BN_ULONG bl,bh;
	BN_ULONG t1,t2;
	BN_ULONG c1,c2,c3;

	c1=0; c2=0; c3=0;
	sqr_add_c(a,0,c1,c2,c3); r[0]=c1; c1=0;
	sqr_add_c2(a,1,0,c2,c3,c1); r[1]=c2; c2=0;
	sqr_add_c(a,1,c3,c1,c2); sqr_add_c2(a,2,0,c3,c1,c2); r[2]=c3; c3=0;
	sqr_add_c2(a,3,0,c1,c2,c3); sqr_add_c2(a,2,1,c1,c2,c3); r[3]=c1; c1=0;
	sqr_add_c(a,2,c2,c3,c1); sqr_add_c2(a,3,1,c2,c3,c1); sqr_add_c2(a,4,0,c2,c3,c1); r[4]=c2; c2=0;
	sqr_add_c2(a,5,0,c3,c1,c2); sqr_add_c2(a,4,1,c3,c1,c2); sqr_add_c2(a,3,2,c3,c1,c2); r[5]=c3; c3=0;
	sqr_add_c(a,3,c1,c2,c3); sqr_add_c2(a,4,2,c1,c2,c3); sqr_add_c2(a,5,1,c1,c2,c3); sqr_add_c2(a,6,0,c1,c2,c3); r[6]=c1; c1=0;
	sqr_add_c2(a,7,0,c2,c3,c1); sqr_add_c2(a,6,1,c2,c3,c1); sqr_add_c2(a,5,2,c2,c3,c1); sqr_add_c2(a,4,3,c2,c3,c1); r[7]=c2; c2=0;
	sqr_add_c(a,4,c3,c1,c2); sqr_add_c2(a,5,3,c3,c1,c2); sqr_add_c2(a,6,2,c3,c1,c2); sqr_add_c2(a,7,1,c3,c1,c2); r[8]=c3; c3=0;
	sqr_add_c2(a,7,2,c1,c2,c3); sqr_add_c2(a,6,3,c1,c2,c3); sqr_add_c2(a,5,4,c1,c2,c3); r[9]=c1; c1=0;
	sqr_add_c(a,5,c2,c3,c1); sqr_add_c2(a,6,4,c2,c3,c1); sqr_add_c2(a,7,3,c2,c3,c1); r[10]=c2; c2=0;
	sqr_add_c2(a,7,4,c3,c1,c2); sqr_add_c2(a,6,5,c3,c1,c2); r[11]=c3; c3=0;
	sqr_add_c(a,6,c1,c2,c3); sqr_add_c2(a,7,5,c1,c2,c3); r[12]=c1; c1=0;
	sqr_add_c2(a,7,6,c2,c3,c1); r[13]=c2; c2=0;
	sqr_add_c(a,7,c3,c1,c2); r[14]=c3;
	r[15]=c1;
	}

void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
	{
	BN_ULONG bl,bh;
	BN_ULONG t1,t2;
	BN_ULONG c1,c2,c3;

	c1=0; c2=0; c3=0;
	sqr_add_c(a,0,c1,c2,c3); r[0]=c1; c1=0;
	sqr_add_c2(a,1,0,c2,c3,c1); r[1]=c2; c2=0;
	sqr_add_c(a,1,c3,c1,c2); sqr_add_c2(a,2,0,c3,c1,c2); r[2]=c3; c3=0;
	sqr_add_c2(a,3,0,c1,c2,c3); sqr_add_c2(a,2,1,c1,c2,c3); r[3]=c1; c1=0;
	sqr_add_c(a,2,c2,c3,c1); sqr_add_c2(a,3,1,c2,c3,c1); r[4]=c2; c2=0;
	sqr_add_c2(a,3,2,c3,c1,c2); r[5]=c3; c3=0;
	sqr_add_c(a,3,c1,c2,c3); r[6]=c1;
	r[7]=c2;
	}
crypto/bn/bn_div.c

@@ -150,6 +150,20 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
 	   q;					\
 	})
 #  define REMAINDER_IS_ALREADY_CALCULATED
+# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
+   /*
+    * Same story here, but it's 128-bit by 64-bit division. Wow!
+    *					<appro@fy.chalmers.se>
+    */
+#  define bn_div_words(n0,n1,d0)		\
+	({  asm volatile (			\
+		"divq	%4"			\
+		: "=a"(q), "=d"(rem)		\
+		: "a"(n1), "d"(n0), "g"(d0)	\
+		: "cc");			\
+	    q;					\
+	})
+#  define REMAINDER_IS_ALREADY_CALCULATED
 # endif /* __<cpu> */
 # endif /* __GNUC__ */
 #endif /* OPENSSL_NO_ASM */

@@ -296,7 +310,9 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
 			rem=(n1-q*d0)&BN_MASK2;
 #endif

-#ifdef BN_UMULT_HIGH
+#if defined(BN_UMULT_LOHI)
+			BN_UMULT_LOHI(t2l,t2h,d1,q);
+#elif defined(BN_UMULT_HIGH)
 			t2l = d1*q;
 			t2h = BN_UMULT_HIGH(d1,q);
 #else
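The statement-expression macro assigns both BN_div() locals at once: divq leaves the quotient in q (rax) and the remainder in rem (rdx), which is what REMAINDER_IS_ALREADY_CALCULATED announces to the rest of BN_div(). The contract, written out as a check (hypothetical, not from the commit):

/* Hypothetical sketch, not from the commit: the contract of the new
 * x86_64 bn_div_words() macro, checked with unsigned __int128. */
#include <assert.h>

static void bn_div_words_contract(unsigned long n0, unsigned long n1,
                                  unsigned long d0,
                                  unsigned long q, unsigned long rem)
{
	unsigned __int128 n = ((unsigned __int128)n0 << 64) | n1;

	assert(n0 < d0);	/* precondition: quotient fits in 64 bits */
	assert(n / d0 == q);	/* quotient, delivered in rax             */
	assert(n % d0 == rem);	/* remainder, delivered in rdx for free   */
}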
crypto/bn/bn_lcl.h

@@ -230,6 +230,21 @@ struct bignum_ctx
 		: "r"(a), "r"(b));		\
 	ret;	})
 #  endif	/* compiler */
+# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
+#  if defined(__GNUC__)
+#   define BN_UMULT_HIGH(a,b)	({	\
+	register BN_ULONG ret,discard;	\
+	asm ("mulq	%3"		\
+	     : "=a"(discard),"=d"(ret)	\
+	     : "a"(a), "g"(b)		\
+	     : "cc");			\
+	ret;	})
+#   define BN_UMULT_LOHI(low,high,a,b)	\
+	asm ("mulq	%3"		\
+		: "=a"(low),"=d"(high)	\
+		: "a"(a),"g"(b)		\
+		: "cc");
+#  endif
 # endif		/* cpu */
 #endif		/* OPENSSL_NO_ASM */

@@ -347,7 +362,7 @@ struct bignum_ctx
 #define	LBITS(a)	((a)&BN_MASK2l)
 #define	HBITS(a)	(((a)>>BN_BITS4)&BN_MASK2l)
-#define	L2HBITS(a)	((BN_ULONG)((a)&BN_MASK2l)<<BN_BITS4)
+#define	L2HBITS(a)	(((a)<<BN_BITS4)&BN_MASK2)
 #define	LLBITS(a)	((a)&BN_MASKl)
 #define	LHBITS(a)	(((a)>>BN_BITS2)&BN_MASKl)
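What the two new macros hand back, spelled out with unsigned __int128 for comparison: BN_UMULT_HIGH(a,b) is the upper half of the 128-bit product, and BN_UMULT_LOHI(low,high,a,b) yields both halves from a single mulq. The values below are illustrative only.

/* Hypothetical sketch, not from the commit. */
#include <stdio.h>

int main(void)
{
	unsigned long a = 0xdeadbeefcafef00dUL, b = 0x0123456789abcdefUL;
	unsigned __int128 p = (unsigned __int128)a * b;

	printf("BN_UMULT_HIGH(a,b) would be %lx\n", (unsigned long)(p >> 64));
	printf("BN_UMULT_LOHI low half:     %lx\n", (unsigned long)p);
	return 0;
}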
crypto/des/des_locl.h

@@ -159,6 +159,16 @@
 #if defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER)
 #define	ROTATE(a,n)	(_lrotr(a,n))
+#elif defined(__GNUC__) && __GNUC__>=2 && !defined(NO_ASM) && !defined(NO_INLINE_ASM)
+# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
+#  define ROTATE(a,n)	({ register unsigned int ret;	\
+				asm ("rorl %1,%0"	\
+					: "=r"(ret)	\
+					: "I"(n),"0"(a)	\
+					: "cc");	\
+			   ret;				\
+			})
+# endif
 #else
 #define	ROTATE(a,n)	(((a)>>(n))+((a)<<(32-(n))))
 #endif
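The new asm ROTATE and the pre-existing portable fallback agree bit for bit; the asm version simply compiles to a single rorl. Note the "I" constraint: the rotate count must be a compile-time constant in 0..31, which holds for DES. A quick equivalence check (hypothetical, not from the commit):

/* Hypothetical sketch, not from the commit: asm vs portable rotate. */
#include <assert.h>

static unsigned int rotr_portable(unsigned int a)
{
	return (a >> 3) + (a << (32 - 3));	/* the #else branch, n=3 */
}

static unsigned int rotr_asm(unsigned int a)
{
	register unsigned int ret;

	asm ("rorl %1,%0" : "=r"(ret) : "I"(3), "0"(a) : "cc");
	return ret;
}

int main(void)
{
	assert(rotr_asm(0x80000001u) == rotr_portable(0x80000001u));
	return 0;
}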
crypto/md32_common.h

@@ -198,7 +198,7 @@
  *
  *					<appro@fy.chalmers.se>
  */
-# if defined(__i386) || defined(__i386__)
+# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
 #  define ROTATE(a,n)	({ register unsigned int ret;	\
 				asm (			\
 				"roll %1,%0"		\

@@ -224,7 +224,7 @@
  */
 # if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
   /* some GNU C inline assembler templates by <appro@fy.chalmers.se> */
-#  if (defined(__i386) || defined(__i386__)) && !defined(I386_ONLY)
+#  if (defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)) && !defined(I386_ONLY)
 #   define BE_FETCH32(a)	({ register unsigned int l=(a);\
 				asm (			\
 				"bswapl %0"		\

@@ -610,3 +610,28 @@ int HASH_FINAL (unsigned char *md, HASH_CTX *c)
 	 */
 	return 1;
 	}
+
+#ifndef MD32_REG_T
+#define MD32_REG_T long
+/*
+ * This comment was originaly written for MD5, which is why it
+ * discusses A-D. But it basically applies to all 32-bit digests,
+ * which is why it was moved to common header file.
+ *
+ * In case you wonder why A-D are declared as long and not
+ * as MD5_LONG. Doing so results in slight performance
+ * boost on LP64 architectures. The catch is we don't
+ * really care if 32 MSBs of a 64-bit register get polluted
+ * with eventual overflows as we *save* only 32 LSBs in
+ * *either* case. Now declaring 'em long excuses the compiler
+ * from keeping 32 MSBs zeroed resulting in 13% performance
+ * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
+ * Well, to be honest it should say that this *prevents*
+ * performance degradation.
+ *				<appro@fy.chalmers.se>
+ * Apparently there're LP64 compilers that generate better
+ * code if A-D are declared int. Most notably GCC-x86_64
+ * generates better code.
+ *				<appro@fy.chalmers.se>
+ */
+#endif
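The point of MD32_REG_T is register width, not storage width: working variables may live in 64-bit registers, and any overflow garbage in the upper half is discarded when the 32-bit state is stored. A bare sketch of the pattern (hypothetical, not from the commit); note that Configure now passes -DMD32_REG_T=int on linux-x86_64 because GCC generates better code for int there.

/* Hypothetical sketch, not from the commit: the MD32_REG_T pattern. */
#ifndef MD32_REG_T
#define MD32_REG_T long
#endif

typedef unsigned int DIGEST_LONG;	/* stand-in for MD5_LONG et al. */

static DIGEST_LONG round_step(DIGEST_LONG x, DIGEST_LONG y)
{
	register unsigned MD32_REG_T A = x;	/* register-width temp */

	A += y;			/* upper 32 bits may get polluted...  */
	return (DIGEST_LONG)A;	/* ...but only the 32 LSBs are saved  */
}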
crypto/md4/md4_dgst.c

@@ -86,21 +86,7 @@ int MD4_Init(MD4_CTX *c)
 void md4_block_host_order (MD4_CTX *c, const void *data, int num)
 	{
 	const MD4_LONG *X=data;
-	register unsigned long A,B,C,D;
-	/*
-	 * In case you wonder why A-D are declared as long and not
-	 * as MD4_LONG. Doing so results in slight performance
-	 * boost on LP64 architectures. The catch is we don't
-	 * really care if 32 MSBs of a 64-bit register get polluted
-	 * with eventual overflows as we *save* only 32 LSBs in
-	 * *either* case. Now declaring 'em long excuses the compiler
-	 * from keeping 32 MSBs zeroed resulting in 13% performance
-	 * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
-	 * Well, to be honest it should say that this *prevents*
-	 * performance degradation.
-	 *
-	 *				<appro@fy.chalmers.se>
-	 */
+	register unsigned MD32_REG_T A,B,C,D;

 	A=c->A;
 	B=c->B;

@@ -176,25 +162,11 @@ void md4_block_host_order (MD4_CTX *c, const void *data, int num)
 void md4_block_data_order (MD4_CTX *c, const void *data_, int num)
 	{
 	const unsigned char *data=data_;
-	register unsigned long A,B,C,D,l;
-	/*
-	 * In case you wonder why A-D are declared as long and not
-	 * as MD4_LONG. Doing so results in slight performance
-	 * boost on LP64 architectures. The catch is we don't
-	 * really care if 32 MSBs of a 64-bit register get polluted
-	 * with eventual overflows as we *save* only 32 LSBs in
-	 * *either* case. Now declaring 'em long excuses the compiler
-	 * from keeping 32 MSBs zeroed resulting in 13% performance
-	 * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
-	 * Well, to be honest it should say that this *prevents*
-	 * performance degradation.
-	 *
-	 *				<appro@fy.chalmers.se>
-	 */
+	register unsigned MD32_REG_T A,B,C,D,l;
 #ifndef MD32_XARRAY
 	/* See comment in crypto/sha/sha_locl.h for details. */
-	unsigned long	XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
-			XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
+	unsigned MD32_REG_T	XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
+				XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
 # define X(i)	XX##i
 #else
 	MD4_LONG XX[MD4_LBLOCK];
crypto/md5/md5_dgst.c

@@ -86,21 +86,7 @@ int MD5_Init(MD5_CTX *c)
 void md5_block_host_order (MD5_CTX *c, const void *data, int num)
 	{
 	const MD5_LONG *X=data;
-	register unsigned long A,B,C,D;
-	/*
-	 * In case you wonder why A-D are declared as long and not
-	 * as MD5_LONG. Doing so results in slight performance
-	 * boost on LP64 architectures. The catch is we don't
-	 * really care if 32 MSBs of a 64-bit register get polluted
-	 * with eventual overflows as we *save* only 32 LSBs in
-	 * *either* case. Now declaring 'em long excuses the compiler
-	 * from keeping 32 MSBs zeroed resulting in 13% performance
-	 * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
-	 * Well, to be honest it should say that this *prevents*
-	 * performance degradation.
-	 *
-	 *				<appro@fy.chalmers.se>
-	 */
+	register unsigned MD32_REG_T A,B,C,D;

 	A=c->A;
 	B=c->B;

@@ -193,25 +179,11 @@ void md5_block_host_order (MD5_CTX *c, const void *data, int num)
 void md5_block_data_order (MD5_CTX *c, const void *data_, int num)
 	{
 	const unsigned char *data=data_;
-	register unsigned long A,B,C,D,l;
-	/*
-	 * In case you wonder why A-D are declared as long and not
-	 * as MD5_LONG. Doing so results in slight performance
-	 * boost on LP64 architectures. The catch is we don't
-	 * really care if 32 MSBs of a 64-bit register get polluted
-	 * with eventual overflows as we *save* only 32 LSBs in
-	 * *either* case. Now declaring 'em long excuses the compiler
-	 * from keeping 32 MSBs zeroed resulting in 13% performance
-	 * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
-	 * Well, to be honest it should say that this *prevents*
-	 * performance degradation.
-	 *
-	 *				<appro@fy.chalmers.se>
-	 */
+	register unsigned MD32_REG_T A,B,C,D,l;
 #ifndef MD32_XARRAY
 	/* See comment in crypto/sha/sha_locl.h for details. */
-	unsigned long	XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
-			XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
+	unsigned MD32_REG_T	XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
+				XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
 # define X(i)	XX##i
 #else
 	MD5_LONG XX[MD5_LBLOCK];
crypto/rc5/rc5_locl.h

@@ -149,6 +149,23 @@
 #if defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER)
 #define ROTATE_l32(a,n)     _lrotl(a,n)
 #define ROTATE_r32(a,n)     _lrotr(a,n)
+#elif defined(__GNUC__) && __GNUC__>=2 && !defined(NO_ASM) && !defined(NO_INLINE_ASM)
+# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
+#  define ROTATE_l32(a,n)	({ register unsigned int ret;	\
+					asm ("roll %%cl,%0"	\
+						: "=r"(ret)	\
+						: "c"(n),"0"(a)	\
+						: "cc");	\
+				   ret;				\
+				})
+#  define ROTATE_r32(a,n)	({ register unsigned int ret;	\
+					asm ("rorl %%cl,%0"	\
+						: "=r"(ret)	\
+						: "c"(n),"0"(a)	\
+						: "cc");	\
+				   ret;				\
+				})
+# endif
 #else
 #define ROTATE_l32(a,n)     (((a)<<(n&0x1f))|(((a)&0xffffffff)>>(32-(n&0x1f))))
 #define ROTATE_r32(a,n)     (((a)<<(32-(n&0x1f)))|(((a)&0xffffffff)>>(n&0x1f)))
 #endif
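Unlike the DES rotate, RC5 rotates by a data-dependent amount, so the count is bound to the cl register via the "c" constraint rather than passed as an "I" immediate. A reduced form of the same construct (hypothetical, not from the commit):

/* Hypothetical sketch, not from the commit: variable-count rotate. */
static unsigned int rotl32_var(unsigned int a, unsigned int n)
{
	register unsigned int ret;

	asm ("roll %%cl,%0" : "=r"(ret) : "c"(n), "0"(a) : "cc");
	return ret;
}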
crypto/ripemd/rmd_dgst.c

@@ -90,8 +90,8 @@ int RIPEMD160_Init(RIPEMD160_CTX *c)
 void ripemd160_block_host_order (RIPEMD160_CTX *ctx, const void *p, int num)
 	{
 	const RIPEMD160_LONG *XX=p;
-	register unsigned long A,B,C,D,E;
-	register unsigned long a,b,c,d,e;
+	register unsigned MD32_REG_T A,B,C,D,E;
+	register unsigned MD32_REG_T a,b,c,d,e;

 	for (;num--;XX+=HASH_LBLOCK)
 		{

@@ -290,12 +290,12 @@ void ripemd160_block_host_order (RIPEMD160_CTX *ctx, const void *p, int num)
 void ripemd160_block_data_order (RIPEMD160_CTX *ctx, const void *p, int num)
 	{
 	const unsigned char *data=p;
-	register unsigned long A,B,C,D,E;
-	unsigned long a,b,c,d,e,l;
+	register unsigned MD32_REG_T A,B,C,D,E;
+	unsigned MD32_REG_T a,b,c,d,e,l;
 #ifndef MD32_XARRAY
 	/* See comment in crypto/sha/sha_locl.h for details. */
-	unsigned long	XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
-			XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
+	unsigned MD32_REG_T	XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
+				XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
 # define X(i)	XX##i
 #else
 	RIPEMD160_LONG	XX[16];
crypto/sha/sha_locl.h

@@ -224,10 +224,10 @@ int HASH_INIT (SHA_CTX *c)
 void HASH_BLOCK_HOST_ORDER (SHA_CTX *c, const void *d, int num)
 	{
 	const SHA_LONG *W=d;
-	register unsigned long A,B,C,D,E,T;
+	register unsigned MD32_REG_T A,B,C,D,E,T;
 #ifndef MD32_XARRAY
-	unsigned long	XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
-			XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
+	unsigned MD32_REG_T	XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
+				XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
 #else
 	SHA_LONG	XX[16];
 #endif

@@ -349,10 +349,10 @@ void HASH_BLOCK_HOST_ORDER (SHA_CTX *c, const void *d, int num)
 void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, int num)
 	{
 	const unsigned char *data=p;
-	register unsigned long A,B,C,D,E,T,l;
+	register unsigned MD32_REG_T A,B,C,D,E,T,l;
 #ifndef MD32_XARRAY
-	unsigned long	XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
-			XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
+	unsigned MD32_REG_T	XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
+				XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
 #else
 	SHA_LONG	XX[16];
 #endif