Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
openanolis
dragonwell8_hotspot
提交
de15436a
D
dragonwell8_hotspot
项目概览
openanolis
/
dragonwell8_hotspot
通知
2
Star
2
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
dragonwell8_hotspot
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
de15436a
编写于
4月 04, 2016
作者:
A
asaha
浏览文件
操作
浏览文件
下载
差异文件
Merge
上级
b909b356
03f3411f
变更
22
隐藏空白更改
内联
并排
Showing
22 changed file
with
1868 addition
and
10 deletion
+1868
-10
.hgtags
.hgtags
+8
-0
src/cpu/x86/vm/assembler_x86.cpp
src/cpu/x86/vm/assembler_x86.cpp
+20
-0
src/cpu/x86/vm/assembler_x86.hpp
src/cpu/x86/vm/assembler_x86.hpp
+3
-0
src/cpu/x86/vm/macroAssembler_x86.cpp
src/cpu/x86/vm/macroAssembler_x86.cpp
+497
-0
src/cpu/x86/vm/macroAssembler_x86.hpp
src/cpu/x86/vm/macroAssembler_x86.hpp
+19
-0
src/cpu/x86/vm/sharedRuntime_x86_64.cpp
src/cpu/x86/vm/sharedRuntime_x86_64.cpp
+253
-0
src/cpu/x86/vm/stubGenerator_x86_64.cpp
src/cpu/x86/vm/stubGenerator_x86_64.cpp
+119
-1
src/cpu/x86/vm/stubRoutines_x86_64.hpp
src/cpu/x86/vm/stubRoutines_x86_64.hpp
+1
-1
src/cpu/x86/vm/vm_version_x86.cpp
src/cpu/x86/vm/vm_version_x86.cpp
+36
-0
src/share/vm/classfile/vmSymbols.hpp
src/share/vm/classfile/vmSymbols.hpp
+17
-1
src/share/vm/opto/c2_globals.hpp
src/share/vm/opto/c2_globals.hpp
+12
-0
src/share/vm/opto/escape.cpp
src/share/vm/opto/escape.cpp
+6
-2
src/share/vm/opto/library_call.cpp
src/share/vm/opto/library_call.cpp
+245
-5
src/share/vm/opto/runtime.cpp
src/share/vm/opto/runtime.cpp
+88
-0
src/share/vm/opto/runtime.hpp
src/share/vm/opto/runtime.hpp
+6
-0
src/share/vm/runtime/sharedRuntime.hpp
src/share/vm/runtime/sharedRuntime.hpp
+6
-0
src/share/vm/runtime/stubRoutines.cpp
src/share/vm/runtime/stubRoutines.cpp
+4
-0
src/share/vm/runtime/stubRoutines.hpp
src/share/vm/runtime/stubRoutines.hpp
+8
-0
src/share/vm/runtime/vmStructs.cpp
src/share/vm/runtime/vmStructs.cpp
+2
-0
test/compiler/intrinsics/montgomerymultiply/MontgomeryMultiplyTest.java
...intrinsics/montgomerymultiply/MontgomeryMultiplyTest.java
+284
-0
test/compiler/intrinsics/muladd/TestMulAdd.java
test/compiler/intrinsics/muladd/TestMulAdd.java
+118
-0
test/compiler/intrinsics/squaretolen/TestSquareToLen.java
test/compiler/intrinsics/squaretolen/TestSquareToLen.java
+116
-0
未找到文件。
.hgtags
浏览文件 @
de15436a
...
...
@@ -810,6 +810,12 @@ da43260704c28b9f19cb652090ae65c258220fd6 jdk8u72-b31
c1031a924f2c910fad078838b88a2f0146f2de98 jdk8u74-b01
ca9cae9aa9e989bbe6713c91d55c913edeaecce4 jdk8u74-b02
a5b78b56841e97ce00463874f1b7f63c54d84934 jdk8u74-b31
94ec11846b18111e73929b6caa9fbe7262e142c1 jdk8u74-b32
1b6d4fd2730e58f17820930f797938dc182117c4 jdk8u77-b00
ddd297e340b1170d3cec011ee64e729f8b493c86 jdk8u77-b01
1b4072e4bb3ad54c4e894998486a8b33f0689160 jdk8u77-b02
223b64a19e94222dd97b92bb40abcfbc0bf6ef1f jdk8u77-b03
dd8507f51d786572dae18af8ffdc5a1ea34c755e jdk8u77-b31
94ec11846b18111e73929b6caa9fbe7262e142c1 jdk8u74-b32
da43260704c28b9f19cb652090ae65c258220fd6 jdk8u72-b31
...
...
@@ -832,6 +838,7 @@ ddd297e340b1170d3cec011ee64e729f8b493c86 jdk8u77-b01
223b64a19e94222dd97b92bb40abcfbc0bf6ef1f jdk8u77-b03
bbbb05e91c629f8d9eef2ba43933767f68a898b0 jdk8u91-b00
e36b6ade0499eadfd8673fe62ef0a613af2e6d67 jdk8u91-b13
fa8991ccf6e5b74890a0b5672440b3c09d8d8732 jdk8u91-b14
d7b01fb81aa8a5437cb03bc36afe15cf0e55fb89 jdk8u76-b00
c1679cc87ba045219169cabb6b9b378c2b5cc578 jdk8u76-b01
218483967e52b419d885d34af4488a81c5133804 jdk8u76-b02
...
...
@@ -847,5 +854,6 @@ a2b0ee820059a44be558a2d435b7d85ed5a8b63a jdk8u76-b10
9a87701e22b3cae79fdfd8cdb732051e02a710fa jdk8u76-b12
481dcde745b6aec035781ed9f6797cfc93719f71 jdk8u92-b00
f3e1e734e2d29101a9537ddeb71ecad413fcd352 jdk8u92-b13
24a09407d71bb2cc4848bfa21660c890b4d722b1 jdk8u92-b14
b374548dcb4834eb8731a06b52faddd0f10bd45d jdk8u81-b00
ead07188d11107e877e8e4ad215ff6cb238a8a92 jdk8u101-b01
src/cpu/x86/vm/assembler_x86.cpp
浏览文件 @
de15436a
...
...
@@ -2318,6 +2318,13 @@ void Assembler::orl(Register dst, Register src) {
emit_arith
(
0x0B
,
0xC0
,
dst
,
src
);
}
// Emits a 32-bit OR whose destination is a memory operand:
// the byte 0x09 followed by the operand encoding for (src, dst).
//   dst - memory operand that receives the result
//   src - register operand
void Assembler::orl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);        // prefix bytes required by this operand pair, if any
  emit_int8(0x09);         // opcode byte
  emit_operand(src, dst);  // register / memory operand encoding
}
void
Assembler
::
packuswb
(
XMMRegister
dst
,
Address
src
)
{
NOT_LP64
(
assert
(
VM_Version
::
supports_sse2
(),
""
));
assert
((
UseAVX
>
0
),
"SSE mode requires address alignment 16 bytes"
);
...
...
@@ -5613,6 +5620,19 @@ void Assembler::rclq(Register dst, int imm8) {
}
}
// Emits the 64-bit form of the instruction selected by opcode 0xD1/0xC1 with
// the register byte 0xD8 | encode (per the method name: rotate-through-carry
// right, quad word).
//   dst  - register to rotate
//   imm8 - rotate count; a count of exactly 1 uses the short 0xD1 form
//          without an immediate byte, any other count uses 0xC1 followed
//          by the immediate.
void Assembler::rcrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  const int encode = prefixq_and_encode(dst->encoding());
  const bool rotate_by_one = (imm8 == 1);

  emit_int8((unsigned char)(rotate_by_one ? 0xD1 : 0xC1));
  emit_int8((unsigned char)(0xD8 | encode));
  if (!rotate_by_one) {
    emit_int8(imm8);
  }
}
void
Assembler
::
rorq
(
Register
dst
,
int
imm8
)
{
assert
(
isShiftCount
(
imm8
>>
1
),
"illegal shift count"
);
int
encode
=
prefixq_and_encode
(
dst
->
encoding
());
...
...
src/cpu/x86/vm/assembler_x86.hpp
浏览文件 @
de15436a
...
...
@@ -1455,6 +1455,7 @@ private:
void
orl
(
Register
dst
,
int32_t
imm32
);
void
orl
(
Register
dst
,
Address
src
);
void
orl
(
Register
dst
,
Register
src
);
void
orl
(
Address
dst
,
Register
src
);
void
orq
(
Address
dst
,
int32_t
imm32
);
void
orq
(
Register
dst
,
int32_t
imm32
);
...
...
@@ -1555,6 +1556,8 @@ private:
void
rclq
(
Register
dst
,
int
imm8
);
void
rcrq
(
Register
dst
,
int
imm8
);
void
rdtsc
();
void
ret
(
int
imm16
);
...
...
src/cpu/x86/vm/macroAssembler_x86.cpp
浏览文件 @
de15436a
...
...
@@ -7769,6 +7769,503 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
pop
(
tmp2
);
pop
(
tmp1
);
}
//Helper functions for square_to_len()

/**
 * Generates code that stores the squares of the words of x[], shifted right
 * by one bit, into z[].
 *
 * An odd leading 32-bit element (when xlen is odd) is squared first with a
 * 32-bit load and a 64-bit multiply; the remaining elements are consumed
 * two ints (one 64-bit value) at a time:
 *
 *   carry = 0;
 *   for (j = 0, i = 0; j < xlen; j += 2, i += 4) {
 *     product      = x[j:j+1] * x[j:j+1];           // 128 bit
 *     z[i:i+1]     = (carry << 63) | (product >>> 65);
 *     z[i+2:i+3]   = product >>> 1;
 *     carry        = product;                       // lowest bit only
 *   }
 *
 * x, xlen and z are not written. tmp1 (index into x), tmp4 (index into z),
 * tmp5 (carry bit), rdxReg and raxReg are clobbered. tmp3 is not used here.
 * The one-operand mulq emitted below is annotated in the original as writing
 * rdx:rax, so raxReg/rdxReg are expected to be rax/rdx.
 */
void MacroAssembler::square_rshift(Register x, Register xlen, Register z, Register tmp1, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
  const Register carry = tmp5;   // bit shifted out of the previous product
  const Register xidx  = tmp1;   // int index into x[]
  const Register zidx  = tmp4;   // int index into z[]

  Label L_square_pair, L_done;

  xorq(carry, carry);
  xorq(rdxReg, rdxReg);
  xorl(xidx, xidx);
  xorl(zidx, zidx);

  // Even xlen: go straight to the 64-bit loop.
  testl(xlen, 1);
  jccb(Assembler::zero, L_square_pair);

  // Odd xlen: square the first 32-bit element, shift right by one and keep
  // the bit that fell out in carry.
  movl(raxReg, Address(x, xidx, Address::times_4, 0));
  imulq(raxReg, raxReg);
  shrq(raxReg, 1);
  adcq(carry, 0);
  movq(Address(z, zidx, Address::times_4, 0), raxReg);
  incrementl(xidx);
  addl(zidx, 2);

  // Remaining elements, two ints per iteration.
  bind(L_square_pair);
  cmpptr(xidx, xlen);
  jccb(Assembler::equal, L_done);

  // Load the pair, swap its 32-bit halves (big-endian int order to a
  // little-endian 64-bit value) and square it: rax * rax -> rdx:rax.
  movq(raxReg, Address(x, xidx, Address::times_4, 0));
  rorq(raxReg, 32);
  mulq(raxReg);

  // (carry:rdx:rax) >>> 1; the bit leaving rax becomes the new carry.
  shrq(carry, 1);
  rcrq(rdxReg, 1);
  rcrq(raxReg, 1);
  adcq(carry, 0);

  // High half first, then low half.
  movq(Address(z, zidx, Address::times_4, 0), rdxReg);
  movq(Address(z, zidx, Address::times_4, 8), raxReg);

  // Advance both indices and iterate.
  addl(xidx, 2);
  addl(zidx, 4);
  jmp(L_square_pair);

  bind(L_done);
}
/**
 * Generates a 64-bit multiply-accumulate using the BMI2 mulx instruction:
 *
 *   carry:sum = sum + op1 * op2 + carry
 *
 * op2 is never named in the emitted instructions; the original comments
 * state it must already be in rdx (mulxq's implicit operand). op2 is
 * preserved; sum, op1, carry and tmp2 are all overwritten.
 */
void MacroAssembler::multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry, Register tmp2) {
  const Register prod_hi = tmp2;  // upper 64 bits of the product, then of the result
  const Register prod_lo = op1;   // lower 64 bits of the product (replaces op1)

  // assert op2 is rdx
  mulxq(prod_hi, prod_lo, op1);   // op1 * op2 -> prod_hi:prod_lo

  // Add the incoming carry, then the low product half, propagating each
  // overflow into the high half.
  addq(sum, carry);
  adcq(prod_hi, 0);
  addq(sum, prod_lo);
  adcq(prod_hi, 0);

  movq(carry, prod_hi);
}
/**
 * Generates a 64-bit multiply-accumulate without BMI2:
 *
 *   carry:sum = sum + op1 * op2 + carry
 *
 * op1 and op2 are preserved; sum, carry, rdxReg and raxReg are overwritten.
 * The one-operand mulq is annotated in the original as producing rdx:rax,
 * so rdxReg/raxReg are expected to be rdx/rax.
 */
void MacroAssembler::multiply_add_64(Register sum, Register op1, Register op2, Register carry, Register rdxReg, Register raxReg) {
  const Register prod_hi = rdxReg;
  const Register prod_lo = raxReg;

  // prod_hi:prod_lo = op1 * op2
  movq(prod_lo, op2);
  mulq(op1);

  // Fold the incoming carry and the low product half into sum; every
  // overflow is pushed into the high half.
  addq(sum, carry);
  adcq(prod_hi, 0);
  addq(sum, prod_lo);
  adcq(prod_hi, 0);

  // The high half is the outgoing carry.
  movq(carry, prod_hi);
}
/**
 * Generates code that adds the 64-bit value in carry into z[] and ripples
 * any resulting overflow towards lower indices.
 *
 * zlen is first decremented by 2 and used as the int index of the 64-bit
 * word that receives carry; while an addition overflows, the index moves
 * down by 2 and the constant 1 is added to that word. The walk stops when
 * an addition does not overflow or the index becomes negative.
 *
 * z and carry keep their values; zlen and tmp1 are modified.
 */
void MacroAssembler::add_one_64(Register z, Register zlen, Register carry, Register tmp1) {
  const Register one = tmp1;   // constant increment for the ripple steps

  Label L_ripple, L_ripple_done;

  movl(one, 1);
  subl(zlen, 2);
  addq(Address(z, zlen, Address::times_4, 0), carry);

  bind(L_ripple);
  // The carry flag tested here comes from the most recent addq.
  jccb(Assembler::carryClear, L_ripple_done);
  subl(zlen, 2);
  jccb(Assembler::negative, L_ripple_done);
  addq(Address(z, zlen, Address::times_4, 0), one);
  jmp(L_ripple);

  bind(L_ripple_done);
}
/**
 * Generates code that shifts z[] left by one bit, walking from the highest
 * index down to 0 in 64-bit steps, i.e. primitiveLeftShift(z, zlen, 1):
 *
 *   carry = 0;
 *   for (zidx = zlen - 2; zidx >= 0; zidx -= 2) {
 *     (carry:value) = (z[zidx:zidx+1] << 1) | carry;
 *     z[zidx:zidx+1] = value;   // stored with its 32-bit halves swapped
 *   }
 *
 * With BMI2 the bit travels between iterations in the CPU carry flag
 * (rclq consumes and produces it, decl and rorxq leave it alone); without
 * BMI2 it is kept in a register. x, len, z and zlen are not written;
 * tmp1..tmp4 are used as scratch (tmp4 only on the non-BMI2 path).
 */
void MacroAssembler::lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
  const Register carry_in  = tmp1;   // bit shifted out of the previous word
  const Register word      = tmp2;   // 64-bit word being shifted
  const Register zidx      = tmp3;   // int index into z[]
  const Register carry_out = tmp4;   // bit shifted out of the current word

  Label L_shift_word, L_shift_done;

  movl(zidx, zlen);
  xorl(carry_in, carry_in);   // zeroes the register and clears the carry flag

  bind(L_shift_word);
  // Step the index down by two with decl, which leaves the carry flag alone.
  decl(zidx);
  decl(zidx);
  jccb(Assembler::negative, L_shift_done);

  if (!UseBMI2Instructions) {
    xorl(carry_out, carry_out);

    // Shift the word, capture the bit that fell out, merge the previous bit.
    movq(word, Address(z, zidx, Address::times_4, 0));
    shlq(word, 1);
    adcl(carry_out, 0);

    orq(word, carry_in);
    rorq(word, 0x20);
    movq(Address(z, zidx, Address::times_4, 0), word);   // store back in big endian form

    // The bit just captured feeds the next (lower) word.
    movl(carry_in, carry_out);
  } else {
    movq(word, Address(z, zidx, Address::times_4, 0));
    rclq(word, 1);
    rorxq(word, word, 32);
    movq(Address(z, zidx, Address::times_4, 0), word);   // store back in big endian form
  }
  jmp(L_shift_word);

  bind(L_shift_done);
}
/**
 * Code for BigInteger::squareToLen() intrinsic
 *
 * Register assignment documented by the original author:
 *   rdi: x       rsi: len     r8:  z       rcx: zlen
 *   r12: tmp1    r13: tmp2    r14: tmp3    r15: tmp4    rbx: tmp5
 *
 * Phases of the generated code:
 *   1. square_rshift: z[] = squares of the words of x[], shifted right 1 bit.
 *   2. Off-diagonal sums (second, third and fourth loops):
 *
 *        zlen += 2;
 *        for (xidx = len-2, zidx = zlen-4; xidx > 0; xidx -= 2, zidx -= 4) {
 *          carry = 0;
 *          op2 = x[xidx:xidx+1];
 *          for (j = xidx-2, k = zidx; j >= 0; j -= 2) {
 *            k -= 2;
 *            op1 = x[j:j+1];
 *            sum = z[k:k+1];
 *            carry:sum = multiply_add_64(sum, op1, op2, carry, tmp_regs);
 *            z[k:k+1] = sum;
 *          }
 *          add_one_64(z, k, carry, tmp_regs);
 *        }
 *
 *   3. lshift_by_1: z[] <<= 1.
 *   4. z[zlen-1] |= x[len-1] & 1.
 *
 * tmp1..tmp5 are pushed on entry and popped on exit; len and zlen are
 * pushed and popped around phase 2. rdxReg and raxReg are clobbered.
 */
void MacroAssembler::square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
  const Register carry = tmp5;
  const Register sum   = tmp3;
  const Register op1   = tmp4;
  // The BMI2 multiply takes its second operand from rdxReg.
  const Register op2   = UseBMI2Instructions ? rdxReg : tmp2;

  Label L_outer, L_outer_exit, L_inner, L_inner_exit, L_leading_int, L_mul_add;

  push(tmp1);
  push(tmp2);
  push(tmp3);
  push(tmp4);
  push(tmp5);

  // Phase 1: halved squares.
  square_rshift(x, len, z, tmp1, tmp3, tmp4, tmp5, rdxReg, raxReg);

  // Phase 2: off-diagonal sums. The caller's zlen/len are saved once here,
  // and the per-iteration values are saved again inside the loop.
  push(zlen);
  push(len);
  addl(zlen, 2);

  bind(L_outer);
  xorq(carry, carry);
  subl(zlen, 4);
  subl(len, 2);
  push(zlen);
  push(len);
  cmpl(len, 0);
  jccb(Assembler::lessEqual, L_outer_exit);

  // op2 = x[len:len+1] with its 32-bit halves swapped.
  movq(op2, Address(x, len, Address::times_4, 0));
  if (UseBMI2Instructions) {
    rorxq(op2, op2, 32);
  } else {
    rorq(op2, 32);
  }

  bind(L_inner);
  decrementl(len);
  jccb(Assembler::negative, L_inner_exit);
  decrementl(len);
  jccb(Assembler::negative, L_leading_int);   // only a single int is left in x[]

  movq(op1, Address(x, len, Address::times_4, 0));
  rorq(op1, 32);

  bind(L_mul_add);
  subl(zlen, 2);
  movq(sum, Address(z, zlen, Address::times_4, 0));

  // carry:sum = sum + op1 * op2 + carry
  if (UseBMI2Instructions) {
    multiply_add_64_bmi2(sum, op1, op2, carry, tmp2);
  } else {
    multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
  }

  movq(Address(z, zlen, Address::times_4, 0), sum);

  jmp(L_inner);
  bind(L_inner_exit);

  // Fourth loop: add the remaining carry into z, continuing from the
  // current (already offset) zlen.
  add_one_64(z, zlen, carry, tmp1);

  pop(len);
  pop(zlen);
  jmp(L_outer);

  // Infrequent case kept out of the loop body: load the lone leading int
  // of x[] with a 32-bit move and join the multiply-add.
  bind(L_leading_int);
  movl(op1, Address(x, 0));
  jmp(L_mul_add);

  bind(L_outer_exit);
  // Drop the per-iteration copies, then restore the caller's len/zlen.
  pop(len);
  pop(zlen);
  pop(len);
  pop(zlen);

  // Phase 3: shift z left by one bit.
  lshift_by_1(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4);

  // Phase 4: z[zlen-1] |= x[len-1] & 1;
  movl(tmp3, Address(x, len, Address::times_4, -4));
  andl(tmp3, 1);
  orl(Address(z, zlen, Address::times_4, -4), tmp3);

  pop(tmp5);
  pop(tmp4);
  pop(tmp3);
  pop(tmp2);
  pop(tmp1);
}
/**
 * Helper function for mul_add()
 *
 * Generates the loop that multiplies in[] by the word k and adds the result
 * into out[], four ints per iteration (two 64-bit multiply-adds). Only
 * len / 4 iterations are generated for; the remaining ints are left to the
 * caller.
 *
 * k is expected in rdxReg when UseBMI2Instructions is set and in tmp2
 * otherwise. Each iteration first lowers len and offset by 4, so on exit
 * they index the part of in[] / out[] that has not been processed yet.
 * The running carry lives in tmp5. out, in and k are preserved; tmp1
 * (iteration counter), tmp3, tmp4 and the multiply scratch registers are
 * clobbered.
 */
void MacroAssembler::mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
  const Register iterations = tmp1;
  const Register op2        = UseBMI2Instructions ? rdxReg : tmp2;
  const Register sum        = tmp3;
  const Register op1        = tmp4;
  const Register carry      = tmp5;

  Label L_quad, L_quad_done;

  movl(iterations, len);
  shrl(iterations, 2);

  bind(L_quad);
  subl(iterations, 1);
  jccb(Assembler::negative, L_quad_done);

  subl(len, 4);
  subl(offset, 4);

  // Two identical 64-bit multiply-add steps are emitted per iteration:
  // first the pair at byte displacement 8, then the pair at displacement 0.
  // (This for-loop runs at code-generation time, not in the generated code.)
  for (int disp = 8; disp >= 0; disp -= 8) {
    movq(op1, Address(in, len, Address::times_4, disp));
    rorq(op1, 32);
    movq(sum, Address(out, offset, Address::times_4, disp));
    rorq(sum, 32);
    if (UseBMI2Instructions) {
      multiply_add_64_bmi2(sum, op1, op2, carry, raxReg);
    } else {
      multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
    }
    // Store back in big endian from little endian
    rorq(sum, 0x20);
    movq(Address(out, offset, Address::times_4, disp), sum);
  }

  jmp(L_quad);
  bind(L_quad_done);
}
/**
 * Code for BigInteger::mulAdd() intrinsic
 *
 * Register assignment documented by the original author:
 *   rdi: out     rsi: in      r11: offs (out.length - offset)
 *   rcx: len     r8:  k
 *   r12: tmp1    r13: tmp2    r14: tmp3    r15: tmp4    rbx: tmp5
 *
 * Multiplies in[] by the word k and adds the product into out[]; the final
 * carry is returned in rax. Java equivalent:
 *
 *   carry = 0;
 *   for (int j = len-1; j >= 0; j--) {
 *     long product = (in[j] & LONG_MASK) * kLong +
 *                    (out[offs] & LONG_MASK) + carry;
 *     out[offs--] = (int)product;
 *     carry = product >>> 32;
 *   }
 *
 * tmp1..tmp5 are pushed on entry and popped on exit; rdxReg and raxReg are
 * clobbered; len and offs are modified.
 */
void MacroAssembler::mul_add(Register out, Register in, Register offs, Register len, Register k, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
  // The BMI2 multiply takes its second operand from rdxReg.
  const Register op2   = UseBMI2Instructions ? rdxReg : tmp2;
  const Register sum   = tmp3;
  const Register op1   = tmp4;
  const Register carry = tmp5;

  Label L_carry, L_last_in, L_done;

  push(tmp1);
  push(tmp2);
  push(tmp3);
  push(tmp4);
  push(tmp5);

  movl(op2, k);
  xorq(carry, carry);

  // First loop: four ints of in[] per iteration, carry accumulates in tmp5.
  mul_add_128_x_32_loop(out, in, offs, len, tmp1, tmp2, tmp3, tmp4, tmp5, rdxReg, raxReg);

  // Up to three ints remain. No int left -> L_carry; exactly one -> L_last_in.
  decrementl(len);
  jccb(Assembler::negative, L_carry);
  decrementl(len);
  jccb(Assembler::negative, L_last_in);

  // At least two ints remain: process one 64-bit pair.
  movq(op1, Address(in, len, Address::times_4, 0));
  rorq(op1, 32);

  subl(offs, 2);
  movq(sum, Address(out, offs, Address::times_4, 0));
  rorq(sum, 32);

  if (UseBMI2Instructions) {
    multiply_add_64_bmi2(sum, op1, op2, carry, raxReg);
  } else {
    multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
  }

  // Store back in big endian from little endian
  rorq(sum, 0x20);
  movq(Address(out, offs, Address::times_4, 0), sum);

  testl(len, len);
  jccb(Assembler::zero, L_carry);

  // Last int of in[], if any: 32-bit multiply, op1 * eax -> edx:eax.
  bind(L_last_in);
  movl(op1, Address(in, 0));
  movl(sum, Address(out, offs, Address::times_4, -4));

  movl(raxReg, k);
  mull(op1);
  addl(sum, carry);
  adcl(rdxReg, 0);
  addl(sum, raxReg);
  adcl(rdxReg, 0);
  movl(carry, rdxReg);

  movl(Address(out, offs, Address::times_4, -4), sum);

  bind(L_carry);
  // Return the carry (tmp5) in rax.
  movl(rax, carry);

  bind(L_done);
  pop(tmp5);
  pop(tmp4);
  pop(tmp3);
  pop(tmp2);
  pop(tmp1);
}
#endif
/**
...
...
src/cpu/x86/vm/macroAssembler_x86.hpp
浏览文件 @
de15436a
...
...
@@ -1241,6 +1241,25 @@ public:
Register
carry2
);
void
multiply_to_len
(
Register
x
,
Register
xlen
,
Register
y
,
Register
ylen
,
Register
z
,
Register
zlen
,
Register
tmp1
,
Register
tmp2
,
Register
tmp3
,
Register
tmp4
,
Register
tmp5
);
void
square_rshift
(
Register
x
,
Register
len
,
Register
z
,
Register
tmp1
,
Register
tmp3
,
Register
tmp4
,
Register
tmp5
,
Register
rdxReg
,
Register
raxReg
);
void
multiply_add_64_bmi2
(
Register
sum
,
Register
op1
,
Register
op2
,
Register
carry
,
Register
tmp2
);
void
multiply_add_64
(
Register
sum
,
Register
op1
,
Register
op2
,
Register
carry
,
Register
rdxReg
,
Register
raxReg
);
void
add_one_64
(
Register
z
,
Register
zlen
,
Register
carry
,
Register
tmp1
);
void
lshift_by_1
(
Register
x
,
Register
len
,
Register
z
,
Register
zlen
,
Register
tmp1
,
Register
tmp2
,
Register
tmp3
,
Register
tmp4
);
void
square_to_len
(
Register
x
,
Register
len
,
Register
z
,
Register
zlen
,
Register
tmp1
,
Register
tmp2
,
Register
tmp3
,
Register
tmp4
,
Register
tmp5
,
Register
rdxReg
,
Register
raxReg
);
void
mul_add_128_x_32_loop
(
Register
out
,
Register
in
,
Register
offset
,
Register
len
,
Register
tmp1
,
Register
tmp2
,
Register
tmp3
,
Register
tmp4
,
Register
tmp5
,
Register
rdxReg
,
Register
raxReg
);
void
mul_add
(
Register
out
,
Register
in
,
Register
offset
,
Register
len
,
Register
k
,
Register
tmp1
,
Register
tmp2
,
Register
tmp3
,
Register
tmp4
,
Register
tmp5
,
Register
rdxReg
,
Register
raxReg
);
#endif
// CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic.
...
...
src/cpu/x86/vm/sharedRuntime_x86_64.cpp
浏览文件 @
de15436a
...
...
@@ -23,6 +23,9 @@
*/
#include "precompiled.hpp"
#ifndef _WINDOWS
#include "alloca.h"
#endif
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
...
...
@@ -3966,6 +3969,256 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha
}
//------------------------------Montgomery multiplication------------------------
//
#ifndef _WINDOWS
#define ASM_SUBTRACT
#ifdef ASM_SUBTRACT
// Multi-word subtraction with borrow, written in x86-64 inline assembly.
//
// Computes a[0..len-1] -= b[0..len-1] in place, word 0 first, chaining the
// borrow from each word into the next (clc / sbb), and returns
// carry minus the final borrow. len must be non-zero: the loop body runs
// before the counter is tested.
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  long word_index = 0;          // index register of the asm loop
  long words_left = len;        // down-counter; the loop ends when it hits zero
  unsigned long result;         // scratch inside the loop, return value after it
  // inc/dec are used for the loop bookkeeping so the borrow in the carry
  // flag survives from one sbb to the next.
  asm volatile("clc; "
               "0: ; "
               "mov (%[b], %[i], 8), %[tmp]; "
               "sbb %[tmp], (%[a], %[i], 8); "
               "inc %[i]; dec %[cnt]; "
               "jne 0b; "
               "mov %[carry], %[tmp]; sbb $0, %[tmp]; "
               : [i]"+r"(word_index), [cnt]"+r"(words_left), [tmp]"=&r"(result)
               : [a]"r"(a), [b]"r"(b), [carry]"r"(carry)
               : "memory");
  return result;
}
#else // ASM_SUBTRACT
// 128-bit signed integer (compiler extension) used as the running difference.
typedef int __attribute__((mode(TI))) int128;

// Portable variant of the multi-word subtraction.
//
// Computes a[0..len-1] -= b[0..len-1] in place, word 0 first. The running
// difference is kept in a 128-bit signed value; after each word is stored,
// the arithmetic shift by 64 leaves 0 or -1 as the borrow for the next word.
// Returns the final borrow (0 or -1) added to carry.
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) {
  int128 diff = 0;
  for (int word = 0; word < len; word++) {
    diff += a[word];
    diff -= b[word];
    a[word] = diff;       // keeps the low 64 bits
    diff >>= 64;          // what is left is the borrow
    assert(-1 <= diff && diff <= 0, "invariant");
  }
  return diff + carry;
}
#endif // ! ASM_SUBTRACT
// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
//
// Operand map of the asm statement below: %2 = T0, %3 = T1, %4 = T2,
// %5 = A; B is loaded into rax by the "a" input constraint. "mul %5"
// leaves the 128-bit product in rdx:rax, which is then added into T0
// (low word) and T1 with carry, the last carry going into T2. The locals
// hi and lo exist only as the rdx / rax output operands and are never read.
// Because the macro declares locals named hi and lo, arguments passed for
// T0, T1 and T2 must not use those names.
#define MACC(A, B, T0, T1, T2) \
do { \
unsigned long hi, lo; \
asm volatile("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4" \
: "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \
: "r"(A), "a"(B) : "cc"); \
} while(0)
// As above, but add twice the double-length result into the
// accumulator.
//
// The product is computed once by "mul %5" (A times B, with B in rax) and
// the add/adc/adc sequence that folds rdx:rax into T0, T1, T2 is issued
// twice. As with MACC, arguments passed for T0, T1 and T2 must not be named
// hi or lo, since the macro declares locals with those names.
#define MACC2(A, B, T0, T1, T2) \
do { \
unsigned long hi, lo; \
asm volatile("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4;" \
"add %%rax, %2; adc %%rdx, %3; adc $0, %4" \
: "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2) \
: "r"(A), "a"(B) : "cc"); \
} while(0)
// Fast Montgomery multiplication. The derivation of the algorithm is
// in A Cryptographic Library for the Motorola DSP56000,
// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
//
//   a, b - multiplicands, len 64-bit words each, indexed from word 0 up
//   n    - modulus, len words
//   m    - result, len words; also used as scratch while the product is built
//   inv  - must satisfy inv * n[0] == -1 (mod 2^64); checked by assert
//   len  - number of 64-bit words in each array
//
// The product is accumulated column by column in a three-word accumulator.
// The first pass produces m[i] values that zero the low accumulator word;
// the second pass writes the result words; the trailing loop subtracts n
// while a carry word remains.
static void __attribute__((noinline))
montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
                    unsigned long m[], unsigned long inv, int len) {
  // Triple-precision accumulator, acc0 least significant.
  // (Not named hi/lo: MACC declares locals with those names.)
  unsigned long acc0 = 0, acc1 = 0, acc2 = 0;

  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");

  // Columns 0 .. len-1.
  for (int col = 0; col < len; col++) {
    for (int row = 0; row < col; row++) {
      MACC(a[row], b[col-row], acc0, acc1, acc2);
      MACC(m[row], n[col-row], acc0, acc1, acc2);
    }
    MACC(a[col], b[0], acc0, acc1, acc2);
    m[col] = acc0 * inv;
    MACC(m[col], n[0], acc0, acc1, acc2);

    assert(acc0 == 0, "broken Montgomery multiply");

    // Shift the accumulator down by one word.
    acc0 = acc1;
    acc1 = acc2;
    acc2 = 0;
  }

  // Columns len .. 2*len-1: each one yields a result word.
  for (int col = len; col < 2*len; col++) {
    for (int row = col-len+1; row < len; row++) {
      MACC(a[row], b[col-row], acc0, acc1, acc2);
      MACC(m[row], n[col-row], acc0, acc1, acc2);
    }
    m[col-len] = acc0;
    acc0 = acc1;
    acc1 = acc2;
    acc2 = 0;
  }

  // Reduce while a carry word is left.
  while (acc0 != 0) {
    acc0 = sub(m, n, acc0, len);
  }
}
// Fast Montgomery squaring. This uses asymptotically 25% fewer
// multiplies so it should be up to 25% faster than Montgomery
// multiplication. However, its loop control is more complex and it
// may actually run slower on some machines.
//
//   a    - value to square, len 64-bit words, indexed from word 0 up
//   n    - modulus, len words
//   m    - result, len words; also used as scratch while the product is built
//   inv  - must satisfy inv * n[0] == -1 (mod 2^64); checked by assert
//   len  - number of 64-bit words in each array
//
// Each column's symmetric a[j]*a[i-j] pairs are added once, doubled (MACC2);
// the middle term a[j]*a[j] of an even column is added once (MACC).
static void __attribute__((noinline))
montgomery_square(unsigned long a[], unsigned long n[],
                  unsigned long m[], unsigned long inv, int len) {
  // Triple-precision accumulator, acc0 least significant.
  // (Not named hi/lo: MACC/MACC2 declare locals with those names.)
  unsigned long acc0 = 0, acc1 = 0, acc2 = 0;

  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");

  // Columns 0 .. len-1.
  for (int col = 0; col < len; col++) {
    const int half = (col+1)/2;
    int row = 0;
    for (; row < half; row++) {
      MACC2(a[row], a[col-row], acc0, acc1, acc2);
      MACC(m[row], n[col-row], acc0, acc1, acc2);
    }
    if (!(col & 1)) {
      // Even column: the diagonal term.
      MACC(a[row], a[row], acc0, acc1, acc2);
    }
    for (; row < col; row++) {
      MACC(m[row], n[col-row], acc0, acc1, acc2);
    }
    m[col] = acc0 * inv;
    MACC(m[col], n[0], acc0, acc1, acc2);

    assert(acc0 == 0, "broken Montgomery square");

    // Shift the accumulator down by one word.
    acc0 = acc1;
    acc1 = acc2;
    acc2 = 0;
  }

  // Columns len .. 2*len-1: each one yields a result word.
  for (int col = len; col < 2*len; col++) {
    const int first = col-len+1;
    const int half = first + (len - first)/2;
    int row = first;
    for (; row < half; row++) {
      MACC2(a[row], a[col-row], acc0, acc1, acc2);
      MACC(m[row], n[col-row], acc0, acc1, acc2);
    }
    if (!(col & 1)) {
      // Even column: the diagonal term.
      MACC(a[row], a[row], acc0, acc1, acc2);
    }
    for (; row < len; row++) {
      MACC(m[row], n[col-row], acc0, acc1, acc2);
    }
    m[col-len] = acc0;
    acc0 = acc1;
    acc1 = acc2;
    acc2 = 0;
  }

  // Reduce while a carry word is left.
  while (acc0 != 0) {
    acc0 = sub(m, n, acc0, len);
  }
}
// Exchanges the upper and lower 32-bit halves of x.
// The shift by 32 requires unsigned long to be 64 bits wide.
static unsigned long swap(unsigned long x) {
  const unsigned long low_moved_up    = x << 32;
  const unsigned long high_moved_down = x >> 32;
  return low_moved_up | high_moved_down;
}
// Copies len longwords from s to d. Every longword has its two 32-bit
// halves exchanged (via swap) and the longword order is reversed:
// d[len-1-i] = swap(s[i]). Nothing is copied when len <= 0.
static void reverse_words(unsigned long *s, unsigned long *d, int len) {
  unsigned long *dst = d + len;   // one past the last destination slot
  for (int remaining = len; remaining > 0; remaining--) {
    *--dst = swap(*s++);
  }
}
// The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
#define MONTGOMERY_SQUARING_THRESHOLD 64
// Runtime entry point for the Montgomery multiply intrinsic.
//
//   a_ints, b_ints - multiplicands, len jints each
//   n_ints         - modulus, len jints
//   len            - number of jints in each array; must be even
//   inv            - passed through to ::montgomery_multiply, which asserts
//                    inv * n[0] == -1 on the converted modulus
//   m_ints         - receives the result, len jints
//
// The jint arrays are converted to longword arrays in stack scratch space
// (reverse_words reverses the longword order and swaps the 32-bit halves
// of every longword), multiplied, and the result is converted back the
// same way.
void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
                                        jint len, jlong inv,
                                        jint *m_ints) {
  assert(len % 2 == 0, "array length in montgomery_multiply must be even");
  int longwords = len/2;

  // Make very sure we don't use so much space that the stack might
  // overflow.  512 jints corresponds to an 16384-bit integer and
  // will use here a total of 8k bytes of stack space.
  //
  // The bound is checked on the word count BEFORE it is multiplied: the
  // byte count used to be computed first and then narrowed to int, so a
  // large or negative len could wrap around, slip past the check and leave
  // alloca() with a buffer smaller than the four arrays carved out of it.
  int divisor = sizeof(unsigned long) * 4;
  guarantee(0 <= longwords && longwords <= 8192 / divisor, "must be");
  int total_allocation = longwords * divisor;
  unsigned long *scratch = (unsigned long *)alloca(total_allocation);

  // Local scratch arrays
  unsigned long
    *a = scratch + 0 * longwords,
    *b = scratch + 1 * longwords,
    *n = scratch + 2 * longwords,
    *m = scratch + 3 * longwords;

  reverse_words((unsigned long *)a_ints, a, longwords);
  reverse_words((unsigned long *)b_ints, b, longwords);
  reverse_words((unsigned long *)n_ints, n, longwords);

  ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);

  reverse_words(m, (unsigned long *)m_ints, longwords);
}
// Runtime entry point for the Montgomery square intrinsic.
//
//   a_ints - value to square, len jints
//   n_ints - modulus, len jints
//   len    - number of jints in each array; must be even
//   inv    - passed through to the static worker, which asserts
//            inv * n[0] == -1 on the converted modulus
//   m_ints - receives the result, len jints
//
// The jint arrays are converted to longword arrays in stack scratch space
// (reverse_words reverses the longword order and swaps the 32-bit halves
// of every longword), squared, and the result is converted back the same
// way.
void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
                                      jint len, jlong inv,
                                      jint *m_ints) {
  assert(len % 2 == 0, "array length in montgomery_square must be even");
  int longwords = len/2;

  // Make very sure we don't use so much space that the stack might
  // overflow.  512 jints corresponds to an 16384-bit integer and
  // will use here a total of 6k bytes of stack space.
  //
  // The bound is checked on the word count BEFORE it is multiplied: the
  // byte count used to be computed first and then narrowed to int, so a
  // large or negative len could wrap around, slip past the check and leave
  // alloca() with a buffer smaller than the three arrays carved out of it.
  int divisor = sizeof(unsigned long) * 3;
  guarantee(0 <= longwords && longwords <= 8192 / divisor, "must be");
  int total_allocation = longwords * divisor;
  unsigned long *scratch = (unsigned long *)alloca(total_allocation);

  // Local scratch arrays
  unsigned long
    *a = scratch + 0 * longwords,
    *n = scratch + 1 * longwords,
    *m = scratch + 2 * longwords;

  reverse_words((unsigned long *)a_ints, a, longwords);
  reverse_words((unsigned long *)n_ints, n, longwords);

  //montgomery_square fails to pass BigIntegerTest on solaris amd64
  //on jdk7 and jdk8.
  // On Solaris the dedicated squaring routine is therefore never selected
  // and the general multiply is used with both operands equal to a.
#ifndef SOLARIS
  if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
#else
  if (0) {
#endif
    ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
  } else {
    ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
  }

  reverse_words(m, (unsigned long *)m_ints, longwords);
}
#endif // WINDOWS
#ifdef COMPILER2
// This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
//
...
...
src/cpu/x86/vm/stubGenerator_x86_64.cpp
浏览文件 @
de15436a
...
...
@@ -3743,6 +3743,107 @@ class StubGenerator: public StubCodeGenerator {
return
start
;
}
/**
 * Generates the "squareToLen" stub and returns its entry address.
 *
 * Stub arguments:
 *   c_rarg0 - x address
 *   c_rarg1 - x length
 *   c_rarg2 - z address
 *   c_rarg3 - z length
 */
address generate_squareToLen() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "squareToLen");
  address start = __ pc();

  // Argument registers by platform:
  //   Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
  //   Unix:  rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
  // After setup_arg_regs(4) the arguments are in the Unix registers on
  // both platforms.
  const Register x    = rdi;
  const Register len  = rsi;
  const Register z    = r8;    // copied out of rdx below
  const Register zlen = rcx;

  // Scratch registers handed to square_to_len().
  const Register tmp1 = r12;
  const Register tmp2 = r13;
  const Register tmp3 = r14;
  const Register tmp4 = r15;
  const Register tmp5 = rbx;

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame

  // x => rdi, len => rsi, z => rdx, zlen => rcx
  // r9 and r10 may be used to save non-volatile registers
  setup_arg_regs(4);

  // rdx is passed to square_to_len() as its rdxReg scratch register, so z
  // is moved to r8 first.
  __ movptr(z, rdx);
  __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);

  restore_arg_regs();

  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
/**
 * Generates the "mulAdd" stub and returns its entry address.
 *
 * Stub arguments:
 *   c_rarg0   - out address
 *   c_rarg1   - in address
 *   c_rarg2   - offset
 *   c_rarg3   - len
 *   c_rarg4   - k        (not Win64)
 *   rsp+40    - k        (Win64, at stub entry)
 */
address generate_mulAdd() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "mulAdd");
  address start = __ pc();

  // Argument registers by platform:
  //   Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
  //   Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
  // After setup_arg_regs(4) the first four arguments are in the Unix
  // registers on both platforms.
  const Register out    = rdi;
  const Register in     = rsi;
  const Register offset = r11;   // copied out of rdx below
  const Register len    = rcx;
  const Register k      = r8;

  // Next registers will be saved on stack in mul_add().
  const Register tmp1 = r12;
  const Register tmp2 = r13;
  const Register tmp3 = r14;
  const Register tmp4 = r15;
  const Register tmp5 = rbx;

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame

  // out => rdi, in => rsi, offset => rdx, len => rcx, k => r8
  // r9 and r10 may be used to save non-volatile registers
  setup_arg_regs(4);
#ifdef _WIN64
  // last argument is on stack on Win64
  // NOTE(review): slot 6 here vs. rsp+40 at entry - presumably the extra
  // word is the frame pointer pushed by enter(); confirm.
  __ movl(k, Address(rsp, 6 * wordSize));
#endif

  // rdx is passed to mul_add() as its rdxReg scratch register, so the
  // offset is moved to r11 first.
  __ movptr(offset, rdx);
  __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);

  restore_arg_regs();

  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
#undef __
#define __ masm->
...
...
@@ -3987,7 +4088,24 @@ class StubGenerator: public StubCodeGenerator {
if
(
UseMultiplyToLenIntrinsic
)
{
StubRoutines
::
_multiplyToLen
=
generate_multiplyToLen
();
}
#endif
if
(
UseSquareToLenIntrinsic
)
{
StubRoutines
::
_squareToLen
=
generate_squareToLen
();
}
if
(
UseMulAddIntrinsic
)
{
StubRoutines
::
_mulAdd
=
generate_mulAdd
();
}
#ifndef _WINDOWS
if
(
UseMontgomeryMultiplyIntrinsic
)
{
StubRoutines
::
_montgomeryMultiply
=
CAST_FROM_FN_PTR
(
address
,
SharedRuntime
::
montgomery_multiply
);
}
if
(
UseMontgomerySquareIntrinsic
)
{
StubRoutines
::
_montgomerySquare
=
CAST_FROM_FN_PTR
(
address
,
SharedRuntime
::
montgomery_square
);
}
#endif // WINDOWS
#endif // COMPILER2
}
public:
...
...
src/cpu/x86/vm/stubRoutines_x86_64.hpp
浏览文件 @
de15436a
...
...
@@ -33,7 +33,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _
enum
platform_dependent_constants
{
code_size1
=
19000
,
// simply increase if too small (assembler will crash if too small)
code_size2
=
2
2
000
// simply increase if too small (assembler will crash if too small)
code_size2
=
2
3
000
// simply increase if too small (assembler will crash if too small)
};
class
x86
{
...
...
src/cpu/x86/vm/vm_version_x86.cpp
浏览文件 @
de15436a
...
...
@@ -703,6 +703,18 @@ void VM_Version::get_processor_features() {
if
(
FLAG_IS_DEFAULT
(
UseMultiplyToLenIntrinsic
))
{
UseMultiplyToLenIntrinsic
=
true
;
}
if
(
FLAG_IS_DEFAULT
(
UseSquareToLenIntrinsic
))
{
UseSquareToLenIntrinsic
=
false
;
}
if
(
FLAG_IS_DEFAULT
(
UseMulAddIntrinsic
))
{
UseMulAddIntrinsic
=
false
;
}
if
(
FLAG_IS_DEFAULT
(
UseMontgomeryMultiplyIntrinsic
))
{
UseMontgomeryMultiplyIntrinsic
=
false
;
}
if
(
FLAG_IS_DEFAULT
(
UseMontgomerySquareIntrinsic
))
{
UseMontgomerySquareIntrinsic
=
false
;
}
#else
if
(
UseMultiplyToLenIntrinsic
)
{
if
(
!
FLAG_IS_DEFAULT
(
UseMultiplyToLenIntrinsic
))
{
...
...
@@ -710,6 +722,30 @@ void VM_Version::get_processor_features() {
}
FLAG_SET_DEFAULT
(
UseMultiplyToLenIntrinsic
,
false
);
}
if
(
UseSquareToLenIntrinsic
)
{
if
(
!
FLAG_IS_DEFAULT
(
UseSquareToLenIntrinsic
))
{
warning
(
"squareToLen intrinsic is not available in 32-bit VM"
);
}
FLAG_SET_DEFAULT
(
UseSquareToLenIntrinsic
,
false
);
}
if
(
UseMulAddIntrinsic
)
{
if
(
!
FLAG_IS_DEFAULT
(
UseMulAddIntrinsic
))
{
warning
(
"mulAdd intrinsic is not available in 32-bit VM"
);
}
FLAG_SET_DEFAULT
(
UseMulAddIntrinsic
,
false
);
}
if
(
UseMontgomeryMultiplyIntrinsic
)
{
if
(
!
FLAG_IS_DEFAULT
(
UseMontgomeryMultiplyIntrinsic
))
{
warning
(
"montgomeryMultiply intrinsic is not available in 32-bit VM"
);
}
FLAG_SET_DEFAULT
(
UseMontgomeryMultiplyIntrinsic
,
false
);
}
if
(
UseMontgomerySquareIntrinsic
)
{
if
(
!
FLAG_IS_DEFAULT
(
UseMontgomerySquareIntrinsic
))
{
warning
(
"montgomerySquare intrinsic is not available in 32-bit VM"
);
}
FLAG_SET_DEFAULT
(
UseMontgomerySquareIntrinsic
,
false
);
}
#endif
#endif // COMPILER2
...
...
src/share/vm/classfile/vmSymbols.hpp
浏览文件 @
de15436a
...
...
@@ -793,10 +793,26 @@
do_signature(encodeISOArray_signature, "([CI[BII)I") \
\
do_class(java_math_BigInteger, "java/math/BigInteger") \
do_intrinsic(_multiplyToLen, java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_
R
) \
do_intrinsic(_multiplyToLen, java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_
S
) \
do_name( multiplyToLen_name, "multiplyToLen") \
do_signature(multiplyToLen_signature, "([II[II[I)[I") \
\
do_intrinsic(_squareToLen, java_math_BigInteger, squareToLen_name, squareToLen_signature, F_S) \
do_name( squareToLen_name, "implSquareToLen") \
do_signature(squareToLen_signature, "([II[II)[I") \
\
do_intrinsic(_mulAdd, java_math_BigInteger, mulAdd_name, mulAdd_signature, F_S) \
do_name( mulAdd_name, "implMulAdd") \
do_signature(mulAdd_signature, "([I[IIII)I") \
\
do_intrinsic(_montgomeryMultiply, java_math_BigInteger, montgomeryMultiply_name, montgomeryMultiply_signature, F_S) \
do_name( montgomeryMultiply_name, "implMontgomeryMultiply") \
do_signature(montgomeryMultiply_signature, "([I[I[IIJ[I)[I") \
\
do_intrinsic(_montgomerySquare, java_math_BigInteger, montgomerySquare_name, montgomerySquare_signature, F_S) \
do_name( montgomerySquare_name, "implMontgomerySquare") \
do_signature(montgomerySquare_signature, "([I[IIJ[I)[I") \
\
/* java/lang/ref/Reference */
\
do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \
\
...
...
src/share/vm/opto/c2_globals.hpp
浏览文件 @
de15436a
...
...
@@ -662,6 +662,18 @@
product(bool, UseMultiplyToLenIntrinsic, false, \
"Enables intrinsification of BigInteger.multiplyToLen()") \
\
product(bool, UseSquareToLenIntrinsic, false, \
"Enables intrinsification of BigInteger.squareToLen()") \
\
product(bool, UseMulAddIntrinsic, false, \
"Enables intrinsification of BigInteger.mulAdd()") \
\
product(bool, UseMontgomeryMultiplyIntrinsic, false, \
"Enables intrinsification of BigInteger.montgomeryMultiply()") \
\
product(bool, UseMontgomerySquareIntrinsic, false, \
"Enables intrinsification of BigInteger.montgomerySquare()") \
\
product(bool, UseTypeSpeculation, true, \
"Speculatively propagate types from profiles") \
\
...
...
src/share/vm/opto/escape.cpp
浏览文件 @
de15436a
...
...
@@ -958,8 +958,12 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
strcmp
(
call
->
as_CallLeaf
()
->
_name
,
"sha256_implCompressMB"
)
==
0
||
strcmp
(
call
->
as_CallLeaf
()
->
_name
,
"sha512_implCompress"
)
==
0
||
strcmp
(
call
->
as_CallLeaf
()
->
_name
,
"sha512_implCompressMB"
)
==
0
||
strcmp
(
call
->
as_CallLeaf
()
->
_name
,
"multiplyToLen"
)
==
0
)
)))
{
strcmp
(
call
->
as_CallLeaf
()
->
_name
,
"multiplyToLen"
)
==
0
||
strcmp
(
call
->
as_CallLeaf
()
->
_name
,
"squareToLen"
)
==
0
||
strcmp
(
call
->
as_CallLeaf
()
->
_name
,
"mulAdd"
)
==
0
||
strcmp
(
call
->
as_CallLeaf
()
->
_name
,
"montgomery_multiply"
)
==
0
||
strcmp
(
call
->
as_CallLeaf
()
->
_name
,
"montgomery_square"
)
==
0
)
)))
{
call
->
dump
();
fatal
(
err_msg_res
(
"EA unexpected CallLeaf %s"
,
call
->
as_CallLeaf
()
->
_name
));
}
...
...
src/share/vm/opto/library_call.cpp
浏览文件 @
de15436a
...
...
@@ -324,6 +324,10 @@ class LibraryCallKit : public GraphKit {
bool
inline_updateBytesCRC32
();
bool
inline_updateByteBufferCRC32
();
bool
inline_multiplyToLen
();
bool
inline_squareToLen
();
bool
inline_mulAdd
();
bool
inline_montgomeryMultiply
();
bool
inline_montgomerySquare
();
bool
inline_profileBoolean
();
};
...
...
@@ -527,6 +531,21 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
if
(
!
UseMultiplyToLenIntrinsic
)
return
NULL
;
break
;
case
vmIntrinsics
::
_squareToLen
:
if
(
!
UseSquareToLenIntrinsic
)
return
NULL
;
break
;
case
vmIntrinsics
::
_mulAdd
:
if
(
!
UseMulAddIntrinsic
)
return
NULL
;
break
;
case
vmIntrinsics
::
_montgomeryMultiply
:
if
(
!
UseMontgomeryMultiplyIntrinsic
)
return
NULL
;
break
;
case
vmIntrinsics
::
_montgomerySquare
:
if
(
!
UseMontgomerySquareIntrinsic
)
return
NULL
;
break
;
case
vmIntrinsics
::
_cipherBlockChaining_encryptAESCrypt
:
case
vmIntrinsics
::
_cipherBlockChaining_decryptAESCrypt
:
if
(
!
UseAESIntrinsics
)
return
NULL
;
...
...
@@ -927,6 +946,17 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case
vmIntrinsics
::
_multiplyToLen
:
return
inline_multiplyToLen
();
case
vmIntrinsics
::
_squareToLen
:
return
inline_squareToLen
();
case
vmIntrinsics
::
_mulAdd
:
return
inline_mulAdd
();
case
vmIntrinsics
::
_montgomeryMultiply
:
return
inline_montgomeryMultiply
();
case
vmIntrinsics
::
_montgomerySquare
:
return
inline_montgomerySquare
();
case
vmIntrinsics
::
_encodeISOArray
:
return
inline_encodeISOArray
();
...
...
@@ -5767,11 +5797,12 @@ bool LibraryCallKit::inline_multiplyToLen() {
assert
(
callee
()
->
signature
()
->
size
()
==
5
,
"multiplyToLen has 5 parameters"
);
Node
*
x
=
argument
(
1
);
Node
*
xlen
=
argument
(
2
);
Node
*
y
=
argument
(
3
);
Node
*
ylen
=
argument
(
4
);
Node
*
z
=
argument
(
5
);
// no receiver because it is a static method
Node
*
x
=
argument
(
0
);
Node
*
xlen
=
argument
(
1
);
Node
*
y
=
argument
(
2
);
Node
*
ylen
=
argument
(
3
);
Node
*
z
=
argument
(
4
);
const
Type
*
x_type
=
x
->
Value
(
&
_gvn
);
const
Type
*
y_type
=
y
->
Value
(
&
_gvn
);
...
...
@@ -5856,6 +5887,215 @@ bool LibraryCallKit::inline_multiplyToLen() {
return
true
;
}
//-------------inline_squareToLen------------------------------------
bool
LibraryCallKit
::
inline_squareToLen
()
{
assert
(
UseSquareToLenIntrinsic
,
"not implementated on this platform"
);
address
stubAddr
=
StubRoutines
::
squareToLen
();
if
(
stubAddr
==
NULL
)
{
return
false
;
// Intrinsic's stub is not implemented on this platform
}
const
char
*
stubName
=
"squareToLen"
;
assert
(
callee
()
->
signature
()
->
size
()
==
4
,
"implSquareToLen has 4 parameters"
);
Node
*
x
=
argument
(
0
);
Node
*
len
=
argument
(
1
);
Node
*
z
=
argument
(
2
);
Node
*
zlen
=
argument
(
3
);
const
Type
*
x_type
=
x
->
Value
(
&
_gvn
);
const
Type
*
z_type
=
z
->
Value
(
&
_gvn
);
const
TypeAryPtr
*
top_x
=
x_type
->
isa_aryptr
();
const
TypeAryPtr
*
top_z
=
z_type
->
isa_aryptr
();
if
(
top_x
==
NULL
||
top_x
->
klass
()
==
NULL
||
top_z
==
NULL
||
top_z
->
klass
()
==
NULL
)
{
// failed array check
return
false
;
}
BasicType
x_elem
=
x_type
->
isa_aryptr
()
->
klass
()
->
as_array_klass
()
->
element_type
()
->
basic_type
();
BasicType
z_elem
=
z_type
->
isa_aryptr
()
->
klass
()
->
as_array_klass
()
->
element_type
()
->
basic_type
();
if
(
x_elem
!=
T_INT
||
z_elem
!=
T_INT
)
{
return
false
;
}
Node
*
x_start
=
array_element_address
(
x
,
intcon
(
0
),
x_elem
);
Node
*
z_start
=
array_element_address
(
z
,
intcon
(
0
),
z_elem
);
Node
*
call
=
make_runtime_call
(
RC_LEAF
|
RC_NO_FP
,
OptoRuntime
::
squareToLen_Type
(),
stubAddr
,
stubName
,
TypePtr
::
BOTTOM
,
x_start
,
len
,
z_start
,
zlen
);
set_result
(
z
);
return
true
;
}
//-------------inline_mulAdd------------------------------------------
bool
LibraryCallKit
::
inline_mulAdd
()
{
assert
(
UseMulAddIntrinsic
,
"not implementated on this platform"
);
address
stubAddr
=
StubRoutines
::
mulAdd
();
if
(
stubAddr
==
NULL
)
{
return
false
;
// Intrinsic's stub is not implemented on this platform
}
const
char
*
stubName
=
"mulAdd"
;
assert
(
callee
()
->
signature
()
->
size
()
==
5
,
"mulAdd has 5 parameters"
);
Node
*
out
=
argument
(
0
);
Node
*
in
=
argument
(
1
);
Node
*
offset
=
argument
(
2
);
Node
*
len
=
argument
(
3
);
Node
*
k
=
argument
(
4
);
const
Type
*
out_type
=
out
->
Value
(
&
_gvn
);
const
Type
*
in_type
=
in
->
Value
(
&
_gvn
);
const
TypeAryPtr
*
top_out
=
out_type
->
isa_aryptr
();
const
TypeAryPtr
*
top_in
=
in_type
->
isa_aryptr
();
if
(
top_out
==
NULL
||
top_out
->
klass
()
==
NULL
||
top_in
==
NULL
||
top_in
->
klass
()
==
NULL
)
{
// failed array check
return
false
;
}
BasicType
out_elem
=
out_type
->
isa_aryptr
()
->
klass
()
->
as_array_klass
()
->
element_type
()
->
basic_type
();
BasicType
in_elem
=
in_type
->
isa_aryptr
()
->
klass
()
->
as_array_klass
()
->
element_type
()
->
basic_type
();
if
(
out_elem
!=
T_INT
||
in_elem
!=
T_INT
)
{
return
false
;
}
Node
*
outlen
=
load_array_length
(
out
);
Node
*
new_offset
=
_gvn
.
transform
(
new
(
C
)
SubINode
(
outlen
,
offset
));
Node
*
out_start
=
array_element_address
(
out
,
intcon
(
0
),
out_elem
);
Node
*
in_start
=
array_element_address
(
in
,
intcon
(
0
),
in_elem
);
Node
*
call
=
make_runtime_call
(
RC_LEAF
|
RC_NO_FP
,
OptoRuntime
::
mulAdd_Type
(),
stubAddr
,
stubName
,
TypePtr
::
BOTTOM
,
out_start
,
in_start
,
new_offset
,
len
,
k
);
Node
*
result
=
_gvn
.
transform
(
new
(
C
)
ProjNode
(
call
,
TypeFunc
::
Parms
));
set_result
(
result
);
return
true
;
}
//-------------inline_montgomeryMultiply-----------------------------------
// Intrinsic expansion for the method registered as
// vmIntrinsics::_montgomeryMultiply.
//
// Arguments, in order: a, b, n (arrays), len (int), inv (long), m (array).
// Because inv is a long it occupies two argument slots, so m is argument(6).
// Returns false (no intrinsic emitted) when the stub is missing or when any of
// the four arrays is not statically known to be an int array. Otherwise emits
// a leaf runtime call and uses m as the result of the intrinsic.
bool LibraryCallKit::inline_montgomeryMultiply() {
  address stubAddr = StubRoutines::montgomeryMultiply();
  if (stubAddr == NULL) {
    return false; // Intrinsic's stub is not implemented on this platform
  }

  assert(UseMontgomeryMultiplyIntrinsic, "not implemented on this platform");
  // Leaf-call name; "montgomery_multiply" is the name that
  // ConnectionGraph::process_call_arguments expects for this call.
  const char* stubName = "montgomery_multiply";

  assert(callee()->signature()->size() == 7, "montgomeryMultiply has 7 parameters");

  Node* a    = argument(0);
  Node* b    = argument(1);
  Node* n    = argument(2);
  Node* len  = argument(3);
  Node* inv  = argument(4);
  Node* m    = argument(6);

  // Each array operand is checked against its OWN type.
  const Type* a_type = a->Value(&_gvn);
  const TypeAryPtr* top_a = a_type->isa_aryptr();
  const Type* b_type = b->Value(&_gvn);
  const TypeAryPtr* top_b = b_type->isa_aryptr();
  const Type* n_type = n->Value(&_gvn);
  const TypeAryPtr* top_n = n_type->isa_aryptr();
  const Type* m_type = m->Value(&_gvn);
  const TypeAryPtr* top_m = m_type->isa_aryptr();
  if (top_a  == NULL || top_a->klass()  == NULL ||
      top_b  == NULL || top_b->klass()  == NULL ||
      top_n  == NULL || top_n->klass()  == NULL ||
      top_m  == NULL || top_m->klass()  == NULL) {
    // failed array check
    return false;
  }

  BasicType a_elem = top_a->klass()->as_array_klass()->element_type()->basic_type();
  BasicType b_elem = top_b->klass()->as_array_klass()->element_type()->basic_type();
  BasicType n_elem = top_n->klass()->as_array_klass()->element_type()->basic_type();
  BasicType m_elem = top_m->klass()->as_array_klass()->element_type()->basic_type();
  if (a_elem != T_INT || b_elem != T_INT || n_elem != T_INT || m_elem != T_INT) {
    return false;
  }

  // Make the call
  {
    // The runtime routine receives the address of element 0 of each array.
    Node* a_start = array_element_address(a, intcon(0), a_elem);
    Node* b_start = array_element_address(b, intcon(0), b_elem);
    Node* n_start = array_element_address(n, intcon(0), n_elem);
    Node* m_start = array_element_address(m, intcon(0), m_elem);

    // top() fills the second slot of the long argument inv.
    make_runtime_call(RC_LEAF,
                      OptoRuntime::montgomeryMultiply_Type(),
                      stubAddr, stubName, TypePtr::BOTTOM,
                      a_start, b_start, n_start, len, inv, top(),
                      m_start);
    // The output array itself is the result of the intrinsic.
    set_result(m);
  }

  return true;
}
// Intrinsic expansion for the method registered as
// vmIntrinsics::_montgomerySquare.
//
// Arguments, in order: a, n (arrays), len (int), inv (long), m (array).
// Because inv is a long it occupies two argument slots, so m is argument(5).
// Returns false (no intrinsic emitted) when the stub is missing or when any of
// the three arrays is not statically known to be an int array. Otherwise emits
// a leaf runtime call and uses m as the result of the intrinsic.
bool LibraryCallKit::inline_montgomerySquare() {
  address stubAddr = StubRoutines::montgomerySquare();
  if (stubAddr == NULL) {
    return false; // Intrinsic's stub is not implemented on this platform
  }

  assert(UseMontgomerySquareIntrinsic, "not implemented on this platform");
  const char* stubName = "montgomery_square";

  assert(callee()->signature()->size() == 6, "montgomerySquare has 6 parameters");

  Node* a    = argument(0);
  Node* n    = argument(1);
  Node* len  = argument(2);
  Node* inv  = argument(3);
  Node* m    = argument(5);

  // Each array operand is checked against its OWN type.
  const Type* a_type = a->Value(&_gvn);
  const TypeAryPtr* top_a = a_type->isa_aryptr();
  const Type* n_type = n->Value(&_gvn);
  const TypeAryPtr* top_n = n_type->isa_aryptr();
  const Type* m_type = m->Value(&_gvn);
  const TypeAryPtr* top_m = m_type->isa_aryptr();
  if (top_a  == NULL || top_a->klass()  == NULL ||
      top_n  == NULL || top_n->klass()  == NULL ||
      top_m  == NULL || top_m->klass()  == NULL) {
    // failed array check
    return false;
  }

  BasicType a_elem = top_a->klass()->as_array_klass()->element_type()->basic_type();
  BasicType n_elem = top_n->klass()->as_array_klass()->element_type()->basic_type();
  BasicType m_elem = top_m->klass()->as_array_klass()->element_type()->basic_type();
  if (a_elem != T_INT || n_elem != T_INT || m_elem != T_INT) {
    return false;
  }

  // Make the call
  {
    // The runtime routine receives the address of element 0 of each array.
    Node* a_start = array_element_address(a, intcon(0), a_elem);
    Node* n_start = array_element_address(n, intcon(0), n_elem);
    Node* m_start = array_element_address(m, intcon(0), m_elem);

    // top() fills the second slot of the long argument inv.
    make_runtime_call(RC_LEAF,
                      OptoRuntime::montgomerySquare_Type(),
                      stubAddr, stubName, TypePtr::BOTTOM,
                      a_start, n_start, len, inv, top(),
                      m_start);
    // The output array itself is the result of the intrinsic.
    set_result(m);
  }

  return true;
}
/**
* Calculate CRC32 for byte.
...
...
src/share/vm/opto/runtime.cpp
浏览文件 @
de15436a
...
...
@@ -956,6 +956,94 @@ const TypeFunc* OptoRuntime::multiplyToLen_Type() {
return
TypeFunc
::
make
(
domain
,
range
);
}
// Call signature for the squareToLen stub:
// two non-null pointers and two ints in, nothing out.
const TypeFunc* OptoRuntime::squareToLen_Type() {
  // Input tuple (domain).
  const int arg_count = 4;
  const Type** in_sig = TypeTuple::fields(arg_count);

  int slot = TypeFunc::Parms;
  in_sig[slot++] = TypePtr::NOTNULL;    // x
  in_sig[slot++] = TypeInt::INT;        // len
  in_sig[slot++] = TypePtr::NOTNULL;    // z
  in_sig[slot++] = TypeInt::INT;        // zlen
  assert(slot == TypeFunc::Parms + arg_count, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + arg_count, in_sig);

  // Output tuple (range): no result type needed.
  const Type** out_sig = TypeTuple::fields(1);
  out_sig[TypeFunc::Parms + 0] = NULL;
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, out_sig);

  return TypeFunc::make(domain, range);
}
// Call signature for the mulAdd stub:
// 2 non-null pointers and 3 ints in, one int (the carry) out.
const TypeFunc* OptoRuntime::mulAdd_Type() {
  // Input tuple (domain).
  const int arg_count = 5;
  const Type** in_sig = TypeTuple::fields(arg_count);

  int slot = TypeFunc::Parms;
  in_sig[slot++] = TypePtr::NOTNULL;    // out
  in_sig[slot++] = TypePtr::NOTNULL;    // in
  in_sig[slot++] = TypeInt::INT;        // offset
  in_sig[slot++] = TypeInt::INT;        // len
  in_sig[slot++] = TypeInt::INT;        // k
  assert(slot == TypeFunc::Parms + arg_count, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + arg_count, in_sig);

  // Output tuple (range): returning carry (int).
  const Type** out_sig = TypeTuple::fields(1);
  out_sig[TypeFunc::Parms + 0] = TypeInt::INT;
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, out_sig);

  return TypeFunc::make(domain, range);
}
// Call signature for the Montgomery multiply runtime routine:
// three non-null pointers, an int, a long (two slots) and a non-null pointer
// for the output array.
const TypeFunc* OptoRuntime::montgomeryMultiply_Type() {
  // Input tuple (domain). The long takes two slots (LONG + HALF), hence 7.
  const int arg_count = 7;
  const Type** in_sig = TypeTuple::fields(arg_count);

  int slot = TypeFunc::Parms;
  in_sig[slot++] = TypePtr::NOTNULL;    // a
  in_sig[slot++] = TypePtr::NOTNULL;    // b
  in_sig[slot++] = TypePtr::NOTNULL;    // n
  in_sig[slot++] = TypeInt::INT;        // len
  in_sig[slot++] = TypeLong::LONG;      // inv
  in_sig[slot++] = Type::HALF;          // second slot of inv
  in_sig[slot++] = TypePtr::NOTNULL;    // result
  assert(slot == TypeFunc::Parms + arg_count, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + arg_count, in_sig);

  // Output tuple (range). A slot is filled in here, but the tuple is built
  // with only TypeFunc::Parms entries, the same count squareToLen_Type uses
  // for "no result"; the output reaches the caller through the result array
  // argument instead.
  const Type** out_sig = TypeTuple::fields(1);
  out_sig[TypeFunc::Parms + 0] = TypePtr::NOTNULL;
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, out_sig);

  return TypeFunc::make(domain, range);
}
// Call signature for the Montgomery square runtime routine:
// two non-null pointers, an int, a long (two slots) and a non-null pointer
// for the output array.
const TypeFunc* OptoRuntime::montgomerySquare_Type() {
  // Input tuple (domain). The long takes two slots (LONG + HALF), hence 6.
  const int arg_count = 6;
  const Type** in_sig = TypeTuple::fields(arg_count);

  int slot = TypeFunc::Parms;
  in_sig[slot++] = TypePtr::NOTNULL;    // a
  in_sig[slot++] = TypePtr::NOTNULL;    // n
  in_sig[slot++] = TypeInt::INT;        // len
  in_sig[slot++] = TypeLong::LONG;      // inv
  in_sig[slot++] = Type::HALF;          // second slot of inv
  in_sig[slot++] = TypePtr::NOTNULL;    // result
  assert(slot == TypeFunc::Parms + arg_count, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + arg_count, in_sig);

  // Output tuple (range). A slot is filled in here, but the tuple is built
  // with only TypeFunc::Parms entries, the same count squareToLen_Type uses
  // for "no result"; the output reaches the caller through the result array
  // argument instead.
  const Type** out_sig = TypeTuple::fields(1);
  out_sig[TypeFunc::Parms + 0] = TypePtr::NOTNULL;
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, out_sig);

  return TypeFunc::make(domain, range);
}
//------------- Interpreter state access for on stack replacement
...
...
src/share/vm/opto/runtime.hpp
浏览文件 @
de15436a
...
...
@@ -305,6 +305,12 @@ private:
static
const
TypeFunc
*
multiplyToLen_Type
();
static
const
TypeFunc
*
squareToLen_Type
();
static
const
TypeFunc
*
mulAdd_Type
();
static
const
TypeFunc
*
montgomeryMultiply_Type
();
static
const
TypeFunc
*
montgomerySquare_Type
();
static
const
TypeFunc
*
updateBytesCRC32_Type
();
// leaf on stack replacement interpreter accessor types
...
...
src/share/vm/runtime/sharedRuntime.hpp
浏览文件 @
de15436a
...
...
@@ -145,6 +145,12 @@ class SharedRuntime: AllStatic {
static
double
dsqrt
(
double
f
);
#endif
// Montgomery multiplication
static
void
montgomery_multiply
(
jint
*
a_ints
,
jint
*
b_ints
,
jint
*
n_ints
,
jint
len
,
jlong
inv
,
jint
*
m_ints
);
static
void
montgomery_square
(
jint
*
a_ints
,
jint
*
n_ints
,
jint
len
,
jlong
inv
,
jint
*
m_ints
);
#ifdef __SOFTFP__
// C++ compiler generates soft float instructions as well as passing
// float and double in registers.
...
...
src/share/vm/runtime/stubRoutines.cpp
浏览文件 @
de15436a
...
...
@@ -136,6 +136,10 @@ address StubRoutines::_updateBytesCRC32 = NULL;
address
StubRoutines
::
_crc_table_adr
=
NULL
;
address
StubRoutines
::
_multiplyToLen
=
NULL
;
address
StubRoutines
::
_squareToLen
=
NULL
;
address
StubRoutines
::
_mulAdd
=
NULL
;
address
StubRoutines
::
_montgomeryMultiply
=
NULL
;
address
StubRoutines
::
_montgomerySquare
=
NULL
;
double
(
*
StubRoutines
::
_intrinsic_log
)(
double
)
=
NULL
;
double
(
*
StubRoutines
::
_intrinsic_log10
)(
double
)
=
NULL
;
...
...
src/share/vm/runtime/stubRoutines.hpp
浏览文件 @
de15436a
...
...
@@ -209,6 +209,10 @@ class StubRoutines: AllStatic {
static
address
_crc_table_adr
;
static
address
_multiplyToLen
;
static
address
_squareToLen
;
static
address
_mulAdd
;
static
address
_montgomeryMultiply
;
static
address
_montgomerySquare
;
// These are versions of the java.lang.Math methods which perform
// the same operations as the intrinsic version. They are used for
...
...
@@ -367,6 +371,10 @@ class StubRoutines: AllStatic {
static
address
crc_table_addr
()
{
return
_crc_table_adr
;
}
static
address
multiplyToLen
()
{
return
_multiplyToLen
;
}
static
address
squareToLen
()
{
return
_squareToLen
;
}
static
address
mulAdd
()
{
return
_mulAdd
;
}
static
address
montgomeryMultiply
()
{
return
_montgomeryMultiply
;
}
static
address
montgomerySquare
()
{
return
_montgomerySquare
;
}
static
address
select_fill_function
(
BasicType
t
,
bool
aligned
,
const
char
*
&
name
);
...
...
src/share/vm/runtime/vmStructs.cpp
浏览文件 @
de15436a
...
...
@@ -813,6 +813,8 @@ typedef TwoOopHashtable<Symbol*, mtClass> SymbolTwoOopHashtable;
static_field(StubRoutines, _updateBytesCRC32, address) \
static_field(StubRoutines, _crc_table_adr, address) \
static_field(StubRoutines, _multiplyToLen, address) \
static_field(StubRoutines, _squareToLen, address) \
static_field(StubRoutines, _mulAdd, address) \
\
/*****************/
\
/* SharedRuntime */
\
...
...
test/compiler/intrinsics/montgomerymultiply/MontgomeryMultiplyTest.java
0 → 100644
浏览文件 @
de15436a
//
// Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2015, Red Hat Inc. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//
import
java.lang.invoke.MethodHandle
;
import
java.lang.invoke.MethodHandles
;
import
java.lang.invoke.MethodType
;
import
java.lang.reflect.Constructor
;
import
java.lang.reflect.Field
;
import
java.lang.reflect.Method
;
import
java.math.BigInteger
;
import
java.util.Arrays
;
import
java.util.Random
;
/**
* @test
* @bug 8130150
* @library /testlibrary
* @requires (os.simpleArch == "x64") & (os.family != "windows")
* @summary Verify that the Montgomery multiply intrinsic works and correctly checks its arguments.
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UseMontgomerySquareIntrinsic
* -XX:+UseMontgomeryMultiplyIntrinsic MontgomeryMultiplyTest
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UseMontgomerySquareIntrinsic
* -XX:-UseMontgomeryMultiplyIntrinsic MontgomeryMultiplyTest
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-UseMontgomerySquareIntrinsic
* -XX:+UseMontgomeryMultiplyIntrinsic MontgomeryMultiplyTest
*/
public
class
MontgomeryMultiplyTest
{
static
final
MethodHandles
.
Lookup
lookup
=
MethodHandles
.
lookup
();
static
final
MethodHandle
montgomeryMultiplyHandle
,
montgomerySquareHandle
;
static
final
MethodHandle
bigIntegerConstructorHandle
;
static
final
Field
bigIntegerMagField
;
static
{
// Use reflection to gain access to the methods we want to test.
try
{
Method
m
=
BigInteger
.
class
.
getDeclaredMethod
(
"montgomeryMultiply"
,
/*a*/
int
[].
class
,
/*b*/
int
[].
class
,
/*n*/
int
[].
class
,
/*len*/
int
.
class
,
/*inv*/
long
.
class
,
/*product*/
int
[].
class
);
m
.
setAccessible
(
true
);
montgomeryMultiplyHandle
=
lookup
.
unreflect
(
m
);
m
=
BigInteger
.
class
.
getDeclaredMethod
(
"montgomerySquare"
,
/*a*/
int
[].
class
,
/*n*/
int
[].
class
,
/*len*/
int
.
class
,
/*inv*/
long
.
class
,
/*product*/
int
[].
class
);
m
.
setAccessible
(
true
);
montgomerySquareHandle
=
lookup
.
unreflect
(
m
);
Constructor
c
=
BigInteger
.
class
.
getDeclaredConstructor
(
int
.
class
,
int
[].
class
);
c
.
setAccessible
(
true
);
bigIntegerConstructorHandle
=
lookup
.
unreflectConstructor
(
c
);
bigIntegerMagField
=
BigInteger
.
class
.
getDeclaredField
(
"mag"
);
bigIntegerMagField
.
setAccessible
(
true
);
}
catch
(
Throwable
ex
)
{
throw
new
RuntimeException
(
ex
);
}
}
// Invoke either BigInteger.montgomeryMultiply or BigInteger.montgomerySquare.
int
[]
montgomeryMultiply
(
int
[]
a
,
int
[]
b
,
int
[]
n
,
int
len
,
long
inv
,
int
[]
product
)
throws
Throwable
{
int
[]
result
=
(
a
==
b
)
?
(
int
[])
montgomerySquareHandle
.
invokeExact
(
a
,
n
,
len
,
inv
,
product
)
:
(
int
[])
montgomeryMultiplyHandle
.
invokeExact
(
a
,
b
,
n
,
len
,
inv
,
product
);
return
Arrays
.
copyOf
(
result
,
len
);
}
// Invoke the private constructor BigInteger(int[]).
BigInteger
newBigInteger
(
int
[]
val
)
throws
Throwable
{
return
(
BigInteger
)
bigIntegerConstructorHandle
.
invokeExact
(
1
,
val
);
}
// Get the private field BigInteger.mag
int
[]
mag
(
BigInteger
n
)
{
try
{
return
(
int
[])
bigIntegerMagField
.
get
(
n
);
}
catch
(
Exception
ex
)
{
throw
new
RuntimeException
(
ex
);
}
}
// Montgomery multiplication
// Calculate a * b * r^-1 mod n)
//
// R is a power of the word size
// N' = R^-1 mod N
//
// T := ab
// m := (T mod R)N' mod R [so 0 <= m < R]
// t := (T + mN)/R
// if t >= N then return t - N else return t
//
BigInteger
montgomeryMultiply
(
BigInteger
a
,
BigInteger
b
,
BigInteger
N
,
int
len
,
BigInteger
n_prime
)
throws
Throwable
{
BigInteger
T
=
a
.
multiply
(
b
);
BigInteger
R
=
BigInteger
.
ONE
.
shiftLeft
(
len
*
32
);
BigInteger
mask
=
R
.
subtract
(
BigInteger
.
ONE
);
BigInteger
m
=
(
T
.
and
(
mask
)).
multiply
(
n_prime
);
m
=
m
.
and
(
mask
);
// i.e. m.mod(R)
T
=
T
.
add
(
m
.
multiply
(
N
));
T
=
T
.
shiftRight
(
len
*
32
);
// i.e. T.divide(R)
if
(
T
.
compareTo
(
N
)
>
0
)
{
T
=
T
.
subtract
(
N
);
}
return
T
;
}
// Call the Montgomery multiply intrinsic.
BigInteger
montgomeryMultiply
(
int
[]
a_words
,
int
[]
b_words
,
int
[]
n_words
,
int
len
,
BigInteger
inv
)
throws
Throwable
{
BigInteger
t
=
montgomeryMultiply
(
newBigInteger
(
a_words
),
newBigInteger
(
b_words
),
newBigInteger
(
n_words
),
len
,
inv
);
return
t
;
}
// Check that the Montgomery multiply intrinsic returns the same
// result as the longhand calculation.
void
check
(
int
[]
a_words
,
int
[]
b_words
,
int
[]
n_words
,
int
len
,
BigInteger
inv
)
throws
Throwable
{
BigInteger
n
=
newBigInteger
(
n_words
);
BigInteger
slow
=
montgomeryMultiply
(
a_words
,
b_words
,
n_words
,
len
,
inv
);
BigInteger
fast
=
newBigInteger
(
montgomeryMultiply
(
a_words
,
b_words
,
n_words
,
len
,
inv
.
longValue
(),
null
));
// The intrinsic may not return the same value as the longhand
// calculation but they must have the same residue mod N.
if
(!
slow
.
mod
(
n
).
equals
(
fast
.
mod
(
n
)))
{
throw
new
RuntimeException
();
}
}
Random
rnd
=
new
Random
(
0
);
// Return a random value of length <= bits in an array of even length
int
[]
random_val
(
int
bits
)
{
int
len
=
(
bits
+
63
)/
64
;
// i.e. length in longs
int
[]
val
=
new
int
[
len
*
2
];
for
(
int
i
=
0
;
i
<
val
.
length
;
i
++)
val
[
i
]
=
rnd
.
nextInt
();
int
leadingZeros
=
64
-
(
bits
&
64
);
if
(
leadingZeros
>=
32
)
{
val
[
0
]
=
0
;
val
[
1
]
&=
~(-
1
l
<<
(
leadingZeros
&
31
));
}
else
{
val
[
0
]
&=
~(-
1
l
<<
leadingZeros
);
}
return
val
;
}
void
testOneLength
(
int
lenInBits
,
int
lenInInts
)
throws
Throwable
{
BigInteger
mod
=
new
BigInteger
(
lenInBits
,
2
,
rnd
);
BigInteger
r
=
BigInteger
.
ONE
.
shiftLeft
(
lenInInts
*
32
);
BigInteger
n_prime
=
mod
.
modInverse
(
r
).
negate
();
// Make n.length even, padding with a zero if necessary
int
[]
n
=
mag
(
mod
);
if
(
n
.
length
<
lenInInts
)
{
int
[]
x
=
new
int
[
lenInInts
];
System
.
arraycopy
(
n
,
0
,
x
,
lenInInts
-
n
.
length
,
n
.
length
);
n
=
x
;
}
for
(
int
i
=
0
;
i
<
10000
;
i
++)
{
// multiply
check
(
random_val
(
lenInBits
),
random_val
(
lenInBits
),
n
,
lenInInts
,
n_prime
);
// square
int
[]
tmp
=
random_val
(
lenInBits
);
check
(
tmp
,
tmp
,
n
,
lenInInts
,
n_prime
);
}
}
// Test the Montgomery multiply intrinsic with a bunch of random
// values of varying lengths. Do this for long enough that the
// caller of the intrinsic is C2-compiled.
void
testResultValues
()
throws
Throwable
{
// Test a couple of interesting edge cases.
testOneLength
(
1024
,
32
);
testOneLength
(
1025
,
34
);
for
(
int
j
=
10
;
j
>
0
;
j
--)
{
// Construct a random prime whose length in words is even
int
lenInBits
=
rnd
.
nextInt
(
2048
)
+
64
;
int
lenInInts
=
(
lenInBits
+
63
)/
64
*
2
;
testOneLength
(
lenInBits
,
lenInInts
);
}
}
// Range checks
void
testOneMontgomeryMultiplyCheck
(
int
[]
a
,
int
[]
b
,
int
[]
n
,
int
len
,
long
inv
,
int
[]
product
,
Class
klass
)
{
try
{
montgomeryMultiply
(
a
,
b
,
n
,
len
,
inv
,
product
);
}
catch
(
Throwable
ex
)
{
if
(
klass
.
isAssignableFrom
(
ex
.
getClass
()))
return
;
throw
new
RuntimeException
(
klass
+
" expected, "
+
ex
+
" was thrown"
);
}
throw
new
RuntimeException
(
klass
+
" expected, was not thrown"
);
}
void
testOneMontgomeryMultiplyCheck
(
int
[]
a
,
int
[]
b
,
BigInteger
n
,
int
len
,
BigInteger
inv
,
Class
klass
)
{
testOneMontgomeryMultiplyCheck
(
a
,
b
,
mag
(
n
),
len
,
inv
.
longValue
(),
null
,
klass
);
}
void
testOneMontgomeryMultiplyCheck
(
int
[]
a
,
int
[]
b
,
BigInteger
n
,
int
len
,
BigInteger
inv
,
int
[]
product
,
Class
klass
)
{
testOneMontgomeryMultiplyCheck
(
a
,
b
,
mag
(
n
),
len
,
inv
.
longValue
(),
product
,
klass
);
}
/**
 * Feeds deliberately invalid arguments to testOneMontgomeryMultiplyCheck.
 *
 * Every call passes IllegalArgumentException.class as the class argument
 * (presumably the exception the helper expects to see; the helper is defined
 * elsewhere in this class). Calls that pass the same array for both operands
 * are the "square" cases; calls that pass a clone as the second operand are
 * the "multiply" cases.
 *
 * Groups:
 *   - length out of range (41, 0, -1) against arrays from random_val(40)
 *   - odd length (39) and non-positive lengths against arrays from random_val(39)
 *   - length 40 with at least one operand or the product array being the
 *     shorter "small" array
 */
void testMontgomeryMultiplyChecks() {
    int[] blah = random_val(40);
    int[] small = random_val(39);
    BigInteger mod = new BigInteger(40*32 , 2, rnd);
    BigInteger r = BigInteger.ONE.shiftLeft(40*32);
    BigInteger n_prime = mod.modInverse(r).negate();

    // Length out of range: square
    testOneMontgomeryMultiplyCheck(blah, blah, mod, 41, n_prime, IllegalArgumentException.class);
    testOneMontgomeryMultiplyCheck(blah, blah, mod, 0, n_prime, IllegalArgumentException.class);
    testOneMontgomeryMultiplyCheck(blah, blah, mod, -1, n_prime, IllegalArgumentException.class);
    // As above, but for multiply
    testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 41, n_prime, IllegalArgumentException.class);
    testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 0, n_prime, IllegalArgumentException.class);
    // Negative length, mirroring the square group above (this call used to
    // repeat len == 0, leaving the negative case untested for multiply).
    testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, -1, n_prime, IllegalArgumentException.class);

    // Length odd
    testOneMontgomeryMultiplyCheck(small, small, mod, 39, n_prime, IllegalArgumentException.class);
    testOneMontgomeryMultiplyCheck(small, small, mod, 0, n_prime, IllegalArgumentException.class);
    testOneMontgomeryMultiplyCheck(small, small, mod, -1, n_prime, IllegalArgumentException.class);
    // As above, but for multiply
    testOneMontgomeryMultiplyCheck(small, small.clone(), mod, 39, n_prime, IllegalArgumentException.class);
    testOneMontgomeryMultiplyCheck(small, small.clone(), mod, 0, n_prime, IllegalArgumentException.class);
    testOneMontgomeryMultiplyCheck(small, small.clone(), mod, -1, n_prime, IllegalArgumentException.class);

    // array too small
    testOneMontgomeryMultiplyCheck(blah, blah, mod, 40, n_prime, small, IllegalArgumentException.class);
    testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 40, n_prime, small, IllegalArgumentException.class);
    testOneMontgomeryMultiplyCheck(small, blah, mod, 40, n_prime, blah, IllegalArgumentException.class);
    testOneMontgomeryMultiplyCheck(blah, small, mod, 40, n_prime, blah, IllegalArgumentException.class);
    // NOTE(review): same arguments as the first call of this group; kept as in the original.
    testOneMontgomeryMultiplyCheck(blah, blah, mod, 40, n_prime, small, IllegalArgumentException.class);
    testOneMontgomeryMultiplyCheck(small, small, mod, 40, n_prime, blah, IllegalArgumentException.class);
}
/**
 * Test entry point: runs the argument checks and then the result-value
 * checks, each on its own MontgomeryMultiplyTest instance. Anything thrown
 * by either phase is rethrown wrapped in a RuntimeException.
 */
public static void main(String[] args) {
    try {
        MontgomeryMultiplyTest argumentChecks = new MontgomeryMultiplyTest();
        argumentChecks.testMontgomeryMultiplyChecks();

        MontgomeryMultiplyTest resultChecks = new MontgomeryMultiplyTest();
        resultChecks.testResultValues();
    } catch (Throwable failure) {
        throw new RuntimeException(failure);
    }
}
}
test/compiler/intrinsics/muladd/TestMulAdd.java
0 → 100644
浏览文件 @
de15436a
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/**
* @test
* @bug 8081778
* @summary Add C2 x86 intrinsic for BigInteger::mulAdd() method
*
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch
* -XX:+IgnoreUnrecognizedVMOptions -XX:-UseSquareToLenIntrinsic -XX:-UseMultiplyToLenIntrinsic
* -XX:+UseMulAddIntrinsic
* -XX:CompileCommand=dontinline,TestMulAdd::main
* -XX:CompileCommand=option,TestMulAdd::base_multiply,ccstr,DisableIntrinsic,_mulAdd
* -XX:CompileCommand=option,java.math.BigInteger::multiply,ccstr,DisableIntrinsic,_mulAdd
* -XX:CompileCommand=option,java.math.BigInteger::square,ccstr,DisableIntrinsic,_mulAdd
* -XX:CompileCommand=option,java.math.BigInteger::squareToLen,ccstr,DisableIntrinsic,_mulAdd
* -XX:CompileCommand=option,java.math.BigInteger::mulAdd,ccstr,DisableIntrinsic,_mulAdd
* -XX:CompileCommand=inline,java.math.BigInteger::multiply
* -XX:CompileCommand=inline,java.math.BigInteger::square
* -XX:CompileCommand=inline,java.math.BigInteger::squareToLen
* -XX:CompileCommand=inline,java.math.BigInteger::mulAdd TestMulAdd
*/
import
java.util.Random
;
import
java.math.*
;
public
class
TestMulAdd
{
// Avoid intrinsic by preventing inlining multiply() and mulAdd().
public
static
BigInteger
base_multiply
(
BigInteger
op1
)
{
return
op1
.
multiply
(
op1
);
}
// Generate mulAdd() intrinsic by inlining multiply().
public
static
BigInteger
new_multiply
(
BigInteger
op1
)
{
return
op1
.
multiply
(
op1
);
}
public
static
boolean
bytecompare
(
BigInteger
b1
,
BigInteger
b2
)
{
byte
[]
data1
=
b1
.
toByteArray
();
byte
[]
data2
=
b2
.
toByteArray
();
if
(
data1
.
length
!=
data2
.
length
)
return
false
;
for
(
int
i
=
0
;
i
<
data1
.
length
;
i
++)
{
if
(
data1
[
i
]
!=
data2
[
i
])
return
false
;
}
return
true
;
}
public
static
String
stringify
(
BigInteger
b
)
{
String
strout
=
""
;
byte
[]
data
=
b
.
toByteArray
();
for
(
int
i
=
0
;
i
<
data
.
length
;
i
++)
{
strout
+=
(
String
.
format
(
"%02x"
,
data
[
i
])
+
" "
);
}
return
strout
;
}
public
static
void
main
(
String
args
[])
throws
Exception
{
BigInteger
oldsum
=
new
BigInteger
(
"0"
);
BigInteger
newsum
=
new
BigInteger
(
"0"
);
BigInteger
b1
,
b2
,
oldres
,
newres
;
Random
rand
=
new
Random
();
long
seed
=
System
.
nanoTime
();
Random
rand1
=
new
Random
();
long
seed1
=
System
.
nanoTime
();
rand
.
setSeed
(
seed
);
rand1
.
setSeed
(
seed1
);
for
(
int
j
=
0
;
j
<
100000
;
j
++)
{
int
rand_int
=
rand1
.
nextInt
(
3136
)+
32
;
b1
=
new
BigInteger
(
rand_int
,
rand
);
oldres
=
base_multiply
(
b1
);
newres
=
new_multiply
(
b1
);
oldsum
=
oldsum
.
add
(
oldres
);
newsum
=
newsum
.
add
(
newres
);
if
(!
bytecompare
(
oldres
,
newres
))
{
System
.
out
.
print
(
"mismatch for:b1:"
+
stringify
(
b1
)
+
" :oldres:"
+
stringify
(
oldres
)
+
" :newres:"
+
stringify
(
newres
));
System
.
out
.
println
(
b1
);
throw
new
Exception
(
"Failed"
);
}
}
if
(!
bytecompare
(
oldsum
,
newsum
))
{
System
.
out
.
println
(
"Failure: oldsum:"
+
stringify
(
oldsum
)
+
" newsum:"
+
stringify
(
newsum
));
throw
new
Exception
(
"Failed"
);
}
else
{
System
.
out
.
println
(
"Success"
);
}
}
}
test/compiler/intrinsics/squaretolen/TestSquareToLen.java
0 → 100644
浏览文件 @
de15436a
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/**
* @test
* @bug 8081778
* @summary Add C2 x86 intrinsic for BigInteger::squareToLen() method
*
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch
* -XX:+IgnoreUnrecognizedVMOptions
* -XX:+UseSquareToLenIntrinsic
* -XX:CompileCommand=exclude,TestSquareToLen::main
* -XX:CompileCommand=option,TestSquareToLen::base_multiply,ccstr,DisableIntrinsic,_squareToLen
* -XX:CompileCommand=option,java.math.BigInteger::multiply,ccstr,DisableIntrinsic,_squareToLen
* -XX:CompileCommand=option,java.math.BigInteger::square,ccstr,DisableIntrinsic,_squareToLen
* -XX:CompileCommand=option,java.math.BigInteger::squareToLen,ccstr,DisableIntrinsic,_squareToLen
* -XX:CompileCommand=inline,java.math.BigInteger::multiply
* -XX:CompileCommand=inline,java.math.BigInteger::square
* -XX:CompileCommand=inline,java.math.BigInteger::squareToLen TestSquareToLen
*/
import
java.util.Random
;
import
java.math.*
;
public
class
TestSquareToLen
{
// Avoid intrinsic by preventing inlining multiply() and squareToLen().
public
static
BigInteger
base_multiply
(
BigInteger
op1
)
{
return
op1
.
multiply
(
op1
);
}
// Generate squareToLen() intrinsic by inlining multiply().
public
static
BigInteger
new_multiply
(
BigInteger
op1
)
{
return
op1
.
multiply
(
op1
);
}
public
static
boolean
bytecompare
(
BigInteger
b1
,
BigInteger
b2
)
{
byte
[]
data1
=
b1
.
toByteArray
();
byte
[]
data2
=
b2
.
toByteArray
();
if
(
data1
.
length
!=
data2
.
length
)
return
false
;
for
(
int
i
=
0
;
i
<
data1
.
length
;
i
++)
{
if
(
data1
[
i
]
!=
data2
[
i
])
return
false
;
}
return
true
;
}
public
static
String
stringify
(
BigInteger
b
)
{
String
strout
=
""
;
byte
[]
data
=
b
.
toByteArray
();
for
(
int
i
=
0
;
i
<
data
.
length
;
i
++)
{
strout
+=
(
String
.
format
(
"%02x"
,
data
[
i
])
+
" "
);
}
return
strout
;
}
public
static
void
main
(
String
args
[])
throws
Exception
{
BigInteger
oldsum
=
new
BigInteger
(
"0"
);
BigInteger
newsum
=
new
BigInteger
(
"0"
);
BigInteger
b1
,
b2
,
oldres
,
newres
;
Random
rand
=
new
Random
();
long
seed
=
System
.
nanoTime
();
Random
rand1
=
new
Random
();
long
seed1
=
System
.
nanoTime
();
rand
.
setSeed
(
seed
);
rand1
.
setSeed
(
seed1
);
for
(
int
j
=
0
;
j
<
100000
;
j
++)
{
int
rand_int
=
rand1
.
nextInt
(
3136
)+
32
;
b1
=
new
BigInteger
(
rand_int
,
rand
);
oldres
=
base_multiply
(
b1
);
newres
=
new_multiply
(
b1
);
oldsum
=
oldsum
.
add
(
oldres
);
newsum
=
newsum
.
add
(
newres
);
if
(!
bytecompare
(
oldres
,
newres
))
{
System
.
out
.
print
(
"mismatch for:b1:"
+
stringify
(
b1
)
+
" :oldres:"
+
stringify
(
oldres
)
+
" :newres:"
+
stringify
(
newres
));
System
.
out
.
println
(
b1
);
throw
new
Exception
(
"Failed"
);
}
}
if
(!
bytecompare
(
oldsum
,
newsum
))
{
System
.
out
.
println
(
"Failure: oldsum:"
+
stringify
(
oldsum
)
+
" newsum:"
+
stringify
(
newsum
));
throw
new
Exception
(
"Failed"
);
}
else
{
System
.
out
.
println
(
"Success"
);
}
}
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录