openanolis / dragonwell8_hotspot

Commit 510019d2
Authored May 19, 2015 by amurillo
Merge commit; parents: d9e9a7b8, 123028c7

Showing 8 changed files with 234 additions and 34 deletions (+234, -34)
Files changed:

  .hgtags                                                                +1   -0
  make/hotspot_version                                                   +1   -1
  src/cpu/x86/vm/macroAssembler_x86.cpp                                  +11  -5
  src/cpu/x86/vm/stubGenerator_x86_32.cpp                                +2   -1
  src/cpu/x86/vm/stubGenerator_x86_64.cpp                                +4   -2
  src/share/vm/opto/superword.cpp                                        +69  -23
  src/share/vm/opto/superword.hpp                                        +2   -2
  test/compiler/loopopts/superword/TestVectorizationWithInvariant.java   +144 -0
.hgtags
@@ -641,3 +641,4 @@ ced08ed4924fc6581626c7ce2d769fc18d7b23e0 jdk8u60-b13
 c9f8b7319d0a5ab07310cf53507642a8fd91589b jdk8u60-b14
 4187dc92e90b16b4097627b8af4f5e6e63f3b497 hs25.60-b15
 b99f1bf208f385277b03a985d35b6614b4095f3e jdk8u60-b15
+f5800068c61d0627c14e99836e9ce5cf0ef00075 hs25.60-b16
make/hotspot_version
@@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2015
 HS_MAJOR_VER=25
 HS_MINOR_VER=60
-HS_BUILD_NUMBER=15
+HS_BUILD_NUMBER=16
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
src/cpu/x86/vm/macroAssembler_x86.cpp
@@ -6690,7 +6690,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
     subl(cnt2, stride2);
     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);

     // compare wide vectors tail
     bind(COMPARE_WIDE_TAIL);

@@ -6705,7 +6705,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
     // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
     bind(VECTOR_NOT_EQUAL);
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
     lea(str1, Address(str1, result, scale));
     lea(str2, Address(str2, result, scale));
     jmp(COMPARE_16_CHARS);

@@ -6964,7 +6964,8 @@ void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Regist
   bind(DONE);
   if (UseAVX >= 2) {
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
+    vpxor(vec2, vec2);
   }
 }

@@ -7098,7 +7099,8 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
       BIND(L_check_fill_8_bytes);
       // clean upper bits of YMM registers
-      vzeroupper();
+      movdl(xtmp, value);
+      pshufd(xtmp, xtmp, 0);
     } else {
       // Fill 32-byte chunks
       pshufd(xtmp, xtmp, 0);

@@ -7261,7 +7263,11 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
   bind(L_copy_16_chars_exit);
   if (UseAVX >= 2) {
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(tmp2Reg, tmp2Reg);
+    vpxor(tmp3Reg, tmp3Reg);
+    vpxor(tmp4Reg, tmp4Reg);
+    movdl(tmp1Reg, tmp5);
+    pshufd(tmp1Reg, tmp1Reg, 0);
   }
   subptr(len, 8);
   jccb(Assembler::greater, L_copy_8_chars_exit);
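A note on the recurring "clean upper bits of YMM registers" comment: after 256-bit AVX instructions have run, stale data in the upper 128 bits of the YMM registers can stall later legacy-SSE code (the AVX-SSE transition penalty), which is typically why generated code zeroes the registers it touched, here with vpxor, before continuing. The sketch below is illustrative only and not taken from HotSpot; the function name is invented, and it uses the vzeroupper intrinsic rather than explicit register clears.

```cpp
// Illustrative sketch (not from this patch): do 256-bit AVX work, then clear
// the upper YMM bits before any legacy-SSE or scalar tail code runs.
#include <immintrin.h>
#include <cstddef>

void scale_floats(float* data, std::size_t n, float factor) {
    const __m256 f = _mm256_set1_ps(factor);
    std::size_t i = 0;
    for (; i + 8 <= n; i += 8) {
        __m256 v = _mm256_loadu_ps(data + i);            // 256-bit load
        _mm256_storeu_ps(data + i, _mm256_mul_ps(v, f)); // 256-bit multiply + store
    }
    _mm256_zeroupper();      // clean upper bits of the YMM registers
    for (; i < n; ++i) {     // scalar tail
        data[i] *= factor;
    }
}
```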
src/cpu/x86/vm/stubGenerator_x86_32.cpp
@@ -837,7 +837,8 @@ class StubGenerator: public StubCodeGenerator {
       if (UseUnalignedLoadStores && (UseAVX >= 2)) {
         // clean upper bits of YMM registers
-        __ vzeroupper();
+        __ vpxor(xmm0, xmm0);
+        __ vpxor(xmm1, xmm1);
       }
       __ addl(qword_count, 8);
       __ jccb(Assembler::zero, L_exit);
src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -1328,7 +1328,8 @@ class StubGenerator: public StubCodeGenerator {
       __ BIND(L_end);
       if (UseAVX >= 2) {
         // clean upper bits of YMM registers
-        __ vzeroupper();
+        __ vpxor(xmm0, xmm0);
+        __ vpxor(xmm1, xmm1);
       }
     } else {
       // Copy 32-bytes per iteration

@@ -1405,7 +1406,8 @@ class StubGenerator: public StubCodeGenerator {
       __ BIND(L_end);
       if (UseAVX >= 2) {
         // clean upper bits of YMM registers
-        __ vzeroupper();
+        __ vpxor(xmm0, xmm0);
+        __ vpxor(xmm1, xmm1);
       }
     } else {
       // Copy 32-bytes per iteration
src/share/vm/opto/superword.cpp
@@ -232,6 +232,13 @@ void SuperWord::find_adjacent_refs() {
           // if unaligned memory access is not allowed because number of
           // iterations in pre-loop will be not enough to align it.
           create_pack = false;
+        } else {
+          SWPointer p2(best_align_to_mem_ref, this);
+          if (align_to_ref_p.invar() != p2.invar()) {
+            // Do not vectorize memory accesses with different invariants
+            // if unaligned memory accesses are not allowed.
+            create_pack = false;
+          }
         }
       }
     } else {
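The new else branch compares the loop-invariant parts of the candidate reference and the reference everything is being aligned to: when the invariants differ, aligning one access via the pre-loop generally leaves the other misaligned, so the pack is dropped unless the target allows unaligned vector accesses. A minimal, hypothetical loop of the shape this guards against (names invented for illustration):

```cpp
// Two memory accesses whose addresses differ by a loop-invariant amount.
// The pre-loop can align dst[i] or src[i + inv], but in general not both,
// so both may only be packed into vectors if misaligned vector accesses
// are legal on the platform.
void copy_with_invariant(char* dst, const char* src, int inv, int n) {
    for (int i = 0; i < n; i++) {
        dst[i] = src[i + inv];
    }
}
```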
@@ -445,29 +452,57 @@ bool SuperWord::ref_is_alignable(SWPointer& p) {
   int preloop_stride = pre_end->stride_con();

   int span = preloop_stride * p.scale_in_bytes();
-
-  // Stride one accesses are alignable.
-  if (ABS(span) == p.memory_size())
+  int mem_size = p.memory_size();
+  int offset   = p.offset_in_bytes();
+  // Stride one accesses are alignable if offset is aligned to memory operation size.
+  // Offset can be unaligned when UseUnalignedAccesses is used.
+  if (ABS(span) == mem_size && (ABS(offset) % mem_size) == 0) {
     return true;
-
-  // If initial offset from start of object is computable,
-  // compute alignment within the vector.
+  }
+  // If the initial offset from start of the object is computable,
+  // check if the pre-loop can align the final offset accordingly.
+  //
+  // In other words: Can we find an i such that the offset
+  // after i pre-loop iterations is aligned to vw?
+  //   (init_offset + pre_loop) % vw == 0              (1)
+  // where
+  //   pre_loop = i * span
+  // is the number of bytes added to the offset by i pre-loop iterations.
+  //
+  // For this to hold we need pre_loop to increase init_offset by
+  //   pre_loop = vw - (init_offset % vw)
+  //
+  // This is only possible if pre_loop is divisible by span because each
+  // pre-loop iteration increases the initial offset by 'span' bytes:
+  //   (vw - (init_offset % vw)) % span == 0
+  //
   int vw = vector_width_in_bytes(p.mem());
   assert(vw > 1, "sanity");
-  if (vw % span == 0) {
-    Node* init_nd = pre_end->init_trip();
-    if (init_nd->is_Con() && p.invar() == NULL) {
-      int init = init_nd->bottom_type()->is_int()->get_con();
-      int init_offset = init * p.scale_in_bytes() + p.offset_in_bytes();
-      assert(init_offset >= 0, "positive offset from object start");
+  Node* init_nd = pre_end->init_trip();
+  if (init_nd->is_Con() && p.invar() == NULL) {
+    int init = init_nd->bottom_type()->is_int()->get_con();
+    int init_offset = init * p.scale_in_bytes() + offset;
+    assert(init_offset >= 0, "positive offset from object start");
+    if (vw % span == 0) {
+      // If vm is a multiple of span, we use formula (1).
       if (span > 0) {
         return (vw - (init_offset % vw)) % span == 0;
       } else {
         assert(span < 0, "nonzero stride * scale");
         return (init_offset % vw) % -span == 0;
       }
+    } else if (span % vw == 0) {
+      // If span is a multiple of vw, we can simplify formula (1) to:
+      //   (init_offset + i * span) % vw == 0
+      //     =>
+      //   (init_offset % vw) + ((i * span) % vw) == 0
+      //     =>
+      //   init_offset % vw == 0
+      //
+      // Because we add a multiple of vw to the initial offset, the final
+      // offset is a multiple of vw if and only if init_offset is a multiple.
+      //
+      return (init_offset % vw) == 0;
     }
   }
   return false;
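To make formula (1) in the new comment concrete, here is a small self-contained sketch (illustrative only; the function and variable names are invented) of the same alignability test, with one worked example for 2-byte accesses and 32-byte vectors.

```cpp
#include <cassert>

// Can some number of pre-loop iterations align the access to vw bytes?
//   span        - bytes added to the offset per pre-loop iteration (stride * scale), non-zero
//   init_offset - offset of the access from the object start before the pre-loop, in bytes
//   vw          - vector width in bytes
static bool can_preloop_align(int init_offset, int span, int vw) {
    if (vw % span == 0) {
        // Formula (1): (init_offset + i * span) % vw == 0 for some i >= 0.
        if (span > 0) {
            return (vw - (init_offset % vw)) % span == 0;
        }
        return (init_offset % vw) % -span == 0;   // span < 0
    }
    if (span % vw == 0) {
        // Each iteration adds a multiple of vw, so only init_offset matters.
        return (init_offset % vw) == 0;
    }
    return false;
}

int main() {
    // char accesses: scale 2 bytes, pre-loop stride 1  =>  span = 2; vw = 32.
    assert(can_preloop_align(18, 2, 32));   // (32 - 18 % 32) % 2 == 0
    assert(!can_preloop_align(17, 2, 32));  // (32 - 17 % 32) % 2 == 1
    return 0;
}
```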
@@ -479,17 +514,23 @@ int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
   SWPointer align_to_ref_p(mem_ref, this);
   int offset = align_to_ref_p.offset_in_bytes();
   int scale  = align_to_ref_p.scale_in_bytes();
+  int elt_size = align_to_ref_p.memory_size();
   int vw       = vector_width_in_bytes(mem_ref);
   assert(vw > 1, "sanity");
-  int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
-  // At least one iteration is executed in pre-loop by default. As result
-  // several iterations are needed to align memory operations in main-loop even
-  // if offset is 0.
-  int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
-  int elt_size = align_to_ref_p.memory_size();
-  assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
-         err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
-  int iv_adjustment = iv_adjustment_in_bytes/elt_size;
+  int iv_adjustment;
+  if (scale != 0) {
+    int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
+    // At least one iteration is executed in pre-loop by default. As result
+    // several iterations are needed to align memory operations in main-loop even
+    // if offset is 0.
+    int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
+    assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
+           err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));
+    iv_adjustment = iv_adjustment_in_bytes/elt_size;
+  } else {
+    // This memory op is not dependent on iv (scale == 0)
+    iv_adjustment = 0;
+  }

 #ifndef PRODUCT
   if (TraceSuperWord)
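As a worked example of the adjustment computed above (all values invented for illustration): with 32-byte vectors, a 16-byte constant offset, and 2-byte elements whose address advances 2 bytes per iv step, the pre-loop must cover (1 * 32 - 16 % 32) / 2 = 8 elements, while an access that does not depend on the iv (scale == 0) now simply gets an adjustment of 0.

```cpp
#include <cstdio>

int main() {
    // Illustrative values only.
    int vw = 32;        // vector width in bytes
    int offset = 16;    // constant offset of the access in bytes
    int scale = 2;      // bytes the address advances per iv increment
    int iv_stride = 1;  // loop stride
    int elt_size = 2;   // element size in bytes

    int stride_sign = (scale * iv_stride) > 0 ? 1 : -1;             // 1
    int iv_adjustment_in_bytes = stride_sign * vw - (offset % vw);  // 32 - 16 = 16
    int iv_adjustment = iv_adjustment_in_bytes / elt_size;          // 8 elements
    std::printf("iv_adjustment = %d\n", iv_adjustment);
    return 0;
}
```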
@@ -2247,6 +2288,11 @@ SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :
   }
   // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
   Node* base = adr->in(AddPNode::Base);
+  // The base address should be loop invariant
+  if (!invariant(base)) {
+    assert(!valid(), "base address is loop variant");
+    return;
+  }
   //unsafe reference could not be aligned appropriately without runtime checking
   if (base == NULL || base->bottom_type() == Type::TOP) {
     assert(!valid(), "unsafe access");
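The added guard rejects addresses whose base is not loop invariant; such a pointer cannot be expressed as a fixed base plus an iv-dependent offset, so its alignment cannot be reasoned about. A hypothetical loop of that shape (it mirrors the pointer flip the new test's copyByteToChar uses to defeat invariant code motion):

```cpp
// The base pointer itself changes every iteration, so the access is not
// "loop-invariant base + iv-dependent offset" and must not be packed on
// the assumption that it is.
void alternating_base(char* a, char* b, int n) {
    char* base = a;
    for (int i = 0; i < n; i++) {
        base[i] = 0;
        base = (base == a) ? b : a;   // base is loop variant
    }
}
```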
src/share/vm/opto/superword.hpp
@@ -41,7 +41,7 @@
 //      Exploiting SuperWord Level Parallelism with
 //        Multimedia Instruction Sets
 //      by
-//        Samuel Larsen and Saman Amarasighe
+//        Samuel Larsen and Saman Amarasinghe
 //        MIT Laboratory for Computer Science
 //      date
 //        May 2000

@@ -432,7 +432,7 @@ class SWPointer VALUE_OBJ_CLASS_SPEC {
   Node* _base;             // NULL if unsafe nonheap reference
   Node* _adr;              // address pointer
-  jint  _scale;            // multipler for iv (in bytes), 0 if no loop iv
+  jint  _scale;            // multiplier for iv (in bytes), 0 if no loop iv
   jint  _offset;           // constant offset (in bytes)
   Node* _invar;            // invariant offset (in bytes), NULL if none
   bool  _negate_invar;     // if true then use: (0 - _invar)
test/compiler/loopopts/superword/TestVectorizationWithInvariant.java
new file mode 100644
/*
 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

import com.oracle.java.testlibrary.*;
import sun.misc.Unsafe;

/**
 * @test
 * @bug 8078497
 * @summary Tests correct alignment of vectors with loop invariant offset.
 * @library /testlibrary
 * @run main TestVectorizationWithInvariant
 */
public class TestVectorizationWithInvariant {

    private static Unsafe unsafe;
    private static final long BYTE_ARRAY_OFFSET;
    private static final long CHAR_ARRAY_OFFSET;

    static {
        unsafe = Utils.getUnsafe();
        BYTE_ARRAY_OFFSET = unsafe.arrayBaseOffset(byte[].class);
        CHAR_ARRAY_OFFSET = unsafe.arrayBaseOffset(char[].class);
    }

    public static void main(String[] args) throws Exception {
        byte[] byte_array1 = new byte[1000];
        byte[] byte_array2 = new byte[1000];
        char[] char_array = new char[1000];

        for (int i = 0; i < 20_000; ++i) {
            copyByteToChar(byte_array1, byte_array2, char_array, 1);
            copyCharToByte(char_array, byte_array1, 1);
            copyCharToByteAligned(char_array, byte_array1);
            copyCharToByteUnaligned(char_array, byte_array1);
        }
    }

    /*
     * Copy multiple consecutive chars from a byte array to a given offset in a char array
     * to trigger C2's superword optimization. The offset in the byte array is independent
     * of the loop induction variable and can be set to an arbitrary value. It may then not
     * be possible to both align the LoadUS and the StoreC operations. Therefore, vectorization
     * should only be done in this case if unaligned memory accesses are allowed.
     */
    public static void copyByteToChar(byte[] src1, byte[] src2, char[] dst, int off) {
        off = (int) BYTE_ARRAY_OFFSET + (off << 1);
        byte[] src = src1;
        for (int i = (int) CHAR_ARRAY_OFFSET; i < 100; i = i + 8) {
            // Copy 8 chars from src to dst
            unsafe.putChar(dst, i + 0, unsafe.getChar(src, off + 0));
            unsafe.putChar(dst, i + 2, unsafe.getChar(src, off + 2));
            unsafe.putChar(dst, i + 4, unsafe.getChar(src, off + 4));
            unsafe.putChar(dst, i + 6, unsafe.getChar(src, off + 6));
            unsafe.putChar(dst, i + 8, unsafe.getChar(src, off + 8));
            unsafe.putChar(dst, i + 10, unsafe.getChar(src, off + 10));
            unsafe.putChar(dst, i + 12, unsafe.getChar(src, off + 12));
            unsafe.putChar(dst, i + 14, unsafe.getChar(src, off + 14));

            // Prevent loop invariant code motion of char read.
            src = (src == src1) ? src2 : src1;
        }
    }

    /*
     * Copy multiple consecutive chars from a char array to a given offset in a byte array
     * to trigger C2's superword optimization. Checks for similar problems as 'copyByteToChar'.
     */
    public static void copyCharToByte(char[] src, byte[] dst, int off) {
        off = (int) BYTE_ARRAY_OFFSET + (off << 1);
        for (int i = 0; i < 100; i = i + 8) {
            // Copy 8 chars from src to dst
            unsafe.putChar(dst, off + 0, src[i + 0]);
            unsafe.putChar(dst, off + 2, src[i + 1]);
            unsafe.putChar(dst, off + 4, src[i + 2]);
            unsafe.putChar(dst, off + 6, src[i + 3]);
            unsafe.putChar(dst, off + 8, src[i + 4]);
            unsafe.putChar(dst, off + 10, src[i + 5]);
            unsafe.putChar(dst, off + 12, src[i + 6]);
            unsafe.putChar(dst, off + 14, src[i + 7]);
        }
    }

    /*
     * Variant of copyCharToByte with a constant destination array offset.
     * The loop should always be vectorized because both the LoadUS and StoreC
     * operations can be aligned.
     */
    public static void copyCharToByteAligned(char[] src, byte[] dst) {
        final int off = (int) BYTE_ARRAY_OFFSET;
        for (int i = 8; i < 100; i = i + 8) {
            // Copy 8 chars from src to dst
            unsafe.putChar(dst, off + 0, src[i + 0]);
            unsafe.putChar(dst, off + 2, src[i + 1]);
            unsafe.putChar(dst, off + 4, src[i + 2]);
            unsafe.putChar(dst, off + 6, src[i + 3]);
            unsafe.putChar(dst, off + 8, src[i + 4]);
            unsafe.putChar(dst, off + 10, src[i + 5]);
            unsafe.putChar(dst, off + 12, src[i + 6]);
            unsafe.putChar(dst, off + 14, src[i + 7]);
        }
    }

    /*
     * Variant of copyCharToByte with a constant destination array offset. The
     * loop should only be vectorized if unaligned memory operations are allowed
     * because not both the LoadUS and the StoreC can be aligned.
     */
    public static void copyCharToByteUnaligned(char[] src, byte[] dst) {
        final int off = (int) BYTE_ARRAY_OFFSET + 2;
        for (int i = 0; i < 100; i = i + 8) {
            // Copy 8 chars from src to dst
            unsafe.putChar(dst, off + 0, src[i + 0]);
            unsafe.putChar(dst, off + 2, src[i + 1]);
            unsafe.putChar(dst, off + 4, src[i + 2]);
            unsafe.putChar(dst, off + 6, src[i + 3]);
            unsafe.putChar(dst, off + 8, src[i + 4]);
            unsafe.putChar(dst, off + 10, src[i + 5]);
            unsafe.putChar(dst, off + 12, src[i + 6]);
            unsafe.putChar(dst, off + 14, src[i + 7]);
        }
    }
}