openanolis/dragonwell11 · Commit 6d9460f7
Authored Aug 27, 2010 by never

4809552: Optimize Arrays.fill(...)
Reviewed-by: kvn

Parent: babc6e40

Showing 17 changed files with 940 additions and 11 deletions (+940, -11)
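This change teaches C2 to recognize simple array-fill loops and replace them with calls to per-type fill stubs, guarded by the new -XX:+OptimizeFill flag. As an illustration (not part of the commit), a Java loop of the following shape, a single store of a loop-invariant value at unit stride, is the pattern the new match_fill_loop() in loopTransform.cpp below accepts; java.util.Arrays.fill() bottoms out in exactly such a loop:

    // Illustrative sketch only: the loop shape the new matcher targets.
    static void fill(int[] a, int v) {
        for (int i = 0; i < a.length; i++) {
            a[i] = v;   // one store, indexed by the loop induction variable
        }
    }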
hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp        +186   -0
hotspot/src/cpu/x86/vm/assembler_x86.cpp                +180   -0
hotspot/src/cpu/x86/vm/assembler_x86.hpp                  +5   -0
hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp          +34   -0
hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp          +27   -0
hotspot/src/share/vm/includeDB_compiler2                  +1   -0
hotspot/src/share/vm/opto/addnode.cpp                     +3   -0
hotspot/src/share/vm/opto/c2_globals.hpp                  +6   -0
hotspot/src/share/vm/opto/loopTransform.cpp             +362  -10
hotspot/src/share/vm/opto/loopnode.cpp                    +6   -0
hotspot/src/share/vm/opto/loopnode.hpp                    +6   -0
hotspot/src/share/vm/opto/runtime.cpp                    +16   -0
hotspot/src/share/vm/opto/runtime.hpp                     +2   -0
hotspot/src/share/vm/runtime/arguments.cpp                +3   -0
hotspot/src/share/vm/runtime/stubRoutines.cpp            +85   -0
hotspot/src/share/vm/runtime/stubRoutines.hpp            +17   -0
hotspot/src/share/vm/utilities/globalDefinitions.hpp      +1   -1
hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp
@@ -1587,6 +1587,185 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

  //
  //  Generate stub for disjoint short fill.  If "aligned" is true, the
  //  "to" address is assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      to:    O0
  //      value: O1
  //      count: O2 treated as signed
  //
  address generate_fill(BasicType t, bool aligned, const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    const Register to    = O0;   // source array address
    const Register value = O1;   // fill value
    const Register count = O2;   // elements count
    // O3 is used as a temp register

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
    Label L_fill_2_bytes, L_fill_4_bytes, L_fill_32_bytes;

    int shift = -1;
    switch (t) {
      case T_BYTE:
        shift = 2;
        break;
      case T_SHORT:
        shift = 1;
        break;
      case T_INT:
        shift = 0;
        break;
      default: ShouldNotReachHere();
    }

    BLOCK_COMMENT("Entry:");

    if (t == T_BYTE) {
      // Zero extend value
      __ and3(value, 0xff, value);
      __ sllx(value, 8, O3);
      __ or3(value, O3, value);
    }
    if (t == T_SHORT) {
      // Zero extend value
      __ sethi(0xffff0000, O3);
      __ andn(value, O3, value);
    }
    if (t == T_BYTE || t == T_SHORT) {
      __ sllx(value, 16, O3);
      __ or3(value, O3, value);
    }

    __ cmp(count, 2 << shift); // Short arrays (< 8 bytes) fill by element
    __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_4_bytes); // use unsigned cmp
    __ delayed()->andcc(count, 1 << shift, G0);

    if (!aligned && (t == T_BYTE || t == T_SHORT)) {
      // align source address at 4 bytes address boundary
      if (t == T_BYTE) {
        // One byte misalignment happens only for byte arrays
        __ andcc(to, 1, G0);
        __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
        __ delayed()->nop();
        __ stb(value, to, 0);
        __ inc(to, 1);
        __ dec(count, 1);
        __ BIND(L_skip_align1);
      }
      // Two bytes misalignment happens only for byte and short (char) arrays
      __ andcc(to, 2, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
      __ delayed()->nop();
      __ sth(value, to, 0);
      __ inc(to, 2);
      __ dec(count, 1 << (shift - 1));
      __ BIND(L_skip_align2);
    }
#ifdef _LP64
    if (!aligned) {
#endif
    // align to 8 bytes, we know we are 4 byte aligned to start
    __ andcc(to, 7, G0);
    __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
    __ delayed()->nop();
    __ stw(value, to, 0);
    __ inc(to, 4);
    __ dec(count, 1 << shift);
    __ BIND(L_fill_32_bytes);
#ifdef _LP64
    }
#endif

    Label L_check_fill_8_bytes;
    // Fill 32-byte chunks
    __ subcc(count, 8 << shift, count);
    __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
    __ delayed()->nop();

    if (t == T_INT) {
      // Zero extend value
      __ srl(value, 0, value);
    }
    if (t == T_BYTE || t == T_SHORT || t == T_INT) {
      __ sllx(value, 32, O3);
      __ or3(value, O3, value);
    }

    Label L_fill_32_bytes_loop;
    __ align(16);
    __ BIND(L_fill_32_bytes_loop);

    __ stx(value, to, 0);
    __ stx(value, to, 8);
    __ stx(value, to, 16);
    __ stx(value, to, 24);

    __ subcc(count, 8 << shift, count);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop);
    __ delayed()->add(to, 32, to);

    __ BIND(L_check_fill_8_bytes);
    __ addcc(count, 8 << shift, count);
    __ brx(Assembler::zero, false, Assembler::pn, L_exit);
    __ delayed()->subcc(count, 1 << (shift + 1), count);
    __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes);
    __ delayed()->andcc(count, 1 << shift, G0);

    //
    // length is too short, just fill 8 bytes at a time
    //
    Label L_fill_8_bytes_loop;
    __ BIND(L_fill_8_bytes_loop);
    __ stx(value, to, 0);
    __ subcc(count, 1 << (shift + 1), count);
    __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop);
    __ delayed()->add(to, 8, to);

    // fill trailing 4 bytes
    __ andcc(count, 1 << shift, G0);  // in delay slot of branches
    __ BIND(L_fill_4_bytes);
    __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes);
    if (t == T_BYTE || t == T_SHORT) {
      __ delayed()->andcc(count, 1 << (shift - 1), G0);
    } else {
      __ delayed()->nop();
    }
    __ stw(value, to, 0);
    if (t == T_BYTE || t == T_SHORT) {
      __ inc(to, 4);
      // fill trailing 2 bytes
      __ andcc(count, 1 << (shift - 1), G0);  // in delay slot of branches
      __ BIND(L_fill_2_bytes);
      __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte);
      __ delayed()->andcc(count, 1, count);
      __ sth(value, to, 0);
      if (t == T_BYTE) {
        __ inc(to, 2);
        // fill trailing byte
        __ andcc(count, 1, count);  // in delay slot of branches
        __ BIND(L_fill_byte);
        __ brx(Assembler::zero, false, Assembler::pt, L_exit);
        __ delayed()->nop();
        __ stb(value, to, 0);
      } else {
        __ BIND(L_fill_byte);
      }
    } else {
      __ BIND(L_fill_2_bytes);
    }
    __ BIND(L_exit);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate stub for conjoint short copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.

@@ -2855,6 +3034,13 @@ class StubGenerator: public StubCodeGenerator {
     StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
     StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy");
     StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy");
+
+    StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
+    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
+    StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
+    StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
+    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
+    StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
   }

   void generate_initial() {
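Before entering the 32-byte store loop, the stub widens the fill value so each 8-byte stx covers several elements (the and3/sllx/or3 sequence above; the x86 stub below does the same with shll/orl). A minimal Java sketch of that replication arithmetic for a byte value, shown here only as an illustration:

    // Illustration only: replicate an 8-bit fill value across 64 bits,
    // mirroring the and3/sllx/or3 splat sequence in the stub above.
    static long splat(long v) {
        v &= 0xffL;     // zero-extend the byte, e.g. 0xAB
        v |= v << 8;    // 16 bits: 0xABAB
        v |= v << 16;   // 32 bits: 0xABABABAB
        v |= v << 32;   // 64 bits: 0xABABABABABABABAB
        return v;
    }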
hotspot/src/cpu/x86/vm/assembler_x86.cpp
@@ -8767,6 +8767,186 @@ void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Regist
  bind(DONE);
}

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2 << shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align source address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1 << (shift - 1));
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1 << shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert(UseSSE >= 2, "supported cpu only");
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Fill 32-byte chunks
      movdl(xtmp, value);
      pshufd(xtmp, xtmp, 0);

      subl(count, 8 << shift);
      jcc(Assembler::less, L_check_fill_8_bytes);
      align(16);

      BIND(L_fill_32_bytes_loop);

      if (UseUnalignedLoadStores) {
        movdqu(Address(to, 0), xtmp);
        movdqu(Address(to, 16), xtmp);
      } else {
        movq(Address(to, 0), xtmp);
        movq(Address(to, 8), xtmp);
        movq(Address(to, 16), xtmp);
        movq(Address(to, 24), xtmp);
      }

      addptr(to, 32);
      subl(count, 8 << shift);
      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift);
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1 << shift);
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1 << (shift - 1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      BIND(L_fill_byte);
    }
  } else {
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
#undef BIND
#undef BLOCK_COMMENT

Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
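Note how the x86 version picks a store width from the CPU features: with UseSSE < 2 it fills 32-byte chunks with plain 4-byte movl stores, with SSE2 it splats the value into an XMM register (movdl + pshufd) and stores 8 bytes at a time with movq, and with UseUnalignedLoadStores it switches to two 16-byte movdqu stores per iteration.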
hotspot/src/cpu/x86/vm/assembler_x86.hpp
@@ -2242,6 +2242,11 @@ public:
                           Register limit, Register result, Register chr,
                           XMMRegister vec1, XMMRegister vec2);

+  // Fill primitive arrays
+  void generate_fill(BasicType t, bool aligned,
+                     Register to, Register value, Register count,
+                     Register rtmp, XMMRegister xtmp);
+
 #undef VIRTUAL
 };
hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@@ -1039,6 +1039,33 @@ class StubGenerator: public StubCodeGenerator {
  }

  address generate_fill(BasicType t, bool aligned, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    BLOCK_COMMENT("Entry:");

    const Register to    = rdi;  // source array address
    const Register value = rdx;  // value
    const Register count = rsi;  // elements count

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);
    __ push(rdi);
    __ movptr(to,   Address(rsp, 12+ 4));
    __ movl(value,  Address(rsp, 12+ 8));
    __ movl(count,  Address(rsp, 12+12));

    __ generate_fill(t, aligned, to, value, count, rax, xmm0);

    __ pop(rdi);
    __ pop(rsi);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }

  address generate_conjoint_copy(BasicType t, bool aligned,
                                 Address::ScaleFactor sf,
                                 address nooverlap_target,

@@ -2001,6 +2028,13 @@ class StubGenerator: public StubCodeGenerator {
         generate_conjoint_long_copy(entry, &entry_jlong_arraycopy,
                                     "jlong_arraycopy");

+    StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
+    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
+    StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
+    StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
+    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
+    StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
+
     StubRoutines::_arrayof_jint_disjoint_arraycopy =
         StubRoutines::_jint_disjoint_arraycopy;
     StubRoutines::_arrayof_oop_disjoint_arraycopy =
hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -1625,6 +1625,26 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

  address generate_fill(BasicType t, bool aligned, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    BLOCK_COMMENT("Entry:");

    const Register to    = c_rarg0;  // source array address
    const Register value = c_rarg1;  // value
    const Register count = c_rarg2;  // elements count

    __ enter(); // required for proper stackwalking of RuntimeStub frame

    __ generate_fill(t, aligned, to, value, count, rax, xmm0);

    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored

@@ -2712,6 +2732,13 @@ class StubGenerator: public StubCodeGenerator {
     StubRoutines::_unsafe_arraycopy  = generate_unsafe_copy("unsafe_arraycopy");
     StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy");

+    StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
+    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
+    StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
+    StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
+    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
+    StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
+
     // We don't generate specialized code for HeapWord-aligned source
     // arrays, so just use the code we've already generated
     StubRoutines::_arrayof_jbyte_disjoint_arraycopy =
         StubRoutines::_jbyte_disjoint_arraycopy;
hotspot/src/share/vm/includeDB_compiler2
@@ -624,6 +624,7 @@ loopTransform.cpp  divnode.hpp
 loopTransform.cpp                       loopnode.hpp
 loopTransform.cpp                       mulnode.hpp
 loopTransform.cpp                       rootnode.hpp
+loopTransform.cpp                       runtime.hpp
 loopTransform.cpp                       subnode.hpp

 loopUnswitch.cpp                        allocation.inline.hpp
hotspot/src/share/vm/opto/addnode.cpp
@@ -705,6 +705,9 @@ int AddPNode::unpack_offsets(Node* elements[], int length) {
     }
     addr = addr->in(AddPNode::Address);
   }
+  if (addr != base) {
+    return -1;
+  }
   return count;
 }
hotspot/src/share/vm/opto/c2_globals.hpp
@@ -157,6 +157,12 @@
   develop(bool, TraceLoopPredicate, false,                                  \
           "Trace generation of loop predicates")                            \
                                                                             \
+  product(bool, OptimizeFill, false,                                        \
+          "convert fill/copy loops into intrinsic")                         \
+                                                                            \
+  develop(bool, TraceOptimizeFill, false,                                   \
+          "print detailed information about fill conversion")               \
+                                                                            \
   develop(bool, OptoCoalesce, true,                                         \
           "Use Conservative Copy Coalescing in the Register Allocator")     \
                                                                             \
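Two new C2 flags control the feature: -XX:+OptimizeFill (a product flag, off by default, and switched on under AggressiveOpts by the arguments.cpp change below) enables the conversion, while the develop-only TraceOptimizeFill prints why a given loop was or was not converted.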
hotspot/src/share/vm/opto/loopTransform.cpp
@@ -2049,11 +2049,18 @@ bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invari
   if (cmp->Opcode() != Op_CmpU) {
     return false;
   }
-  if (cmp->in(2)->Opcode() != Op_LoadRange) {
-    return false;
+  Node* range = cmp->in(2);
+  if (range->Opcode() != Op_LoadRange) {
+    const TypeInt* tint = phase->_igvn.type(range)->isa_int();
+    if (!OptimizeFill || tint == NULL || tint->empty() || tint->_lo < 0) {
+      // Allow predication on positive values that aren't LoadRanges.
+      // This allows optimization of loops where the length of the
+      // array is a known value and doesn't need to be loaded back
+      // from the array.
+      return false;
+    }
   }
-  LoadRangeNode* lr = (LoadRangeNode*)cmp->in(2);
-  if (!invar.is_invariant(lr)) { // loadRange must be invariant
+  if (!invar.is_invariant(range)) {
     return false;
   }
   Node *iv = _head->as_CountedLoop()->phi();

@@ -2248,9 +2255,9 @@ bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
       const Node* cmp = bol->in(1)->as_Cmp();
       Node* idx = cmp->in(1);
       assert(!invar.is_invariant(idx), "index is variant");
-      assert(cmp->in(2)->Opcode() == Op_LoadRange, "must be");
-      Node* ld_rng = cmp->in(2); // LoadRangeNode
-      assert(invar.is_invariant(ld_rng), "load range must be invariant");
+      assert(cmp->in(2)->Opcode() == Op_LoadRange || OptimizeFill, "must be");
+      Node* rng = cmp->in(2);
+      assert(invar.is_invariant(rng), "range must be invariant");
       int scale    = 1;
       Node* offset = zero;
       bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset);

@@ -2271,21 +2278,21 @@ bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
       // Perform cloning to keep Invariance state correct since the
       // late schedule will place invariant things in the loop.
-      ld_rng = invar.clone(ld_rng, ctrl);
+      rng = invar.clone(rng, ctrl);
       if (offset && offset != zero) {
         assert(invar.is_invariant(offset), "offset must be loop invariant");
         offset = invar.clone(offset, ctrl);
       }

       // Test the lower bound
-      Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng, false);
+      Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, false);
       IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If();
       _igvn.hash_delete(lower_bound_iff);
       lower_bound_iff->set_req(1, lower_bound_bol);
       if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx);

       // Test the upper bound
-      Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng, true);
+      Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, true);
       IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If();
       _igvn.hash_delete(upper_bound_iff);
       upper_bound_iff->set_req(1, upper_bound_bol);

@@ -2366,3 +2373,348 @@ bool IdealLoopTree::loop_predication( PhaseIdealLoop *phase) {
   return hoisted;
 }

// Process all the loops in the loop tree and replace any fill
// patterns with an intrinsic version.
bool PhaseIdealLoop::do_intrinsify_fill() {
  bool changed = false;
  for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
    IdealLoopTree* lpt = iter.current();
    changed |= intrinsify_fill(lpt);
  }
  return changed;
}

// Examine an inner loop looking for a single store of an invariant
// value in a unit stride loop,
bool PhaseIdealLoop::match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
                                     Node*& shift, Node*& con) {
  const char* msg = NULL;
  Node* msg_node = NULL;

  store_value = NULL;
  con = NULL;
  shift = NULL;

  // Process the loop looking for stores.  If there are multiple
  // stores or extra control flow give up at this point.
  CountedLoopNode* head = lpt->_head->as_CountedLoop();
  for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
    Node* n = lpt->_body.at(i);
    if (n->outcnt() == 0) continue; // Ignore dead
    if (n->is_Store()) {
      if (store != NULL) {
        msg = "multiple stores";
        break;
      }
      int opc = n->Opcode();
      if (opc == Op_StoreP || opc == Op_StoreN || opc == Op_StoreCM) {
        msg = "oop fills not handled";
        break;
      }
      Node* value = n->in(MemNode::ValueIn);
      if (!lpt->is_invariant(value)) {
        msg = "variant store value";
      }
      store = n;
      store_value = value;
    } else if (n->is_If() && n != head->loopexit()) {
      msg = "extra control flow";
      msg_node = n;
    }
  }

  if (store == NULL) {
    // No store in loop
    return false;
  }

  if (msg == NULL && head->stride_con() != 1) {
    // could handle negative strides too
    if (head->stride_con() < 0) {
      msg = "negative stride";
    } else {
      msg = "non-unit stride";
    }
  }

  if (msg == NULL && !store->in(MemNode::Address)->is_AddP()) {
    msg = "can't handle store address";
    msg_node = store->in(MemNode::Address);
  }

  // Make sure there is an appropriate fill routine
  BasicType t = store->as_Mem()->memory_type();
  const char* fill_name;
  if (msg == NULL &&
      StubRoutines::select_fill_function(t, false, fill_name) == NULL) {
    msg = "unsupported store";
    msg_node = store;
  }

  if (msg != NULL) {
#ifndef PRODUCT
    if (TraceOptimizeFill) {
      tty->print_cr("not fill intrinsic candidate: %s", msg);
      if (msg_node != NULL) msg_node->dump();
    }
#endif
    return false;
  }

  // Make sure the address expression can be handled.  It should be
  // head->phi * elsize + con.  head->phi might have a ConvI2L.
  Node* elements[4];
  Node* conv = NULL;
  int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements));
  for (int e = 0; e < count; e++) {
    Node* n = elements[e];
    if (n->is_Con() && con == NULL) {
      con = n;
    } else if (n->Opcode() == Op_LShiftX && shift == NULL) {
      Node* value = n->in(1);
#ifdef _LP64
      if (value->Opcode() == Op_ConvI2L) {
        conv = value;
        value = value->in(1);
      }
#endif
      if (value != head->phi()) {
        msg = "unhandled shift in address";
      } else {
        shift = n;
        assert(type2aelembytes(store->as_Mem()->memory_type(), true) == 1 << shift->in(2)->get_int(),
               "scale should match");
      }
    } else if (n->Opcode() == Op_ConvI2L && conv == NULL) {
      if (n->in(1) == head->phi()) {
        conv = n;
      } else {
        msg = "unhandled input to ConvI2L";
      }
    } else if (n == head->phi()) {
      // no shift, check below for allowed cases
    } else {
      msg = "unhandled node in address";
      msg_node = n;
    }
  }

  if (count == -1) {
    msg = "malformed address expression";
    msg_node = store;
  }

  // byte sized items won't have a shift
  if (msg == NULL && shift == NULL && t != T_BYTE && t != T_BOOLEAN) {
    msg = "can't find shift";
    msg_node = store;
  }

  if (msg != NULL) {
#ifndef PRODUCT
    if (TraceOptimizeFill) {
      tty->print_cr("not fill intrinsic: %s", msg);
      if (msg_node != NULL) msg_node->dump();
    }
#endif
    return false;
  }

  // Now make sure all the other nodes in the loop can be handled
  VectorSet ok(Thread::current()->resource_area());

  // store related values are ok
  ok.set(store->_idx);
  ok.set(store->in(MemNode::Memory)->_idx);

  // Loop structure is ok
  ok.set(head->_idx);
  ok.set(head->loopexit()->_idx);
  ok.set(head->phi()->_idx);
  ok.set(head->incr()->_idx);
  ok.set(head->loopexit()->cmp_node()->_idx);
  ok.set(head->loopexit()->in(1)->_idx);

  // Address elements are ok
  if (con)   ok.set(con->_idx);
  if (shift) ok.set(shift->_idx);
  if (conv)  ok.set(conv->_idx);

  for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
    Node* n = lpt->_body.at(i);
    if (n->outcnt() == 0) continue; // Ignore dead
    if (ok.test(n->_idx)) continue;
    // Backedge projection is ok
    if (n->is_IfTrue() && n->in(0) == head->loopexit()) continue;
    if (!n->is_AddP()) {
      msg = "unhandled node";
      msg_node = n;
      break;
    }
  }

  // Make sure no unexpected values are used outside the loop
  for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
    Node* n = lpt->_body.at(i);
    // These values can be replaced with other nodes if they are used
    // outside the loop.
    if (n == store || n == head->loopexit() || n == head->incr()) continue;
    for (SimpleDUIterator iter(n); iter.has_next(); iter.next()) {
      Node* use = iter.get();
      if (!lpt->_body.contains(use)) {
        msg = "node is used outside loop";
        // lpt->_body.dump();
        msg_node = n;
        break;
      }
    }
  }

#ifdef ASSERT
  if (TraceOptimizeFill) {
    if (msg != NULL) {
      tty->print_cr("no fill intrinsic: %s", msg);
      if (msg_node != NULL) msg_node->dump();
    } else {
      tty->print_cr("fill intrinsic for:");
    }
    store->dump();
    if (Verbose) {
      lpt->_body.dump();
    }
  }
#endif

  return msg == NULL;
}

bool PhaseIdealLoop::intrinsify_fill(IdealLoopTree* lpt) {
  // Only for counted inner loops
  if (!lpt->is_counted() || !lpt->is_inner()) {
    return false;
  }

  // Must have constant stride
  CountedLoopNode* head = lpt->_head->as_CountedLoop();
  if (!head->stride_is_con() || !head->is_normal_loop()) {
    return false;
  }

  // Check that the body only contains a store of a loop invariant
  // value that is indexed by the loop phi.
  Node* store = NULL;
  Node* store_value = NULL;
  Node* shift = NULL;
  Node* offset = NULL;
  if (!match_fill_loop(lpt, store, store_value, shift, offset)) {
    return false;
  }

  // Now replace the whole loop body by a call to a fill routine that
  // covers the same region as the loop.
  Node* base = store->in(MemNode::Address)->as_AddP()->in(AddPNode::Base);

  // Build an expression for the beginning of the copy region
  Node* index = head->init_trip();
#ifdef _LP64
  index = new (C, 2) ConvI2LNode(index);
  _igvn.register_new_node_with_optimizer(index);
#endif
  if (shift != NULL) {
    // byte arrays don't require a shift but others do.
    index = new (C, 3) LShiftXNode(index, shift->in(2));
    _igvn.register_new_node_with_optimizer(index);
  }
  index = new (C, 4) AddPNode(base, base, index);
  _igvn.register_new_node_with_optimizer(index);
  Node* from = new (C, 4) AddPNode(base, index, offset);
  _igvn.register_new_node_with_optimizer(from);
  // Compute the number of elements to copy
  Node* len = new (C, 3) SubINode(head->limit(), head->init_trip());
  _igvn.register_new_node_with_optimizer(len);

  BasicType t = store->as_Mem()->memory_type();
  bool aligned = false;
  if (offset != NULL && head->init_trip()->is_Con()) {
    int element_size = type2aelembytes(t);
    aligned = (offset->find_intptr_t_type()->get_con() +
               head->init_trip()->get_int() * element_size) % HeapWordSize == 0;
  }

  // Build a call to the fill routine
  const char* fill_name;
  address fill = StubRoutines::select_fill_function(t, aligned, fill_name);
  assert(fill != NULL, "what?");

  // Convert float/double to int/long for fill routines
  if (t == T_FLOAT) {
    store_value = new (C, 2) MoveF2INode(store_value);
    _igvn.register_new_node_with_optimizer(store_value);
  } else if (t == T_DOUBLE) {
    store_value = new (C, 2) MoveD2LNode(store_value);
    _igvn.register_new_node_with_optimizer(store_value);
  }

  Node* mem_phi = store->in(MemNode::Memory);
  Node* result_ctrl;
  Node* result_mem;
  const TypeFunc* call_type = OptoRuntime::array_fill_Type();
  int size = call_type->domain()->cnt();
  CallLeafNode* call = new (C, size) CallLeafNoFPNode(call_type, fill, fill_name,
                                                      TypeAryPtr::get_array_body_type(t));
  call->init_req(TypeFunc::Parms+0, from);
  call->init_req(TypeFunc::Parms+1, store_value);
  call->init_req(TypeFunc::Parms+2, len);
  call->init_req(TypeFunc::Control,   head->init_control());
  call->init_req(TypeFunc::I_O,       C->top());       // does no i/o
  call->init_req(TypeFunc::Memory,    mem_phi->in(LoopNode::EntryControl));
  call->init_req(TypeFunc::ReturnAdr, C->start()->proj_out(TypeFunc::ReturnAdr));
  call->init_req(TypeFunc::FramePtr,  C->start()->proj_out(TypeFunc::FramePtr));
  _igvn.register_new_node_with_optimizer(call);
  result_ctrl = new (C, 1) ProjNode(call, TypeFunc::Control);
  _igvn.register_new_node_with_optimizer(result_ctrl);
  result_mem = new (C, 1) ProjNode(call, TypeFunc::Memory);
  _igvn.register_new_node_with_optimizer(result_mem);

  // If this fill is tightly coupled to an allocation and overwrites
  // the whole body, allow it to take over the zeroing.
  AllocateNode* alloc = AllocateNode::Ideal_allocation(base, this);
  if (alloc != NULL && alloc->is_AllocateArray()) {
    Node* length = alloc->as_AllocateArray()->Ideal_length();
    if (head->limit() == length &&
        head->init_trip() == _igvn.intcon(0)) {
      if (TraceOptimizeFill) {
        tty->print_cr("Eliminated zeroing in allocation");
      }
      alloc->maybe_set_complete(&_igvn);
    } else {
#ifdef ASSERT
      if (TraceOptimizeFill) {
        tty->print_cr("filling array but bounds don't match");
        alloc->dump();
        head->init_trip()->dump();
        head->limit()->dump();
        length->dump();
      }
#endif
    }
  }

  // Redirect the old control and memory edges that are outside the loop.
  Node* exit = head->loopexit()->proj_out(0);
  _igvn.replace_node(exit, result_ctrl);
  _igvn.replace_node(store, result_mem);
  // Any uses of the increment outside of the loop become the loop limit.
  _igvn.replace_node(head->incr(), head->limit());

  // Disconnect the head from the loop.
  for (uint i = 0; i < lpt->_body.size(); i++) {
    Node* n = lpt->_body.at(i);
    _igvn.replace_node(n, C->top());
  }

  return true;
}
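intrinsify_fill above also lets a fill that is tightly coupled to an array allocation take over the allocation's zeroing (alloc->maybe_set_complete). A hedged Java sketch of the shape that qualifies, where the loop starts at zero and its limit is the allocation length:

    // Illustrative only: init_trip == 0 and limit == n == a.length, so
    // the fill covers the whole new array and the implicit zeroing of
    // the allocation can be elided.
    static int[] filledArray(int n, int v) {
        int[] a = new int[n];
        for (int i = 0; i < n; i++) {
            a[i] = v;
        }
        return a;
    }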
hotspot/src/share/vm/opto/loopnode.cpp
@@ -1673,6 +1673,12 @@ void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool do_loop_pred) {
     _ltree_root->_child->loop_predication(this);
   }

+  if (OptimizeFill && UseLoopPredicate && C->has_loops() && !C->major_progress()) {
+    if (do_intrinsify_fill()) {
+      C->set_major_progress();
+    }
+  }
+
   // Perform iteration-splitting on inner loops.  Split iterations to avoid
   // range checks or one-shot null checks.
hotspot/src/share/vm/opto/loopnode.hpp
@@ -937,6 +937,12 @@ public:
   // same block.  Split thru the Region.
   void do_split_if( Node *iff );

+  // Conversion of fill/copy patterns into intrinsic versions
+  bool do_intrinsify_fill();
+  bool intrinsify_fill(IdealLoopTree* lpt);
+  bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
+                       Node*& shift, Node*& offset);
+
 private:
   // Return a type based on condition control flow
   const TypeInt* filtered_type( Node *n, Node* n_ctrl);
hotspot/src/share/vm/opto/runtime.cpp
@@ -645,6 +645,22 @@ const TypeFunc* OptoRuntime::generic_arraycopy_Type() {
}

const TypeFunc* OptoRuntime::array_fill_Type() {
  // create input type (domain)
  const Type** fields = TypeTuple::fields(3);
  fields[TypeFunc::Parms+0] = TypePtr::NOTNULL;
  fields[TypeFunc::Parms+1] = TypeInt::INT;
  fields[TypeFunc::Parms+2] = TypeInt::INT;
  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms + 3, fields);

  // create result type
  fields = TypeTuple::fields(1);
  fields[TypeFunc::Parms+0] = NULL; // void
  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);

  return TypeFunc::make(domain, range);
}

//------------- Interpreter state access for on stack replacement

const TypeFunc* OptoRuntime::osr_end_Type() {
  // create input type (domain)
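The domain here (TypePtr::NOTNULL, TypeInt::INT, TypeInt::INT) with a void range matches the native signature of the fill stubs, void (*)(elem*, int value, int count), the same function type the TEST_FILL self-test in stubRoutines.cpp casts to.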
hotspot/src/share/vm/opto/runtime.hpp
@@ -260,6 +260,8 @@ private:
   static const TypeFunc* generic_arraycopy_Type();
   static const TypeFunc* slow_arraycopy_Type();   // the full routine

+  static const TypeFunc* array_fill_Type();
+
   // leaf on stack replacement interpreter accessor types
   static const TypeFunc* osr_end_Type();
hotspot/src/share/vm/runtime/arguments.cpp
@@ -1513,6 +1513,9 @@ void Arguments::set_aggressive_opts_flags() {
   if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeStringConcat)) {
     FLAG_SET_DEFAULT(OptimizeStringConcat, true);
   }
+  if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeFill)) {
+    FLAG_SET_DEFAULT(OptimizeFill, true);
+  }
 #endif

   if (AggressiveOpts) {
hotspot/src/share/vm/runtime/stubRoutines.cpp
@@ -97,6 +97,15 @@ address StubRoutines::_checkcast_arraycopy = NULL;
 address StubRoutines::_unsafe_arraycopy  = NULL;
 address StubRoutines::_generic_arraycopy = NULL;

+address StubRoutines::_jbyte_fill;
+address StubRoutines::_jshort_fill;
+address StubRoutines::_jint_fill;
+address StubRoutines::_arrayof_jbyte_fill;
+address StubRoutines::_arrayof_jshort_fill;
+address StubRoutines::_arrayof_jint_fill;
+
 double (* StubRoutines::_intrinsic_log   )(double) = NULL;
 double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
 double (* StubRoutines::_intrinsic_exp   )(double) = NULL;

@@ -195,6 +204,46 @@ void StubRoutines::initialize2() {
#undef TEST_ARRAYCOPY

#define TEST_FILL(type)                                                   \
  if (_##type##_fill != NULL) {                                           \
    union {                                                               \
      double d;                                                           \
      type body[96];                                                      \
    } s;                                                                  \
                                                                          \
    int v = 32;                                                           \
    for (int offset = -2; offset <= 2; offset++) {                        \
      for (int i = 0; i < 96; i++) {                                      \
        s.body[i] = 1;                                                    \
      }                                                                   \
      type* start = s.body + 8 + offset;                                  \
      for (int aligned = 0; aligned < 2; aligned++) {                     \
        if (aligned) {                                                    \
          if (((intptr_t)start) % HeapWordSize == 0) {                    \
            ((void (*)(type*, int, int))StubRoutines::_arrayof_##type##_fill)(start, v, 80); \
          } else {                                                        \
            continue;                                                     \
          }                                                               \
        } else {                                                          \
          ((void (*)(type*, int, int))StubRoutines::_##type##_fill)(start, v, 80); \
        }                                                                 \
        for (int i = 0; i < 96; i++) {                                    \
          if (i < (8 + offset) || i >= (88 + offset)) {                   \
            assert(s.body[i] == 1, "what?");                              \
          } else {                                                        \
            assert(s.body[i] == 32, "what?");                             \
          }                                                               \
        }                                                                 \
      }                                                                   \
    }                                                                     \
  }                                                                       \

  TEST_FILL(jbyte);
  TEST_FILL(jshort);
  TEST_FILL(jint);

#undef TEST_FILL

#define TEST_COPYRTN(type) \
  test_arraycopy_func(CAST_FROM_FN_PTR(address, Copy::conjoint_##type##s_atomic), sizeof(type)); \
  test_arraycopy_func(CAST_FROM_FN_PTR(address, Copy::arrayof_conjoint_##type##s), (int)MAX2(sizeof(HeapWord), sizeof(type)))

@@ -315,3 +364,39 @@ JRT_LEAF(void, StubRoutines::arrayof_oop_copy(HeapWord* src, HeapWord* dest, siz
  Copy::arrayof_conjoint_oops(src, dest, count);
  gen_arraycopy_barrier((oop *) dest, count);
JRT_END

address StubRoutines::select_fill_function(BasicType t, bool aligned, const char* &name) {
#define RETURN_STUB(xxx_fill) { \
  name = #xxx_fill; \
  return StubRoutines::xxx_fill(); }

  switch (t) {
  case T_BYTE:
  case T_BOOLEAN:
    if (!aligned) RETURN_STUB(jbyte_fill);
    RETURN_STUB(arrayof_jbyte_fill);
  case T_CHAR:
  case T_SHORT:
    if (!aligned) RETURN_STUB(jshort_fill);
    RETURN_STUB(arrayof_jshort_fill);
  case T_INT:
  case T_FLOAT:
    if (!aligned) RETURN_STUB(jint_fill);
    RETURN_STUB(arrayof_jint_fill);
  case T_DOUBLE:
  case T_LONG:
  case T_ARRAY:
  case T_OBJECT:
  case T_NARROWOOP:
  case T_ADDRESS:
    // Currently unsupported
    return NULL;

  default:
    ShouldNotReachHere();
    return NULL;
  }

#undef RETURN_STUB
}
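Note the type mapping in select_fill_function: T_INT and T_FLOAT share the jint stubs (intrinsify_fill bit-casts a float value with MoveF2I first), while the 8-byte element types return NULL, so match_fill_loop rejects long and double fill loops as "unsupported store" for now.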
hotspot/src/share/vm/runtime/stubRoutines.hpp
@@ -148,6 +148,13 @@ class StubRoutines: AllStatic {
   static address _unsafe_arraycopy;
   static address _generic_arraycopy;

+  static address _jbyte_fill;
+  static address _jshort_fill;
+  static address _jint_fill;
+  static address _arrayof_jbyte_fill;
+  static address _arrayof_jshort_fill;
+  static address _arrayof_jint_fill;
+
   // These are versions of the java.lang.Math methods which perform
   // the same operations as the intrinsic version.  They are used for
   // constant folding in the compiler to ensure equivalence.  If the

@@ -259,6 +266,16 @@ class StubRoutines: AllStatic {
   static address unsafe_arraycopy()     { return _unsafe_arraycopy; }
   static address generic_arraycopy()    { return _generic_arraycopy; }

+  static address jbyte_fill()           { return _jbyte_fill; }
+  static address jshort_fill()          { return _jshort_fill; }
+  static address jint_fill()            { return _jint_fill; }
+  static address arrayof_jbyte_fill()   { return _arrayof_jbyte_fill; }
+  static address arrayof_jshort_fill()  { return _arrayof_jshort_fill; }
+  static address arrayof_jint_fill()    { return _arrayof_jint_fill; }
+
+  static address select_fill_function(BasicType t, bool aligned, const char* &name);
+
   static double  intrinsic_log(double d) {
     assert(_intrinsic_log != NULL, "must be defined");
     return _intrinsic_log(d);
hotspot/src/share/vm/utilities/globalDefinitions.hpp
@@ -529,7 +529,7 @@ extern int _type2aelembytes[T_CONFLICT+1]; // maps a BasicType to nof bytes used
 #ifdef ASSERT
 extern int type2aelembytes(BasicType t, bool allow_address = false); // asserts
 #else
-inline int type2aelembytes(BasicType t) { return _type2aelembytes[t]; }
+inline int type2aelembytes(BasicType t, bool allow_address = false) { return _type2aelembytes[t]; }
 #endif