Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
openanolis
dragonwell8_hotspot
提交
29a59e70
D
dragonwell8_hotspot
项目概览
openanolis
/
dragonwell8_hotspot
通知
2
Star
2
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
dragonwell8_hotspot
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
29a59e70
编写于
9月 03, 2011
作者:
N
never
浏览文件
操作
浏览文件
下载
差异文件
Merge
上级
c812e04e
5e59a959
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
285 addition
and
91 deletion
+285
-91
agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeLoadConstant.java
...ses/sun/jvm/hotspot/interpreter/BytecodeLoadConstant.java
+1
-1
src/cpu/sparc/vm/stubGenerator_sparc.cpp
src/cpu/sparc/vm/stubGenerator_sparc.cpp
+244
-90
src/cpu/sparc/vm/vm_version_sparc.cpp
src/cpu/sparc/vm/vm_version_sparc.cpp
+28
-0
src/share/vm/runtime/globals.hpp
src/share/vm/runtime/globals.hpp
+12
-0
未找到文件。
agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeLoadConstant.java
浏览文件 @
29a59e70
...
@@ -90,7 +90,7 @@ public class BytecodeLoadConstant extends Bytecode {
...
@@ -90,7 +90,7 @@ public class BytecodeLoadConstant extends Bytecode {
jcode
==
Bytecodes
.
_ldc2_w
;
jcode
==
Bytecodes
.
_ldc2_w
;
if
(!
codeOk
)
return
false
;
if
(!
codeOk
)
return
false
;
ConstantTag
ctag
=
method
().
getConstants
().
getTagAt
(
raw
Index
());
ConstantTag
ctag
=
method
().
getConstants
().
getTagAt
(
pool
Index
());
if
(
jcode
==
Bytecodes
.
_ldc2_w
)
{
if
(
jcode
==
Bytecodes
.
_ldc2_w
)
{
// has to be double or long
// has to be double or long
return
(
ctag
.
isDouble
()
||
ctag
.
isLong
())
?
true
:
false
;
return
(
ctag
.
isDouble
()
||
ctag
.
isLong
())
?
true
:
false
;
...
...
src/cpu/sparc/vm/stubGenerator_sparc.cpp
浏览文件 @
29a59e70
...
@@ -1124,6 +1124,126 @@ class StubGenerator: public StubCodeGenerator {
...
@@ -1124,6 +1124,126 @@ class StubGenerator: public StubCodeGenerator {
}
}
}
}
//
// Generate main code for disjoint arraycopy
//
// CopyLoopFunc is the pointer-to-member type of the loop emitters that
// disjoint_copy_core() invokes. Each emitter receives the source, destination
// and count registers, the per-iteration count decrement, the label to bind
// at the loop head, and two flags selecting whether prefetch instructions
// and stxa (instead of stx) stores are emitted.
//
typedef void (StubGenerator::*CopyLoopFunc)(Register from, Register to, Register count, int count_dec,
                                            Label& L_loop, bool use_prefetch, bool use_bis);

//
// Emits up to four instances of the loop produced by 'copy_loop_func':
//   - with UseBlockCopy: a prefetch+BIS loop and a BIS-only loop,
//   - always:            a prefetch loop (only if prefetch_count > 0) and
//                        a plain loop bound to L_copy that handles the rest.
//
// Arguments:
//   from, to, count - registers forwarded unchanged to copy_loop_func;
//                     'count' is temporarily reduced before a loop and
//                     restored after it by the code emitted here.
//   log2_elem_size  - log2 of the element size in bytes (asserted <= 3);
//                     used to convert byte sizes into element counts.
//   iter_size       - bytes copied by one loop iteration. The rounding of
//                     prefetch_dist below masks with -iter_size, which is
//                     only a round-up for powers of two (callers visible
//                     in this file pass 16 and 64).
//   copy_loop_func  - member function that emits one copy loop.
//
// O4 is used as a scratch register for the emitted comparisons.
//
void disjoint_copy_core(Register from, Register to, Register count, int log2_elem_size,
                        int iter_size, CopyLoopFunc copy_loop_func) {
  // Head of the final (no prefetch, no BIS) loop; also the target of the
  // branches that skip the prefetching loop or finish the BIS path.
  Label L_copy;

  assert(log2_elem_size <= 3, "the following code should be changed");
  // Number of elements in 16 bytes; handed to the loop emitter as its decrement.
  int count_dec = 16 >> log2_elem_size;

  int prefetch_dist = MAX2(ArraycopySrcPrefetchDistance, ArraycopyDstPrefetchDistance);
  assert(prefetch_dist < 4096, "invalid value");
  prefetch_dist = (prefetch_dist + (iter_size - 1)) & (-iter_size); // round up to one iteration copy size
  int prefetch_count = (prefetch_dist >> log2_elem_size); // elements count

  if (UseBlockCopy) {
    Label L_block_copy, L_block_copy_prefetch, L_skip_block_copy;

    // 64 bytes tail + bytes copied in one loop iteration
    int tail_size = 64 + iter_size;
    // Threshold in elements: at least the tail, otherwise BlockCopyLowLimit bytes.
    int block_copy_count = (MAX2(tail_size, (int)BlockCopyLowLimit)) >> log2_elem_size;
    // Use BIS copy only for big arrays since it requires membar.
    __ set(block_copy_count, O4);
    __ cmp_and_br_short(count, O4, Assembler::lessUnsigned, Assembler::pt, L_skip_block_copy);
    // This code is for disjoint source and destination:
    //   to <= from || to >= from+count
    // but BIS will stomp over 'from' if (to > from-tail_size && to <= from)
    __ sub(from, to, O4);
    __ srax(O4, 4, O4); // divide by 16 since the following short branch has only 5 bits for imm.
    __ cmp_and_br_short(O4, (tail_size >> 4), Assembler::lessEqualUnsigned, Assembler::pn, L_skip_block_copy);

    // Select the block-init store ASI for the stxa stores of the BIS loops.
    __ wrasi(G0, Assembler::ASI_ST_BLKINIT_PRIMARY);
    // BIS should not be used to copy tail (64 bytes+iter_size)
    // to avoid zeroing of following values.
    __ sub(count, (tail_size >> log2_elem_size), count); // count is still positive >= 0

    if (prefetch_count > 0) { // rounded up to one iteration count
      // Do prefetching only if copy size is bigger
      // than prefetch distance.
      // NOTE(review): this guard uses signed 'less' while the matching guard
      // in the non-BIS path below uses 'lessUnsigned' - confirm the
      // difference is intentional.
      __ set(prefetch_count, O4);
      __ cmp_and_brx_short(count, O4, Assembler::less, Assembler::pt, L_block_copy);
      __ sub(count, prefetch_count, count);

      // Loop with prefetch and BIS stores.
      (this->*copy_loop_func)(from, to, count, count_dec, L_block_copy_prefetch, true, true);
      __ add(count, prefetch_count, count); // restore count

    } // prefetch_count > 0

    // Loop with BIS stores only; its head is L_block_copy.
    (this->*copy_loop_func)(from, to, count, count_dec, L_block_copy, false, true);
    __ add(count, (tail_size >> log2_elem_size), count); // restore count

    // Switch the ASI register away from the block-init store ASI.
    __ wrasi(G0, Assembler::ASI_PRIMARY_NOFAULT);
    // BIS needs membar.
    __ membar(Assembler::StoreLoad);
    // Copy tail
    __ ba_short(L_copy);

    __ BIND(L_skip_block_copy);
  } // UseBlockCopy

  if (prefetch_count > 0) { // rounded up to one iteration count
    // Do prefetching only if copy size is bigger
    // than prefetch distance.
    __ set(prefetch_count, O4);
    __ cmp_and_brx_short(count, O4, Assembler::lessUnsigned, Assembler::pt, L_copy);
    __ sub(count, prefetch_count, count);

    // Loop with prefetch and regular stores.
    Label L_copy_prefetch;
    (this->*copy_loop_func)(from, to, count, count_dec, L_copy_prefetch, true, false);
    __ add(count, prefetch_count, count); // restore count

  } // prefetch_count > 0

  // Final loop: no prefetch, regular stores; its head is L_copy.
  (this->*copy_loop_func)(from, to, count, count_dec, L_copy, false, false);
}
//
// Helper methods for copy_16_bytes_forward_with_shift()
//
// Emits one loop that loads two 8-byte words from 'from', combines them with
// the value carried in O3 using the shift amounts held in G1/G5, and stores
// two 8-byte words to 'to'. The signature matches CopyLoopFunc so the loop
// can be instantiated by disjoint_copy_core().
//
// Arguments:
//   from, to     - source and destination address registers, each advanced
//                  by 16 per iteration.
//   count        - remaining-count register, decremented by 'count_dec'
//                  per iteration; the loop repeats while the result is >= 0.
//   count_dec    - decrement applied to 'count' per iteration.
//   L_loop       - label bound at the (aligned) loop head.
//   use_prefetch - emit prefetches for source/destination when the
//                  corresponding Arraycopy*PrefetchDistance flag is > 0.
//   use_bis      - store with stxa instead of stx.
//
// Registers O3, O4, G3 and G4 are used as temporaries. O3 is consumed at the
// top of each iteration and recomputed from G4 in the branch delay slot, so
// it presumably has to be set up by the caller before the first iteration -
// TODO confirm against the caller.
//
void copy_16_bytes_shift_loop(Register from, Register to, Register count, int count_dec,
                              Label& L_loop, bool use_prefetch, bool use_bis) {

  const Register left_shift = G1; // left shift bit counter
  const Register right_shift = G5; // right shift bit counter

  __ align(OptoLoopAlignment);
  __ BIND(L_loop);
  if (use_prefetch) {
    if (ArraycopySrcPrefetchDistance > 0) {
      __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
    }
    if (ArraycopyDstPrefetchDistance > 0) {
      __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads);
    }
  }
  __ ldx(from, 0, O4);
  __ ldx(from, 8, G4);
  // Pointers are advanced before the stores, hence the negative store offsets below.
  __ inc(to, 16);
  __ inc(from, 16);
  __ deccc(count, count_dec); // Can we do next iteration after this one?
  // First output word: carried O3 combined with O4 shifted right.
  __ srlx(O4, right_shift, G3);
  __ bset(G3, O3);
  // Second output word: O4 shifted left combined with G4 shifted right.
  __ sllx(O4, left_shift, O4);
  __ srlx(G4, right_shift, G3);
  __ bset(G3, O4);
  if (use_bis) {
    __ stxa(O3, to, -16);
    __ stxa(O4, to, -8);
  } else {
    __ stx(O3, to, -16);
    __ stx(O4, to, -8);
  }
  __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
  // Delay slot: prepare O3 (G4 shifted left) for the next iteration.
  __ delayed()->sllx(G4, left_shift, O3);
}
// Copy big chunks forward with shift
// Copy big chunks forward with shift
//
//
...
@@ -1135,64 +1255,51 @@ class StubGenerator: public StubCodeGenerator {
...
@@ -1135,64 +1255,51 @@ class StubGenerator: public StubCodeGenerator {
// L_copy_bytes - copy exit label
// L_copy_bytes - copy exit label
//
//
void
copy_16_bytes_forward_with_shift
(
Register
from
,
Register
to
,
void
copy_16_bytes_forward_with_shift
(
Register
from
,
Register
to
,
Register
count
,
int
count_dec
,
Label
&
L_copy_bytes
)
{
Register
count
,
int
log2_elem_size
,
Label
&
L_copy_bytes
)
{
Label
L_loop
,
L_aligned_copy
,
L_copy_last_bytes
;
Label
L_aligned_copy
,
L_copy_last_bytes
;
assert
(
log2_elem_size
<=
3
,
"the following code should be changed"
);
int
count_dec
=
16
>>
log2_elem_size
;
// if both arrays have the same alignment mod 8, do 8 bytes aligned copy
// if both arrays have the same alignment mod 8, do 8 bytes aligned copy
__
andcc
(
from
,
7
,
G1
);
// misaligned bytes
__
andcc
(
from
,
7
,
G1
);
// misaligned bytes
__
br
(
Assembler
::
zero
,
false
,
Assembler
::
pt
,
L_aligned_copy
);
__
br
(
Assembler
::
zero
,
false
,
Assembler
::
pt
,
L_aligned_copy
);
__
delayed
()
->
nop
();
__
delayed
()
->
nop
();
const
Register
left_shift
=
G1
;
// left shift bit counter
const
Register
left_shift
=
G1
;
// left shift bit counter
const
Register
right_shift
=
G5
;
// right shift bit counter
const
Register
right_shift
=
G5
;
// right shift bit counter
__
sll
(
G1
,
LogBitsPerByte
,
left_shift
);
__
sll
(
G1
,
LogBitsPerByte
,
left_shift
);
__
mov
(
64
,
right_shift
);
__
mov
(
64
,
right_shift
);
__
sub
(
right_shift
,
left_shift
,
right_shift
);
__
sub
(
right_shift
,
left_shift
,
right_shift
);
//
//
// Load 2 aligned 8-bytes chunks and use one from previous iteration
// Load 2 aligned 8-bytes chunks and use one from previous iteration
// to form 2 aligned 8-bytes chunks to store.
// to form 2 aligned 8-bytes chunks to store.
//
//
__
deccc
(
count
,
count_dec
);
// Pre-decrement 'count'
__
dec
(
count
,
count_dec
);
// Pre-decrement 'count'
__
andn
(
from
,
7
,
from
);
// Align address
__
andn
(
from
,
7
,
from
);
// Align address
__
ldx
(
from
,
0
,
O3
);
__
ldx
(
from
,
0
,
O3
);
__
inc
(
from
,
8
);
__
inc
(
from
,
8
);
__
align
(
OptoLoopAlignment
);
__
sllx
(
O3
,
left_shift
,
O3
);
__
BIND
(
L_loop
);
__
ldx
(
from
,
0
,
O4
);
disjoint_copy_core
(
from
,
to
,
count
,
log2_elem_size
,
16
,
copy_16_bytes_shift_loop
);
__
deccc
(
count
,
count_dec
);
// Can we do next iteration after this one?
__
ldx
(
from
,
8
,
G4
);
__
inccc
(
count
,
count_dec
>>
1
);
// + 8 bytes
__
inc
(
to
,
16
);
__
brx
(
Assembler
::
negative
,
true
,
Assembler
::
pn
,
L_copy_last_bytes
);
__
inc
(
from
,
16
);
__
delayed
()
->
inc
(
count
,
count_dec
>>
1
);
// restore 'count'
__
sllx
(
O3
,
left_shift
,
O3
);
__
srlx
(
O4
,
right_shift
,
G3
);
// copy 8 bytes, part of them already loaded in O3
__
bset
(
G3
,
O3
);
__
ldx
(
from
,
0
,
O4
);
__
stx
(
O3
,
to
,
-
16
);
__
inc
(
to
,
8
);
__
sllx
(
O4
,
left_shift
,
O4
);
__
inc
(
from
,
8
);
__
srlx
(
G4
,
right_shift
,
G3
);
__
srlx
(
O4
,
right_shift
,
G3
);
__
bset
(
G3
,
O4
);
__
bset
(
O3
,
G3
);
__
stx
(
O4
,
to
,
-
8
);
__
stx
(
G3
,
to
,
-
8
);
__
brx
(
Assembler
::
greaterEqual
,
false
,
Assembler
::
pt
,
L_loop
);
__
delayed
()
->
mov
(
G4
,
O3
);
__
inccc
(
count
,
count_dec
>>
1
);
// + 8 bytes
__
brx
(
Assembler
::
negative
,
true
,
Assembler
::
pn
,
L_copy_last_bytes
);
__
delayed
()
->
inc
(
count
,
count_dec
>>
1
);
// restore 'count'
// copy 8 bytes, part of them already loaded in O3
__
ldx
(
from
,
0
,
O4
);
__
inc
(
to
,
8
);
__
inc
(
from
,
8
);
__
sllx
(
O3
,
left_shift
,
O3
);
__
srlx
(
O4
,
right_shift
,
G3
);
__
bset
(
O3
,
G3
);
__
stx
(
G3
,
to
,
-
8
);
__
BIND
(
L_copy_last_bytes
);
__
BIND
(
L_copy_last_bytes
);
__
srl
(
right_shift
,
LogBitsPerByte
,
right_shift
);
// misaligned bytes
__
srl
(
right_shift
,
LogBitsPerByte
,
right_shift
);
// misaligned bytes
__
br
(
Assembler
::
always
,
false
,
Assembler
::
pt
,
L_copy_bytes
);
__
br
(
Assembler
::
always
,
false
,
Assembler
::
pt
,
L_copy_bytes
);
__
delayed
()
->
sub
(
from
,
right_shift
,
from
);
// restore address
__
delayed
()
->
sub
(
from
,
right_shift
,
from
);
// restore address
__
BIND
(
L_aligned_copy
);
__
BIND
(
L_aligned_copy
);
}
}
...
@@ -1348,7 +1455,7 @@ class StubGenerator: public StubCodeGenerator {
...
@@ -1348,7 +1455,7 @@ class StubGenerator: public StubCodeGenerator {
// The compare above (count >= 23) guarantees 'count' >= 16 bytes.
// The compare above (count >= 23) guarantees 'count' >= 16 bytes.
// Also jump over aligned copy after the copy with shift completed.
// Also jump over aligned copy after the copy with shift completed.
copy_16_bytes_forward_with_shift
(
from
,
to
,
count
,
16
,
L_copy_byte
);
copy_16_bytes_forward_with_shift
(
from
,
to
,
count
,
0
,
L_copy_byte
);
}
}
// Both array are 8 bytes aligned, copy 16 bytes at a time
// Both array are 8 bytes aligned, copy 16 bytes at a time
...
@@ -1576,7 +1683,7 @@ class StubGenerator: public StubCodeGenerator {
...
@@ -1576,7 +1683,7 @@ class StubGenerator: public StubCodeGenerator {
// The compare above (count >= 11) guarantees 'count' >= 16 bytes.
// The compare above (count >= 11) guarantees 'count' >= 16 bytes.
// Also jump over aligned copy after the copy with shift completed.
// Also jump over aligned copy after the copy with shift completed.
copy_16_bytes_forward_with_shift
(
from
,
to
,
count
,
8
,
L_copy_2_bytes
);
copy_16_bytes_forward_with_shift
(
from
,
to
,
count
,
1
,
L_copy_2_bytes
);
}
}
// Both array are 8 bytes aligned, copy 16 bytes at a time
// Both array are 8 bytes aligned, copy 16 bytes at a time
...
@@ -1949,6 +2056,45 @@ class StubGenerator: public StubCodeGenerator {
...
@@ -1949,6 +2056,45 @@ class StubGenerator: public StubCodeGenerator {
return
start
;
return
start
;
}
}
//
// Helper methods for generate_disjoint_int_copy_core()
//
// Emits one loop that loads two 8-byte words from 'from' at offsets 4 and 12,
// recombines them with the value carried in O3 using fixed 32-bit shifts, and
// stores two 8-byte words to 'to'. The signature matches CopyLoopFunc so the
// loop can be instantiated by disjoint_copy_core().
//
// Arguments:
//   from, to     - source and destination address registers, each advanced
//                  by 16 per iteration.
//   count        - remaining-count register; decremented by a hard-coded 4
//                  per iteration, and the loop repeats while the result
//                  is >= 0.
//   count_dec    - not used by this emitter (the decrement is the literal 4).
//   L_loop       - label bound at the (aligned) loop head.
//   use_prefetch - emit prefetches for source/destination when the
//                  corresponding Arraycopy*PrefetchDistance flag is > 0.
//   use_bis      - store with stxa instead of stx.
//
// Registers O3, O4, G3 and G4 are used as temporaries. O3 is consumed at the
// top of each iteration and recomputed from G4 in the branch delay slot, so
// it presumably has to be set up by the caller before the first iteration -
// TODO confirm against the caller.
//
void copy_16_bytes_loop(Register from, Register to, Register count, int count_dec,
                        Label& L_loop, bool use_prefetch, bool use_bis) {

  __ align(OptoLoopAlignment);
  __ BIND(L_loop);
  if (use_prefetch) {
    if (ArraycopySrcPrefetchDistance > 0) {
      __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
    }
    if (ArraycopyDstPrefetchDistance > 0) {
      __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads);
    }
  }
  __ ldx(from, 4, O4);
  __ ldx(from, 12, G4);
  // Pointers are advanced before the stores, hence the negative store offsets below.
  __ inc(to, 16);
  __ inc(from, 16);
  __ deccc(count, 4); // Can we do next iteration after this one?

  // First output word: carried O3 combined with the upper half of O4.
  __ srlx(O4, 32, G3);
  __ bset(G3, O3);
  // Second output word: lower half of O4 moved up, combined with the upper half of G4.
  __ sllx(O4, 32, O4);
  __ srlx(G4, 32, G3);
  __ bset(G3, O4);
  if (use_bis) {
    __ stxa(O3, to, -16);
    __ stxa(O4, to, -8);
  } else {
    __ stx(O3, to, -16);
    __ stx(O4, to, -8);
  }
  __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
  // Delay slot: prepare O3 (G4 shifted left by 32) for the next iteration.
  __ delayed()->sllx(G4, 32, O3);
}
//
//
// Generate core code for disjoint int copy (and oop copy on 32-bit).
// Generate core code for disjoint int copy (and oop copy on 32-bit).
// If "aligned" is true, the "from" and "to" addresses are assumed
// If "aligned" is true, the "from" and "to" addresses are assumed
...
@@ -1962,7 +2108,7 @@ class StubGenerator: public StubCodeGenerator {
...
@@ -1962,7 +2108,7 @@ class StubGenerator: public StubCodeGenerator {
void
generate_disjoint_int_copy_core
(
bool
aligned
)
{
void
generate_disjoint_int_copy_core
(
bool
aligned
)
{
Label
L_skip_alignment
,
L_aligned_copy
;
Label
L_skip_alignment
,
L_aligned_copy
;
Label
L_copy_
16_bytes
,
L_copy_
4_bytes
,
L_copy_4_bytes_loop
,
L_exit
;
Label
L_copy_4_bytes
,
L_copy_4_bytes_loop
,
L_exit
;
const
Register
from
=
O0
;
// source array address
const
Register
from
=
O0
;
// source array address
const
Register
to
=
O1
;
// destination array address
const
Register
to
=
O1
;
// destination array address
...
@@ -2013,30 +2159,16 @@ class StubGenerator: public StubCodeGenerator {
...
@@ -2013,30 +2159,16 @@ class StubGenerator: public StubCodeGenerator {
// copy with shift 4 elements (16 bytes) at a time
// copy with shift 4 elements (16 bytes) at a time
__
dec
(
count
,
4
);
// The cmp at the beginning guaranty count >= 4
__
dec
(
count
,
4
);
// The cmp at the beginning guaranty count >= 4
__
sllx
(
O3
,
32
,
O3
);
__
align
(
OptoLoopAlignment
);
disjoint_copy_core
(
from
,
to
,
count
,
2
,
16
,
copy_16_bytes_loop
);
__
BIND
(
L_copy_16_bytes
);
__
ldx
(
from
,
4
,
O4
);
__
deccc
(
count
,
4
);
// Can we do next iteration after this one?
__
ldx
(
from
,
12
,
G4
);
__
inc
(
to
,
16
);
__
inc
(
from
,
16
);
__
sllx
(
O3
,
32
,
O3
);
__
srlx
(
O4
,
32
,
G3
);
__
bset
(
G3
,
O3
);
__
stx
(
O3
,
to
,
-
16
);
__
sllx
(
O4
,
32
,
O4
);
__
srlx
(
G4
,
32
,
G3
);
__
bset
(
G3
,
O4
);
__
stx
(
O4
,
to
,
-
8
);
__
brx
(
Assembler
::
greaterEqual
,
false
,
Assembler
::
pt
,
L_copy_16_bytes
);
__
delayed
()
->
mov
(
G4
,
O3
);
__
br
(
Assembler
::
always
,
false
,
Assembler
::
pt
,
L_copy_4_bytes
);
__
br
(
Assembler
::
always
,
false
,
Assembler
::
pt
,
L_copy_4_bytes
);
__
delayed
()
->
inc
(
count
,
4
);
// restore 'count'
__
delayed
()
->
inc
(
count
,
4
);
// restore 'count'
__
BIND
(
L_aligned_copy
);
__
BIND
(
L_aligned_copy
);
}
}
// !aligned
// copy 4 elements (16 bytes) at a time
// copy 4 elements (16 bytes) at a time
__
and3
(
count
,
1
,
G4
);
// Save
__
and3
(
count
,
1
,
G4
);
// Save
__
srl
(
count
,
1
,
count
);
__
srl
(
count
,
1
,
count
);
...
@@ -2222,6 +2354,38 @@ class StubGenerator: public StubCodeGenerator {
...
@@ -2222,6 +2354,38 @@ class StubGenerator: public StubCodeGenerator {
return
start
;
return
start
;
}
}
//
// Helper methods for generate_disjoint_long_copy_core()
//
// Emits one loop whose body is unrolled four times at code-generation time
// (off = 0, 16, 32, 48); each unrolled step loads two 8-byte words from
// 'from' and stores them to 'to' at the same offsets, so one iteration moves
// 64 bytes. The signature matches CopyLoopFunc so the loop can be
// instantiated by disjoint_copy_core().
//
// Arguments:
//   from, to     - source and destination address registers, each advanced
//                  by 64 per iteration.
//   count        - remaining-count register; decremented by a hard-coded 8
//                  per iteration, and the loop repeats while the result
//                  is >= 0.
//   count_dec    - not used by this emitter (the decrement is the literal 8).
//   L_loop       - label bound at the (aligned) loop head.
//   use_prefetch - emit prefetches for source/destination when the
//                  corresponding Arraycopy*PrefetchDistance flag is > 0.
//   use_bis      - store with stxa instead of stx.
//
// Registers O4 and O5 are used as temporaries.
//
void copy_64_bytes_loop(Register from, Register to, Register count, int count_dec,
                        Label& L_loop, bool use_prefetch, bool use_bis) {
  __ align(OptoLoopAlignment);
  __ BIND(L_loop);
  for (int off = 0; off < 64; off += 16) {
    // Prefetches are emitted for off == 0 and off == 32 only.
    // NOTE(review): both sites use the same displacement (the prefetch
    // distance without 'off'), so the same addresses are prefetched twice
    // per iteration - confirm whether adding 'off' was intended.
    if (use_prefetch && (off & 31) == 0) {
      if (ArraycopySrcPrefetchDistance > 0) {
        __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
      }
      if (ArraycopyDstPrefetchDistance > 0) {
        __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads);
      }
    }
    __ ldx(from, off + 0, O4);
    __ ldx(from, off + 8, O5);
    if (use_bis) {
      __ stxa(O4, to, off + 0);
      __ stxa(O5, to, off + 8);
    } else {
      __ stx(O4, to, off + 0);
      __ stx(O5, to, off + 8);
    }
  }
  __ deccc(count, 8);
  __ inc(from, 64);
  __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
  // Delay slot: advance the destination pointer.
  __ delayed()->inc(to, 64);
}
//
//
// Generate core code for disjoint long copy (and oop copy on 64-bit).
// Generate core code for disjoint long copy (and oop copy on 64-bit).
// "aligned" is ignored, because we must make the stronger
// "aligned" is ignored, because we must make the stronger
...
@@ -2261,38 +2425,28 @@ class StubGenerator: public StubCodeGenerator {
...
@@ -2261,38 +2425,28 @@ class StubGenerator: public StubCodeGenerator {
const
Register
offset0
=
O4
;
// element offset
const
Register
offset0
=
O4
;
// element offset
const
Register
offset8
=
O5
;
// next element offset
const
Register
offset8
=
O5
;
// next element offset
__
deccc
(
count
,
2
);
__
deccc
(
count
,
2
);
__
mov
(
G0
,
offset0
);
// offset from start of arrays (0)
__
mov
(
G0
,
offset0
);
// offset from start of arrays (0)
__
brx
(
Assembler
::
negative
,
false
,
Assembler
::
pn
,
L_copy_8_bytes
);
__
brx
(
Assembler
::
negative
,
false
,
Assembler
::
pn
,
L_copy_8_bytes
);
__
delayed
()
->
add
(
offset0
,
8
,
offset8
);
__
delayed
()
->
add
(
offset0
,
8
,
offset8
);
// Copy by 64 bytes chunks
// Copy by 64 bytes chunks
Label
L_copy_64_bytes
;
const
Register
from64
=
O3
;
// source address
const
Register
from64
=
O3
;
// source address
const
Register
to64
=
G3
;
// destination address
const
Register
to64
=
G3
;
// destination address
__
subcc
(
count
,
6
,
O3
);
__
subcc
(
count
,
6
,
O3
);
__
brx
(
Assembler
::
negative
,
false
,
Assembler
::
pt
,
L_copy_16_bytes
);
__
brx
(
Assembler
::
negative
,
false
,
Assembler
::
pt
,
L_copy_16_bytes
);
__
delayed
()
->
mov
(
to
,
to64
);
__
delayed
()
->
mov
(
to
,
to64
);
// Now we can use O4(offset0), O5(offset8) as temps
// Now we can use O4(offset0), O5(offset8) as temps
__
mov
(
O3
,
count
);
__
mov
(
O3
,
count
);
__
mov
(
from
,
from64
);
// count >= 0 (original count - 8)
__
mov
(
from
,
from64
);
__
align
(
OptoLoopAlignment
);
disjoint_copy_core
(
from64
,
to64
,
count
,
3
,
64
,
copy_64_bytes_loop
);
__
BIND
(
L_copy_64_bytes
);
for
(
int
off
=
0
;
off
<
64
;
off
+=
16
)
{
__
ldx
(
from64
,
off
+
0
,
O4
);
__
ldx
(
from64
,
off
+
8
,
O5
);
__
stx
(
O4
,
to64
,
off
+
0
);
__
stx
(
O5
,
to64
,
off
+
8
);
}
__
deccc
(
count
,
8
);
__
inc
(
from64
,
64
);
__
brx
(
Assembler
::
greaterEqual
,
false
,
Assembler
::
pt
,
L_copy_64_bytes
);
__
delayed
()
->
inc
(
to64
,
64
);
// Restore O4(offset0), O5(offset8)
// Restore O4(offset0), O5(offset8)
__
sub
(
from64
,
from
,
offset0
);
__
sub
(
from64
,
from
,
offset0
);
__
inccc
(
count
,
6
);
__
inccc
(
count
,
6
);
// restore count
__
brx
(
Assembler
::
negative
,
false
,
Assembler
::
pn
,
L_copy_8_bytes
);
__
brx
(
Assembler
::
negative
,
false
,
Assembler
::
pn
,
L_copy_8_bytes
);
__
delayed
()
->
add
(
offset0
,
8
,
offset8
);
__
delayed
()
->
add
(
offset0
,
8
,
offset8
);
...
...
src/cpu/sparc/vm/vm_version_sparc.cpp
浏览文件 @
29a59e70
...
@@ -75,6 +75,24 @@ void VM_Version::initialize() {
...
@@ -75,6 +75,24 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT
(
AllocatePrefetchStyle
,
1
);
FLAG_SET_DEFAULT
(
AllocatePrefetchStyle
,
1
);
}
}
if
(
has_v9
())
{
assert
(
ArraycopySrcPrefetchDistance
<
4096
,
"invalid value"
);
if
(
ArraycopySrcPrefetchDistance
>=
4096
)
ArraycopySrcPrefetchDistance
=
4064
;
assert
(
ArraycopyDstPrefetchDistance
<
4096
,
"invalid value"
);
if
(
ArraycopyDstPrefetchDistance
>=
4096
)
ArraycopyDstPrefetchDistance
=
4064
;
}
else
{
if
(
ArraycopySrcPrefetchDistance
>
0
)
{
warning
(
"prefetch instructions are not available on this CPU"
);
FLAG_SET_DEFAULT
(
ArraycopySrcPrefetchDistance
,
0
);
}
if
(
ArraycopyDstPrefetchDistance
>
0
)
{
warning
(
"prefetch instructions are not available on this CPU"
);
FLAG_SET_DEFAULT
(
ArraycopyDstPrefetchDistance
,
0
);
}
}
UseSSE
=
0
;
// Only on x86 and x64
UseSSE
=
0
;
// Only on x86 and x64
_supports_cx8
=
has_v9
();
_supports_cx8
=
has_v9
();
...
@@ -180,6 +198,16 @@ void VM_Version::initialize() {
...
@@ -180,6 +198,16 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT
(
UseBlockZeroing
,
false
);
FLAG_SET_DEFAULT
(
UseBlockZeroing
,
false
);
}
}
assert
(
BlockCopyLowLimit
>
0
,
"invalid value"
);
if
(
has_block_zeroing
())
{
// has_blk_init() && is_T4(): core's local L2 cache
if
(
FLAG_IS_DEFAULT
(
UseBlockCopy
))
{
FLAG_SET_DEFAULT
(
UseBlockCopy
,
true
);
}
}
else
if
(
UseBlockCopy
)
{
warning
(
"BIS instructions are not available or expensive on this CPU"
);
FLAG_SET_DEFAULT
(
UseBlockCopy
,
false
);
}
#ifdef COMPILER2
#ifdef COMPILER2
// T4 and newer Sparc cpus have fast RDPC.
// T4 and newer Sparc cpus have fast RDPC.
if
(
has_fast_rdpc
()
&&
FLAG_IS_DEFAULT
(
UseRDPCForConstantTableBase
))
{
if
(
has_fast_rdpc
()
&&
FLAG_IS_DEFAULT
(
UseRDPCForConstantTableBase
))
{
...
...
src/share/vm/runtime/globals.hpp
浏览文件 @
29a59e70
...
@@ -1985,6 +1985,12 @@ class CommandLineFlags {
...
@@ -1985,6 +1985,12 @@ class CommandLineFlags {
product(intx, BlockZeroingLowLimit, 2048, \
product(intx, BlockZeroingLowLimit, 2048, \
"Minimum size in bytes when block zeroing will be used") \
"Minimum size in bytes when block zeroing will be used") \
\
\
product(bool, UseBlockCopy, false, \
"Use special cpu instructions for block copy") \
\
product(intx, BlockCopyLowLimit, 2048, \
"Minimum size in bytes when block copy will be used") \
\
product(bool, PrintRevisitStats, false, \
product(bool, PrintRevisitStats, false, \
"Print revisit (klass and MDO) stack related information") \
"Print revisit (klass and MDO) stack related information") \
\
\
...
@@ -2918,6 +2924,12 @@ class CommandLineFlags {
...
@@ -2918,6 +2924,12 @@ class CommandLineFlags {
product(intx, ReadPrefetchInstr, 0, \
product(intx, ReadPrefetchInstr, 0, \
"Prefetch instruction to prefetch ahead") \
"Prefetch instruction to prefetch ahead") \
\
\
product(uintx, ArraycopySrcPrefetchDistance, 0, \
"Distance to prefetch source array in arracopy") \
\
product(uintx, ArraycopyDstPrefetchDistance, 0, \
"Distance to prefetch destination array in arracopy") \
\
/* deoptimization */
\
/* deoptimization */
\
develop(bool, TraceDeoptimization, false, \
develop(bool, TraceDeoptimization, false, \
"Trace deoptimization") \
"Trace deoptimization") \
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录