Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
openanolis
dragonwell8_hotspot
提交
a34ad0c4
D
dragonwell8_hotspot
项目概览
openanolis
/
dragonwell8_hotspot
通知
2
Star
2
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
dragonwell8_hotspot
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a34ad0c4
编写于
5月 23, 2019
作者:
A
andrew
浏览文件
操作
浏览文件
下载
差异文件
Merge
上级
fcef1db0
68f60c44
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
133 additions
and
7 deletions
+133
-7
src/cpu/ppc/vm/stubGenerator_ppc.cpp
src/cpu/ppc/vm/stubGenerator_ppc.cpp
+132
-6
src/share/vm/prims/jni.cpp
src/share/vm/prims/jni.cpp
+1
-1
未找到文件。
src/cpu/ppc/vm/stubGenerator_ppc.cpp
浏览文件 @
a34ad0c4
...
...
@@ -1131,8 +1131,11 @@ class StubGenerator: public StubCodeGenerator {
Register
tmp3
=
R8_ARG6
;
Register
tmp4
=
R9_ARG7
;
VectorSRegister
tmp_vsr1
=
VSR1
;
VectorSRegister
tmp_vsr2
=
VSR2
;
Label
l_1
,
l_2
,
l_3
,
l_4
,
l_5
,
l_6
,
l_7
,
l_8
,
l_9
,
l_10
;
Label
l_1
,
l_2
,
l_3
,
l_4
,
l_5
,
l_6
,
l_7
,
l_8
,
l_9
;
// Don't try anything fancy if arrays don't have many elements.
__
li
(
tmp3
,
0
);
__
cmpwi
(
CCR0
,
R5_ARG3
,
17
);
...
...
@@ -1186,6 +1189,8 @@ class StubGenerator: public StubCodeGenerator {
__
andi_
(
R5_ARG3
,
R5_ARG3
,
31
);
__
mtctr
(
tmp1
);
if
(
!
VM_Version
::
has_vsx
())
{
__
bind
(
l_8
);
// Use unrolled version for mass copying (copy 32 elements at a time)
// Load feeding store gets zero latency on Power6, however not on Power5.
...
...
@@ -1201,7 +1206,44 @@ class StubGenerator: public StubCodeGenerator {
__
addi
(
R3_ARG1
,
R3_ARG1
,
32
);
__
addi
(
R4_ARG2
,
R4_ARG2
,
32
);
__
bdnz
(
l_8
);
}
}
else
{
// Processor supports VSX, so use it to mass copy.
// Prefetch the data into the L2 cache.
__
dcbt
(
R3_ARG1
,
0
);
// If supported set DSCR pre-fetch to deepest.
if
(
VM_Version
::
has_mfdscr
())
{
__
load_const_optimized
(
tmp2
,
VM_Version
::
_dscr_val
|
7
);
__
mtdscr
(
tmp2
);
}
__
li
(
tmp1
,
16
);
// Backbranch target aligned to 32-byte. Not 16-byte align as
// loop contains < 8 instructions that fit inside a single
// i-cache sector.
__
align
(
32
);
__
bind
(
l_10
);
// Use loop with VSX load/store instructions to
// copy 32 elements at a time.
__
lxvd2x
(
tmp_vsr1
,
0
,
R3_ARG1
);
// Load src
__
stxvd2x
(
tmp_vsr1
,
0
,
R4_ARG2
);
// Store to dst
__
lxvd2x
(
tmp_vsr2
,
tmp1
,
R3_ARG1
);
// Load src + 16
__
stxvd2x
(
tmp_vsr2
,
tmp1
,
R4_ARG2
);
// Store to dst + 16
__
addi
(
R3_ARG1
,
R3_ARG1
,
32
);
// Update src+=32
__
addi
(
R4_ARG2
,
R4_ARG2
,
32
);
// Update dst+=32
__
bdnz
(
l_10
);
// Dec CTR and loop if not zero.
// Restore DSCR pre-fetch value.
if
(
VM_Version
::
has_mfdscr
())
{
__
load_const_optimized
(
tmp2
,
VM_Version
::
_dscr_val
);
__
mtdscr
(
tmp2
);
}
}
// VSX
}
// FasterArrayCopy
__
bind
(
l_6
);
...
...
@@ -1570,7 +1612,11 @@ class StubGenerator: public StubCodeGenerator {
Register
tmp3
=
R8_ARG6
;
Register
tmp4
=
R0
;
Label
l_1
,
l_2
,
l_3
,
l_4
,
l_5
,
l_6
;
VectorSRegister
tmp_vsr1
=
VSR1
;
VectorSRegister
tmp_vsr2
=
VSR2
;
Label
l_1
,
l_2
,
l_3
,
l_4
,
l_5
,
l_6
,
l_7
;
// for short arrays, just do single element copy
__
li
(
tmp3
,
0
);
__
cmpwi
(
CCR0
,
R5_ARG3
,
5
);
...
...
@@ -1605,6 +1651,8 @@ class StubGenerator: public StubCodeGenerator {
__
andi_
(
R5_ARG3
,
R5_ARG3
,
7
);
__
mtctr
(
tmp1
);
if
(
!
VM_Version
::
has_vsx
())
{
__
bind
(
l_6
);
// Use unrolled version for mass copying (copy 8 elements at a time).
// Load feeding store gets zero latency on power6, however not on power 5.
...
...
@@ -1620,7 +1668,44 @@ class StubGenerator: public StubCodeGenerator {
__
addi
(
R3_ARG1
,
R3_ARG1
,
32
);
__
addi
(
R4_ARG2
,
R4_ARG2
,
32
);
__
bdnz
(
l_6
);
}
}
else
{
// Processor supports VSX, so use it to mass copy.
// Prefetch the data into the L2 cache.
__
dcbt
(
R3_ARG1
,
0
);
// If supported set DSCR pre-fetch to deepest.
if
(
VM_Version
::
has_mfdscr
())
{
__
load_const_optimized
(
tmp2
,
VM_Version
::
_dscr_val
|
7
);
__
mtdscr
(
tmp2
);
}
__
li
(
tmp1
,
16
);
// Backbranch target aligned to 32-byte. Not 16-byte align as
// loop contains < 8 instructions that fit inside a single
// i-cache sector.
__
align
(
32
);
__
bind
(
l_7
);
// Use loop with VSX load/store instructions to
// copy 8 elements at a time.
__
lxvd2x
(
tmp_vsr1
,
0
,
R3_ARG1
);
// Load src
__
stxvd2x
(
tmp_vsr1
,
0
,
R4_ARG2
);
// Store to dst
__
lxvd2x
(
tmp_vsr2
,
tmp1
,
R3_ARG1
);
// Load src + 16
__
stxvd2x
(
tmp_vsr2
,
tmp1
,
R4_ARG2
);
// Store to dst + 16
__
addi
(
R3_ARG1
,
R3_ARG1
,
32
);
// Update src+=32
__
addi
(
R4_ARG2
,
R4_ARG2
,
32
);
// Update dst+=32
__
bdnz
(
l_7
);
// Dec CTR and loop if not zero.
// Restore DSCR pre-fetch value.
if
(
VM_Version
::
has_mfdscr
())
{
__
load_const_optimized
(
tmp2
,
VM_Version
::
_dscr_val
);
__
mtdscr
(
tmp2
);
}
}
// VSX
}
// FasterArrayCopy
// copy 1 element at a time
__
bind
(
l_2
);
...
...
@@ -1772,7 +1857,10 @@ class StubGenerator: public StubCodeGenerator {
Register
tmp3
=
R8_ARG6
;
Register
tmp4
=
R0
;
Label
l_1
,
l_2
,
l_3
,
l_4
;
Label
l_1
,
l_2
,
l_3
,
l_4
,
l_5
;
VectorSRegister
tmp_vsr1
=
VSR1
;
VectorSRegister
tmp_vsr2
=
VSR2
;
{
// FasterArrayCopy
__
cmpwi
(
CCR0
,
R5_ARG3
,
3
);
...
...
@@ -1782,6 +1870,7 @@ class StubGenerator: public StubCodeGenerator {
__
andi_
(
R5_ARG3
,
R5_ARG3
,
3
);
__
mtctr
(
tmp1
);
if
(
!
VM_Version
::
has_vsx
())
{
__
bind
(
l_4
);
// Use unrolled version for mass copying (copy 4 elements at a time).
// Load feeding store gets zero latency on Power6, however not on Power5.
...
...
@@ -1797,7 +1886,44 @@ class StubGenerator: public StubCodeGenerator {
__
addi
(
R3_ARG1
,
R3_ARG1
,
32
);
__
addi
(
R4_ARG2
,
R4_ARG2
,
32
);
__
bdnz
(
l_4
);
}
}
else
{
// Processor supports VSX, so use it to mass copy.
// Prefetch the data into the L2 cache.
__
dcbt
(
R3_ARG1
,
0
);
// If supported set DSCR pre-fetch to deepest.
if
(
VM_Version
::
has_mfdscr
())
{
__
load_const_optimized
(
tmp2
,
VM_Version
::
_dscr_val
|
7
);
__
mtdscr
(
tmp2
);
}
__
li
(
tmp1
,
16
);
// Backbranch target aligned to 32-byte. Not 16-byte align as
// loop contains < 8 instructions that fit inside a single
// i-cache sector.
__
align
(
32
);
__
bind
(
l_5
);
// Use loop with VSX load/store instructions to
// copy 4 elements at a time.
__
lxvd2x
(
tmp_vsr1
,
0
,
R3_ARG1
);
// Load src
__
stxvd2x
(
tmp_vsr1
,
0
,
R4_ARG2
);
// Store to dst
__
lxvd2x
(
tmp_vsr2
,
tmp1
,
R3_ARG1
);
// Load src + 16
__
stxvd2x
(
tmp_vsr2
,
tmp1
,
R4_ARG2
);
// Store to dst + 16
__
addi
(
R3_ARG1
,
R3_ARG1
,
32
);
// Update src+=32
__
addi
(
R4_ARG2
,
R4_ARG2
,
32
);
// Update dst+=32
__
bdnz
(
l_5
);
// Dec CTR and loop if not zero.
// Restore DSCR pre-fetch value.
if
(
VM_Version
::
has_mfdscr
())
{
__
load_const_optimized
(
tmp2
,
VM_Version
::
_dscr_val
);
__
mtdscr
(
tmp2
);
}
}
// VSX
}
// FasterArrayCopy
// copy 1 element at a time
__
bind
(
l_3
);
...
...
src/share/vm/prims/jni.cpp
浏览文件 @
a34ad0c4
/*
* Copyright (c) 1997, 201
7
, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 201
8
, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录