Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
3491d183
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3491d183
编写于
8月 02, 2022
作者:
D
danleifeng
提交者:
GitHub
8月 02, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix gpups CUDADeviceContext to phi-GPUContext;test=develop (#44804)
上级
f1873b90
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
7 additions
and
7 deletions
+7
-7
paddle/fluid/framework/data_feed.cu
paddle/fluid/framework/data_feed.cu
+1
-1
paddle/fluid/framework/fleet/heter_ps/feature_value.cu
paddle/fluid/framework/fleet/heter_ps/feature_value.cu
+3
-3
paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
+2
-2
paddle/fluid/framework/fleet/ps_gpu_wrapper.cu
paddle/fluid/framework/fleet/ps_gpu_wrapper.cu
+1
-1
未找到文件。
paddle/fluid/framework/data_feed.cu
浏览文件 @
3491d183
...
...
@@ -1049,7 +1049,7 @@ void GraphDataGenerator::AllocResource(const paddle::platform::Place &place,
place_
=
place
;
gpuid_
=
place_
.
GetDeviceId
();
VLOG
(
3
)
<<
"gpuid "
<<
gpuid_
;
stream_
=
dynamic_cast
<
p
latform
::
CUDADevice
Context
*>
(
stream_
=
dynamic_cast
<
p
hi
::
GPU
Context
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
))
->
stream
();
feed_vec_
=
feed_vec
;
...
...
paddle/fluid/framework/fleet/heter_ps/feature_value.cu
浏览文件 @
3491d183
...
...
@@ -394,7 +394,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPullDedupImpl(
const
int
*
slot_dims
,
const
uint32_t
*
gpu_restore_idx
,
int
pull_value_size
)
{
auto
stream
=
dynamic_cast
<
p
addle
::
platform
::
CUDADevice
Context
*>
(
auto
stream
=
dynamic_cast
<
p
hi
::
GPU
Context
*>
(
paddle
::
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
))
->
stream
();
size_t
N
=
total_length
*
hidden_size
;
...
...
@@ -428,7 +428,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPushDedupImpl(
const
int
*
key2slot
,
const
uint32_t
*
d_restore_idx
,
const
size_t
grad_value_size
)
{
auto
stream
=
dynamic_cast
<
p
addle
::
platform
::
CUDADevice
Context
*>
(
auto
stream
=
dynamic_cast
<
p
hi
::
GPU
Context
*>
(
paddle
::
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
))
->
stream
();
cudaMemsetAsync
(
...
...
@@ -470,7 +470,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPushDedupImpl(
const
uint32_t
*
gpu_sort_offset
,
const
uint32_t
*
gpu_sort_lens
,
const
size_t
grad_value_size
)
{
auto
stream
=
dynamic_cast
<
p
addle
::
platform
::
CUDADevice
Context
*>
(
auto
stream
=
dynamic_cast
<
p
hi
::
GPU
Context
*>
(
paddle
::
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
))
->
stream
();
// merge all grad to one
...
...
paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
浏览文件 @
3491d183
...
...
@@ -1130,7 +1130,7 @@ void PSGPUWrapper::PullSparse(const paddle::platform::Place& place,
VLOG
(
3
)
<<
"["
<<
device_id
<<
"]Begin copy keys, key_num["
<<
total_length
<<
"] dedup mode"
;
auto
stream
=
dynamic_cast
<
p
latform
::
CUDADevice
Context
*>
(
auto
stream
=
dynamic_cast
<
p
hi
::
GPU
Context
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
))
->
stream
();
...
...
@@ -1399,7 +1399,7 @@ void PSGPUWrapper::PushSparseGrad(const paddle::platform::Place& place,
VLOG
(
3
)
<<
"Begin push sparse, key_num["
<<
total_length
<<
"] dedup mode, device:"
<<
device_id
<<
", index"
<<
devid_2_index
;
auto
stream
=
dynamic_cast
<
p
latform
::
CUDADevice
Context
*>
(
auto
stream
=
dynamic_cast
<
p
hi
::
GPU
Context
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
))
->
stream
();
uint64_t
*
total_keys
=
dev
.
keys_tensor
.
data
<
uint64_t
>
();
...
...
paddle/fluid/framework/fleet/ps_gpu_wrapper.cu
浏览文件 @
3491d183
...
...
@@ -128,7 +128,7 @@ void PSGPUWrapper::CopyKeys(const paddle::platform::Place& place,
int
slot_num
,
int
total_len
,
int
*
key2slot
)
{
auto
stream
=
dynamic_cast
<
p
latform
::
CUDADevice
Context
*>
(
auto
stream
=
dynamic_cast
<
p
hi
::
GPU
Context
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
))
->
stream
();
CopyKeysKernel2
<<<
CUDA_BLOCK
(
total_len
),
stream
>>>
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录