Unverified commit 3491d183, authored by D danleifeng, committed by GitHub

fix gpups CUDADeviceContext to phi-GPUContext;test=develop (#44804)

Parent f1873b90
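For context on the change itself, here is a minimal sketch (not part of this diff) of the stream-lookup pattern the commit migrates: the device context fetched from the pool is now cast to phi::GPUContext instead of platform::CUDADeviceContext before calling stream(). The helper name and include paths are illustrative assumptions; DeviceContextPool::Instance().Get(place) and ->stream() come from the changed code below.

// Sketch only: fetching the CUDA stream for a place after this commit.
// Include paths are assumed from the PaddlePaddle source tree layout.
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"

// Hypothetical helper; the call sites in this diff inline the expression.
static cudaStream_t StreamForPlace(const paddle::platform::Place& place) {
  // Before this commit, the cast target was platform::CUDADeviceContext.
  auto* gpu_ctx = dynamic_cast<phi::GPUContext*>(
      paddle::platform::DeviceContextPool::Instance().Get(place));
  return gpu_ctx->stream();
}

At every call site in the hunks below, only the dynamic_cast target type changes; the pool lookup and the stream() accessor stay the same.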
@@ -1049,7 +1049,7 @@ void GraphDataGenerator::AllocResource(const paddle::platform::Place &place,
place_ = place;
gpuid_ = place_.GetDeviceId();
VLOG(3) << "gpuid " << gpuid_;
- stream_ = dynamic_cast<platform::CUDADeviceContext *>(
+ stream_ = dynamic_cast<phi::GPUContext *>(
platform::DeviceContextPool::Instance().Get(place))
->stream();
feed_vec_ = feed_vec;
......
@@ -394,7 +394,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPullDedupImpl(
const int* slot_dims,
const uint32_t* gpu_restore_idx,
int pull_value_size) {
- auto stream = dynamic_cast<paddle::platform::CUDADeviceContext*>(
+ auto stream = dynamic_cast<phi::GPUContext*>(
paddle::platform::DeviceContextPool::Instance().Get(place))
->stream();
size_t N = total_length * hidden_size;
@@ -428,7 +428,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPushDedupImpl(
const int* key2slot,
const uint32_t* d_restore_idx,
const size_t grad_value_size) {
- auto stream = dynamic_cast<paddle::platform::CUDADeviceContext*>(
+ auto stream = dynamic_cast<phi::GPUContext*>(
paddle::platform::DeviceContextPool::Instance().Get(place))
->stream();
cudaMemsetAsync(
@@ -470,7 +470,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPushDedupImpl(
const uint32_t* gpu_sort_offset,
const uint32_t* gpu_sort_lens,
const size_t grad_value_size) {
- auto stream = dynamic_cast<paddle::platform::CUDADeviceContext*>(
+ auto stream = dynamic_cast<phi::GPUContext*>(
paddle::platform::DeviceContextPool::Instance().Get(place))
->stream();
// merge all grad to one
......
@@ -1130,7 +1130,7 @@ void PSGPUWrapper::PullSparse(const paddle::platform::Place& place,
VLOG(3) << "[" << device_id << "]Begin copy keys, key_num["
<< total_length << "] dedup mode";
- auto stream = dynamic_cast<platform::CUDADeviceContext*>(
+ auto stream = dynamic_cast<phi::GPUContext*>(
platform::DeviceContextPool::Instance().Get(place))
->stream();
@@ -1399,7 +1399,7 @@ void PSGPUWrapper::PushSparseGrad(const paddle::platform::Place& place,
VLOG(3) << "Begin push sparse, key_num[" << total_length
<< "] dedup mode, device:" << device_id << ", index"
<< devid_2_index;
- auto stream = dynamic_cast<platform::CUDADeviceContext*>(
+ auto stream = dynamic_cast<phi::GPUContext*>(
platform::DeviceContextPool::Instance().Get(place))
->stream();
uint64_t* total_keys = dev.keys_tensor.data<uint64_t>();
......
@@ -128,7 +128,7 @@ void PSGPUWrapper::CopyKeys(const paddle::platform::Place& place,
int slot_num,
int total_len,
int* key2slot) {
- auto stream = dynamic_cast<platform::CUDADeviceContext*>(
+ auto stream = dynamic_cast<phi::GPUContext*>(
platform::DeviceContextPool::Instance().Get(place))
->stream();
CopyKeysKernel2<<<CUDA_BLOCK(total_len), stream>>>(
......