未验证 提交 3491d183 编写于 作者: D danleifeng 提交者: GitHub

Fix gpups: replace platform::CUDADeviceContext with phi::GPUContext; test=develop (#44804)

上级 f1873b90
...@@ -1049,7 +1049,7 @@ void GraphDataGenerator::AllocResource(const paddle::platform::Place &place, ...@@ -1049,7 +1049,7 @@ void GraphDataGenerator::AllocResource(const paddle::platform::Place &place,
place_ = place; place_ = place;
gpuid_ = place_.GetDeviceId(); gpuid_ = place_.GetDeviceId();
VLOG(3) << "gpuid " << gpuid_; VLOG(3) << "gpuid " << gpuid_;
stream_ = dynamic_cast<platform::CUDADeviceContext *>( stream_ = dynamic_cast<phi::GPUContext *>(
platform::DeviceContextPool::Instance().Get(place)) platform::DeviceContextPool::Instance().Get(place))
->stream(); ->stream();
feed_vec_ = feed_vec; feed_vec_ = feed_vec;
......
...@@ -394,7 +394,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPullDedupImpl( ...@@ -394,7 +394,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPullDedupImpl(
const int* slot_dims, const int* slot_dims,
const uint32_t* gpu_restore_idx, const uint32_t* gpu_restore_idx,
int pull_value_size) { int pull_value_size) {
auto stream = dynamic_cast<paddle::platform::CUDADeviceContext*>( auto stream = dynamic_cast<phi::GPUContext*>(
paddle::platform::DeviceContextPool::Instance().Get(place)) paddle::platform::DeviceContextPool::Instance().Get(place))
->stream(); ->stream();
size_t N = total_length * hidden_size; size_t N = total_length * hidden_size;
...@@ -428,7 +428,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPushDedupImpl( ...@@ -428,7 +428,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPushDedupImpl(
const int* key2slot, const int* key2slot,
const uint32_t* d_restore_idx, const uint32_t* d_restore_idx,
const size_t grad_value_size) { const size_t grad_value_size) {
auto stream = dynamic_cast<paddle::platform::CUDADeviceContext*>( auto stream = dynamic_cast<phi::GPUContext*>(
paddle::platform::DeviceContextPool::Instance().Get(place)) paddle::platform::DeviceContextPool::Instance().Get(place))
->stream(); ->stream();
cudaMemsetAsync( cudaMemsetAsync(
...@@ -470,7 +470,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPushDedupImpl( ...@@ -470,7 +470,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPushDedupImpl(
const uint32_t* gpu_sort_offset, const uint32_t* gpu_sort_offset,
const uint32_t* gpu_sort_lens, const uint32_t* gpu_sort_lens,
const size_t grad_value_size) { const size_t grad_value_size) {
auto stream = dynamic_cast<paddle::platform::CUDADeviceContext*>( auto stream = dynamic_cast<phi::GPUContext*>(
paddle::platform::DeviceContextPool::Instance().Get(place)) paddle::platform::DeviceContextPool::Instance().Get(place))
->stream(); ->stream();
// merge all grad to one // merge all grad to one
......
...@@ -1130,7 +1130,7 @@ void PSGPUWrapper::PullSparse(const paddle::platform::Place& place, ...@@ -1130,7 +1130,7 @@ void PSGPUWrapper::PullSparse(const paddle::platform::Place& place,
VLOG(3) << "[" << device_id << "]Begin copy keys, key_num[" VLOG(3) << "[" << device_id << "]Begin copy keys, key_num["
<< total_length << "] dedup mode"; << total_length << "] dedup mode";
auto stream = dynamic_cast<platform::CUDADeviceContext*>( auto stream = dynamic_cast<phi::GPUContext*>(
platform::DeviceContextPool::Instance().Get(place)) platform::DeviceContextPool::Instance().Get(place))
->stream(); ->stream();
...@@ -1399,7 +1399,7 @@ void PSGPUWrapper::PushSparseGrad(const paddle::platform::Place& place, ...@@ -1399,7 +1399,7 @@ void PSGPUWrapper::PushSparseGrad(const paddle::platform::Place& place,
VLOG(3) << "Begin push sparse, key_num[" << total_length VLOG(3) << "Begin push sparse, key_num[" << total_length
<< "] dedup mode, device:" << device_id << ", index" << "] dedup mode, device:" << device_id << ", index"
<< devid_2_index; << devid_2_index;
auto stream = dynamic_cast<platform::CUDADeviceContext*>( auto stream = dynamic_cast<phi::GPUContext*>(
platform::DeviceContextPool::Instance().Get(place)) platform::DeviceContextPool::Instance().Get(place))
->stream(); ->stream();
uint64_t* total_keys = dev.keys_tensor.data<uint64_t>(); uint64_t* total_keys = dev.keys_tensor.data<uint64_t>();
......
...@@ -128,7 +128,7 @@ void PSGPUWrapper::CopyKeys(const paddle::platform::Place& place, ...@@ -128,7 +128,7 @@ void PSGPUWrapper::CopyKeys(const paddle::platform::Place& place,
int slot_num, int slot_num,
int total_len, int total_len,
int* key2slot) { int* key2slot) {
auto stream = dynamic_cast<platform::CUDADeviceContext*>( auto stream = dynamic_cast<phi::GPUContext*>(
platform::DeviceContextPool::Instance().Get(place)) platform::DeviceContextPool::Instance().Get(place))
->stream(); ->stream();
CopyKeysKernel2<<<CUDA_BLOCK(total_len), stream>>>( CopyKeysKernel2<<<CUDA_BLOCK(total_len), stream>>>(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册