diff --git a/paddle/fluid/framework/data_feed.cu b/paddle/fluid/framework/data_feed.cu
index 681fb1fdb295ce01117fe9d46083f9282ab091dd..3c4f2c5bbc74ddd8b6291131d1fca5d925c3ed03 100644
--- a/paddle/fluid/framework/data_feed.cu
+++ b/paddle/fluid/framework/data_feed.cu
@@ -1049,7 +1049,7 @@ void GraphDataGenerator::AllocResource(const paddle::platform::Place &place,
   place_ = place;
   gpuid_ = place_.GetDeviceId();
   VLOG(3) << "gpuid " << gpuid_;
-  stream_ = dynamic_cast<platform::CUDADeviceContext*>(
+  stream_ = dynamic_cast<phi::GPUContext*>(
                 platform::DeviceContextPool::Instance().Get(place))
                 ->stream();
   feed_vec_ = feed_vec;
diff --git a/paddle/fluid/framework/fleet/heter_ps/feature_value.cu b/paddle/fluid/framework/fleet/heter_ps/feature_value.cu
index e57a02d72999c153b5a68279c5fe553593d9bd2f..f05fe6c95de0a5c63f45aed21daa2d4a39321f83 100644
--- a/paddle/fluid/framework/fleet/heter_ps/feature_value.cu
+++ b/paddle/fluid/framework/fleet/heter_ps/feature_value.cu
@@ -394,7 +394,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPullDedupImpl(
     const int* slot_dims,
     const uint32_t* gpu_restore_idx,
     int pull_value_size) {
-  auto stream = dynamic_cast<platform::CUDADeviceContext*>(
+  auto stream = dynamic_cast<phi::GPUContext*>(
                     paddle::platform::DeviceContextPool::Instance().Get(place))
                     ->stream();
   size_t N = total_length * hidden_size;
@@ -428,7 +428,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPushDedupImpl(
     const int* key2slot,
     const uint32_t* d_restore_idx,
     const size_t grad_value_size) {
-  auto stream = dynamic_cast<platform::CUDADeviceContext*>(
+  auto stream = dynamic_cast<phi::GPUContext*>(
                     paddle::platform::DeviceContextPool::Instance().Get(place))
                     ->stream();
   cudaMemsetAsync(
@@ -470,7 +470,7 @@ void AccessorWrapper<GPUAccessor>::CopyForPushDedupImpl(
     const uint32_t* gpu_sort_offset,
     const uint32_t* gpu_sort_lens,
     const size_t grad_value_size) {
-  auto stream = dynamic_cast<platform::CUDADeviceContext*>(
+  auto stream = dynamic_cast<phi::GPUContext*>(
                     paddle::platform::DeviceContextPool::Instance().Get(place))
                     ->stream();
   // merge all grad to one
diff --git a/paddle/fluid/framework/fleet/ps_gpu_wrapper.cc b/paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
index bbeb5977635e9780317eaaf6bce365dd4ff43910..40597aed31f3d0ec04dc6ac7d7ea2b1d09cb9fb7 100644
--- a/paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
+++ b/paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
@@ -1130,7 +1130,7 @@ void PSGPUWrapper::PullSparse(const paddle::platform::Place& place,

   VLOG(3) << "[" << device_id << "]Begin copy keys, key_num["
           << total_length << "] dedup mode";
-  auto stream = dynamic_cast<platform::CUDADeviceContext*>(
+  auto stream = dynamic_cast<phi::GPUContext*>(
                     platform::DeviceContextPool::Instance().Get(place))
                     ->stream();
@@ -1399,7 +1399,7 @@ void PSGPUWrapper::PushSparseGrad(const paddle::platform::Place& place,
   VLOG(3) << "Begin push sparse, key_num[" << total_length
           << "] dedup mode, device:" << device_id << ", index"
           << devid_2_index;
-  auto stream = dynamic_cast<platform::CUDADeviceContext*>(
+  auto stream = dynamic_cast<phi::GPUContext*>(
                     platform::DeviceContextPool::Instance().Get(place))
                     ->stream();
   uint64_t* total_keys = dev.keys_tensor.data<uint64_t>();
diff --git a/paddle/fluid/framework/fleet/ps_gpu_wrapper.cu b/paddle/fluid/framework/fleet/ps_gpu_wrapper.cu
index 36b789bdd11084a75e9b68779b9fa69508146e26..7f27b6889fc9818708e41298f5c63868bc88d71b 100644
--- a/paddle/fluid/framework/fleet/ps_gpu_wrapper.cu
+++ b/paddle/fluid/framework/fleet/ps_gpu_wrapper.cu
@@ -128,7 +128,7 @@ void PSGPUWrapper::CopyKeys(const paddle::platform::Place& place,
                             int slot_num,
                             int total_len,
                             int* key2slot) {
-  auto stream = dynamic_cast<platform::CUDADeviceContext*>(
+  auto stream = dynamic_cast<phi::GPUContext*>(
                     platform::DeviceContextPool::Instance().Get(place))
                     ->stream();
   CopyKeysKernel2<<<CUDA_BLOCK(total_len), stream>>>(