diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index df04cfc46190fc45222754e83503d83289c7f230..c3a90149a1f0daa6c5240ba2e65da41ad6f135de 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -149,6 +149,7 @@ struct ScaleLossGradOpHandle : public OpHandle { auto stream = static_cast(this->dev_ctx_[place_]) ->stream(); + cudaSetDevice(boost::get(place_).device); VLOG(3) << "1"; PADDLE_ENFORCE(cudaGetLastError()); VLOG(3) << "2"; @@ -163,7 +164,7 @@ struct ScaleLossGradOpHandle : public OpHandle { void Wait(platform::DeviceContext *waited_dev) override { if (platform::is_cpu_place(waited_dev->GetPlace())) { - this->dev_ctx_.at(place_)->Wait(); + dev_ctx_.at(place_)->Wait(); } else { auto stream = static_cast(waited_dev)->stream();