From 6e3856d3fb527f2691a4deeaf1ad7ae3dd749ed6 Mon Sep 17 00:00:00 2001 From: WangXi Date: Thu, 4 Feb 2021 15:31:06 +0800 Subject: [PATCH] fix xpu dygraph place (#30868) --- paddle/fluid/imperative/tests/test_tracer.cc | 28 +++++++++++-- paddle/fluid/imperative/tracer.cc | 20 +++++++++ paddle/fluid/imperative/tracer.h | 2 +- paddle/fluid/pybind/imperative.cc | 16 ------- paddle/fluid/pybind/tensor_py.h | 44 +++++--------------- 5 files changed, 55 insertions(+), 55 deletions(-) diff --git a/paddle/fluid/imperative/tests/test_tracer.cc b/paddle/fluid/imperative/tests/test_tracer.cc index bb6a48c6e64..c2ead38e4c1 100644 --- a/paddle/fluid/imperative/tests/test_tracer.cc +++ b/paddle/fluid/imperative/tests/test_tracer.cc @@ -305,10 +305,30 @@ TEST(test_tracer, test_expected_place) { // default expected place is CPUPlace imperative::Tracer tracer; ASSERT_EQ(platform::is_cpu_place(tracer.ExpectedPlace()), true); - // set to CUDAPlace - platform::CUDAPlace gpu_place(0); - tracer.SetExpectedPlace(gpu_place); - ASSERT_EQ(platform::is_gpu_place(tracer.ExpectedPlace()), true); + { +#ifdef PADDLE_WITH_CUDA + // set to CUDAPlace + platform::CUDAPlace gpu_place(0); + tracer.SetExpectedPlace(gpu_place); + ASSERT_EQ(platform::is_gpu_place(tracer.ExpectedPlace()), true); + + // assert throw + platform::XPUPlace xpu_place(0); + ASSERT_THROW(tracer.SetExpectedPlace(xpu_place), platform::EnforceNotMet); +#endif + } + { +#ifdef PADDLE_WITH_XPU + // set to XPUPlace + platform::XPUPlace xpu_place(0); + tracer.SetExpectedPlace(xpu_place); + ASSERT_EQ(platform::is_xpu_place(tracer.ExpectedPlace()), true); + + // assert throw + platform::CUDAPlace cuda_place(0); + ASSERT_THROW(tracer.SetExpectedPlace(cuda_place), platform::EnforceNotMet); +#endif + } } TEST(test_tracer, test_var_without_grad_var) { diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc index 1cf94c7a79e..7003e569d19 100644 --- a/paddle/fluid/imperative/tracer.cc +++ b/paddle/fluid/imperative/tracer.cc @@ -198,6 +198,26 @@ void Tracer::TraceOp(const std::string& type, const NameVarBaseMap& ins, inplace_map); } +void Tracer::SetExpectedPlace(platform::Place place) { + // NOTE(wangxi): set device id before launch device kernel + if (platform::is_gpu_place(place)) { +#ifdef PADDLE_WITH_CUDA + platform::SetDeviceId(BOOST_GET_CONST(platform::CUDAPlace, place).device); +#else + PADDLE_THROW(platform::errors::PreconditionNotMet( + "PaddlePaddle should compile with GPU if use CUDAPlace.")); +#endif + } else if (platform::is_xpu_place(place)) { +#ifdef PADDLE_WITH_XPU + platform::SetXPUDeviceId(BOOST_GET_CONST(platform::XPUPlace, place).device); +#else + PADDLE_THROW(platform::errors::PreconditionNotMet( + "PaddlePaddle should compile with XPU if use XPUPlace.")); +#endif + } + expected_place_ = place; +} + bool Tracer::ComputeRequiredGrad(const NameVarBaseMap& ins, const NameVarBaseMap& outs, bool trace_backward) { diff --git a/paddle/fluid/imperative/tracer.h b/paddle/fluid/imperative/tracer.h index d8c825666e7..b10d1b2d0b4 100644 --- a/paddle/fluid/imperative/tracer.h +++ b/paddle/fluid/imperative/tracer.h @@ -99,7 +99,7 @@ class Tracer { platform::Place ExpectedPlace() const { return expected_place_; } - void SetExpectedPlace(platform::Place place) { expected_place_ = place; } + void SetExpectedPlace(platform::Place place); bool HasGrad() const { return has_grad_; } diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 6185b978511..4d2a7b6a4de 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -1207,15 +1207,6 @@ void BindImperative(py::module *m_ptr) { if (py::isinstance(obj)) { auto p = obj.cast(); self.SetExpectedPlace(*p); - -// NOTE(zhiqiu): When switching cuda place, we need to set the -// cuda device id. -// Otherwise, some cuda API may be launched at other cuda place, -// which may cost hundreds of MB of GPU memory due to the cuda -// lib. -#ifdef PADDLE_WITH_CUDA - platform::SetDeviceId(p->device); -#endif VLOG(4) << "Tracer(" << &self << ")" << " set expected place " << *p; } else if (py::isinstance(obj)) { @@ -1236,13 +1227,6 @@ void BindImperative(py::module *m_ptr) { } else if (py::isinstance(obj)) { auto p = obj.cast(); self.SetExpectedPlace(*p); - if (platform::is_gpu_place(*p)) { -// NOTE(zhiqu): same as obj is CUDAPlace. -#ifdef PADDLE_WITH_CUDA - platform::SetDeviceId( - BOOST_GET_CONST(platform::CUDAPlace, *p).device); -#endif - } VLOG(4) << "Tracer(" << &self << ")" << " set expected place " << *p; } else { diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index e5db28c6f3e..5ddb498980d 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -259,38 +259,6 @@ void TensorSetElement(framework::Tensor *self, size_t offset, T elem) { } } -// NOTE(wangxi): When copying data to the accelerator card, -// we need set_device(dev_id) first. -template -static int GetDeviceId(const P &place) { - // for CPUPlace and CUDAPinnedPlace. - PADDLE_THROW(platform::errors::PermissionDenied( - "Paddle can't Get CPUPlace or CUDAPinnedPlace Device Id.")); -} - -template <> -int GetDeviceId(const platform::CUDAPlace &place) { - return place.GetDeviceId(); -} - -template <> -int GetDeviceId(const platform::XPUPlace &place) { - return place.GetDeviceId(); -} - -// NOTE(wangxi16): Used by VarBase __setitem__ -template <> -int GetDeviceId(const platform::Place &place) { - if (paddle::platform::is_gpu_place(place)) { - return GetDeviceId(BOOST_GET_CONST(platform::CUDAPlace, place)); - } else if (paddle::platform::is_xpu_place(place)) { - return GetDeviceId(BOOST_GET_CONST(platform::XPUPlace, place)); - } - // for CPUPlace and CUDAPinnedPlace. - PADDLE_THROW(platform::errors::PermissionDenied( - "Paddle can't Get CPUPlace or CUDAPinnedPlace Device Id.")); -} - template void SetTensorFromPyArrayT( framework::Tensor *self, @@ -314,7 +282,11 @@ void SetTensorFromPyArrayT( } } else if (paddle::platform::is_xpu_place(place)) { #ifdef PADDLE_WITH_XPU - platform::XPUDeviceGuard guard(GetDeviceId(place)); + // NOTE(wangxi): When copying data to the accelerator card, + // we need set_device(dev_id) first. + platform::Place tmp_place = place; + platform::XPUDeviceGuard guard( + BOOST_GET_CONST(platform::XPUPlace, tmp_place).device); auto dst = self->mutable_data(place); xpu_memcpy(dst, array.data(), array.nbytes(), XPUMemcpyKind::XPU_HOST_TO_DEVICE); @@ -326,7 +298,11 @@ void SetTensorFromPyArrayT( } else { #ifdef PADDLE_WITH_CUDA if (paddle::platform::is_gpu_place(place)) { - platform::CUDADeviceGuard guard(GetDeviceId(place)); + // NOTE(wangxi): When copying data to the accelerator card, + // we need set_device(dev_id) first. + platform::Place tmp_place = place; + platform::CUDADeviceGuard guard( + BOOST_GET_CONST(platform::CUDAPlace, tmp_place).device); auto dst = self->mutable_data(place); paddle::platform::GpuMemcpySync(dst, array.data(), array.nbytes(), cudaMemcpyHostToDevice); -- GitLab