Unverified · Commit 6e3856d3 · Authored by: W WangXi · Committed by: GitHub

fix xpu dygraph place (#30868)

Parent 35c5b23f
......@@ -305,10 +305,30 @@ TEST(test_tracer, test_expected_place) {
// default expected place is CPUPlace
imperative::Tracer tracer;
ASSERT_EQ(platform::is_cpu_place(tracer.ExpectedPlace()), true);
// set to CUDAPlace
platform::CUDAPlace gpu_place(0);
tracer.SetExpectedPlace(gpu_place);
ASSERT_EQ(platform::is_gpu_place(tracer.ExpectedPlace()), true);
{
#ifdef PADDLE_WITH_CUDA
// set to CUDAPlace
platform::CUDAPlace gpu_place(0);
tracer.SetExpectedPlace(gpu_place);
ASSERT_EQ(platform::is_gpu_place(tracer.ExpectedPlace()), true);
// assert throw
platform::XPUPlace xpu_place(0);
ASSERT_THROW(tracer.SetExpectedPlace(xpu_place), platform::EnforceNotMet);
#endif
}
{
#ifdef PADDLE_WITH_XPU
// set to XPUPlace
platform::XPUPlace xpu_place(0);
tracer.SetExpectedPlace(xpu_place);
ASSERT_EQ(platform::is_xpu_place(tracer.ExpectedPlace()), true);
// assert throw
platform::CUDAPlace cuda_place(0);
ASSERT_THROW(tracer.SetExpectedPlace(cuda_place), platform::EnforceNotMet);
#endif
}
}
TEST(test_tracer, test_var_without_grad_var) {
......
......@@ -198,6 +198,26 @@ void Tracer::TraceOp(const std::string& type, const NameVarBaseMap& ins,
inplace_map);
}
// Sets the place (device) on which subsequently traced ops are expected to
// run, switching the active device id first so that later kernel launches
// and allocations target the right card.
//
// Throws (PADDLE_THROW, surfaced to callers as platform::EnforceNotMet) when
// `place` names a device type this binary was not compiled with.
void Tracer::SetExpectedPlace(platform::Place place) {
  // NOTE(wangxi): set device id before launch device kernel
  if (platform::is_gpu_place(place)) {
#ifdef PADDLE_WITH_CUDA
    platform::SetDeviceId(BOOST_GET_CONST(platform::CUDAPlace, place).device);
#else
    PADDLE_THROW(platform::errors::PreconditionNotMet(
        "PaddlePaddle should compile with GPU if use CUDAPlace."));
#endif
  } else if (platform::is_xpu_place(place)) {
#ifdef PADDLE_WITH_XPU
    platform::SetXPUDeviceId(BOOST_GET_CONST(platform::XPUPlace, place).device);
#else
    PADDLE_THROW(platform::errors::PreconditionNotMet(
        "PaddlePaddle should compile with XPU if use XPUPlace."));
#endif
  }
  // CPUPlace (and other non-accelerator places) need no device switch.
  expected_place_ = place;
}
bool Tracer::ComputeRequiredGrad(const NameVarBaseMap& ins,
const NameVarBaseMap& outs,
bool trace_backward) {
......
......@@ -99,7 +99,7 @@ class Tracer {
platform::Place ExpectedPlace() const { return expected_place_; }
void SetExpectedPlace(platform::Place place) { expected_place_ = place; }
void SetExpectedPlace(platform::Place place);
bool HasGrad() const { return has_grad_; }
......
......@@ -1207,15 +1207,6 @@ void BindImperative(py::module *m_ptr) {
if (py::isinstance<platform::CUDAPlace>(obj)) {
auto p = obj.cast<platform::CUDAPlace *>();
self.SetExpectedPlace(*p);
// NOTE(zhiqiu): When switching cuda place, we need to set the
// cuda device id.
// Otherwise, some cuda API may be launched at other cuda place,
// which may cost hundreds of MB of GPU memory due to the cuda
// lib.
#ifdef PADDLE_WITH_CUDA
platform::SetDeviceId(p->device);
#endif
VLOG(4) << "Tracer(" << &self << ")"
<< " set expected place " << *p;
} else if (py::isinstance<platform::XPUPlace>(obj)) {
......@@ -1236,13 +1227,6 @@ void BindImperative(py::module *m_ptr) {
} else if (py::isinstance<platform::Place>(obj)) {
auto p = obj.cast<platform::Place *>();
self.SetExpectedPlace(*p);
if (platform::is_gpu_place(*p)) {
// NOTE(zhiqu): same as obj is CUDAPlace.
#ifdef PADDLE_WITH_CUDA
platform::SetDeviceId(
BOOST_GET_CONST(platform::CUDAPlace, *p).device);
#endif
}
VLOG(4) << "Tracer(" << &self << ")"
<< " set expected place " << *p;
} else {
......
......@@ -259,38 +259,6 @@ void TensorSetElement(framework::Tensor *self, size_t offset, T elem) {
}
}
// NOTE(wangxi): When copying data to the accelerator card,
// we need set_device(dev_id) first.
//
// Primary template: reached only for place types that carry no device id
// (CPUPlace, CUDAPinnedPlace), so it unconditionally throws.
template <typename P>
static int GetDeviceId(const P &place) {
  // for CPUPlace and CUDAPinnedPlace.
  PADDLE_THROW(platform::errors::PermissionDenied(
      "Paddle can't Get CPUPlace or CUDAPinnedPlace Device Id."));
}
// Specialization: CUDAPlace carries a concrete CUDA device id.
template <>
int GetDeviceId<platform::CUDAPlace>(const platform::CUDAPlace &place) {
  return place.GetDeviceId();
}
// Specialization: XPUPlace carries a concrete XPU device id.
template <>
int GetDeviceId<platform::XPUPlace>(const platform::XPUPlace &place) {
  return place.GetDeviceId();
}
// NOTE(wangxi16): Used by VarBase __setitem__
//
// Specialization for the type-erased platform::Place: inspects the runtime
// place kind and forwards to the typed overload; throws for kinds that have
// no device id (CPUPlace, CUDAPinnedPlace).
template <>
int GetDeviceId<platform::Place>(const platform::Place &place) {
  if (paddle::platform::is_gpu_place(place)) {
    return GetDeviceId(BOOST_GET_CONST(platform::CUDAPlace, place));
  } else if (paddle::platform::is_xpu_place(place)) {
    return GetDeviceId(BOOST_GET_CONST(platform::XPUPlace, place));
  }
  // for CPUPlace and CUDAPinnedPlace.
  PADDLE_THROW(platform::errors::PermissionDenied(
      "Paddle can't Get CPUPlace or CUDAPinnedPlace Device Id."));
}
template <typename T, typename P>
void SetTensorFromPyArrayT(
framework::Tensor *self,
......@@ -314,7 +282,11 @@ void SetTensorFromPyArrayT(
}
} else if (paddle::platform::is_xpu_place(place)) {
#ifdef PADDLE_WITH_XPU
platform::XPUDeviceGuard guard(GetDeviceId(place));
// NOTE(wangxi): When copying data to the accelerator card,
// we need set_device(dev_id) first.
platform::Place tmp_place = place;
platform::XPUDeviceGuard guard(
BOOST_GET_CONST(platform::XPUPlace, tmp_place).device);
auto dst = self->mutable_data<T>(place);
xpu_memcpy(dst, array.data(), array.nbytes(),
XPUMemcpyKind::XPU_HOST_TO_DEVICE);
......@@ -326,7 +298,11 @@ void SetTensorFromPyArrayT(
} else {
#ifdef PADDLE_WITH_CUDA
if (paddle::platform::is_gpu_place(place)) {
platform::CUDADeviceGuard guard(GetDeviceId(place));
// NOTE(wangxi): When copying data to the accelerator card,
// we need set_device(dev_id) first.
platform::Place tmp_place = place;
platform::CUDADeviceGuard guard(
BOOST_GET_CONST(platform::CUDAPlace, tmp_place).device);
auto dst = self->mutable_data<T>(place);
paddle::platform::GpuMemcpySync(dst, array.data(), array.nbytes(),
cudaMemcpyHostToDevice);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册