未验证 提交 74259bac 编写于 作者: R ronnywang 提交者: GitHub

Fix NPU plugin hang in the backward pass under eager mode (#43614)

上级 69e99cc7
......@@ -29,7 +29,9 @@ void GradTensorHolder::SetBufferSlotRankZeros(size_t slot_id, size_t rank) {
}
void GradTensorHolder::CopyValueFromTensor(
size_t slot_id, size_t rank, const paddle::experimental::Tensor& t,
size_t slot_id,
size_t rank,
const paddle::experimental::Tensor& t,
bool fill_one) {
// TODO(jiabin): We need to deal with empty input_buffer with slot size not
// empty;
......@@ -49,7 +51,9 @@ void GradTensorHolder::CopyValueFromTensor(
paddle::platform::errors::Fatal(
"Invalid rank for GradTensorHolder::add() which exceeds size "
"of buffer slot %d, got slot size is: %d rank is: %d",
slot_id, buffer_[slot_id].size(), rank));
slot_id,
buffer_[slot_id].size(),
rank));
if (!fill_one) {
paddle::experimental::Tensor& buffer_tensor = buffer_[slot_id][rank];
if ((!buffer_tensor.defined() || !buffer_tensor.initialized())) {
......@@ -83,7 +87,8 @@ void GradTensorHolder::CopyValueFromTensor(
}
}
void GradTensorHolder::add(size_t slot_id, size_t rank,
void GradTensorHolder::add(size_t slot_id,
size_t rank,
const paddle::experimental::Tensor& t,
bool create_graph) {
PADDLE_ENFORCE(slot_id < buffer_.size(),
......@@ -102,7 +107,9 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
paddle::platform::errors::Fatal(
"Invalid rank for GradTensorHolder::add() which exceeds size "
"of buffer slot %d, got slot size is: %d rank is: %d",
slot_id, buffer_[slot_id].size(), rank));
slot_id,
buffer_[slot_id].size(),
rank));
paddle::experimental::Tensor& buffer_tensor = buffer_[slot_id][rank];
// TODO(jiabin): Code bellow is ugly to divide which inner var we used,
......@@ -115,7 +122,8 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
buffer_tensor = t;
} else {
// Accumulation
PADDLE_ENFORCE_EQ(t.initialized(), true,
PADDLE_ENFORCE_EQ(t.initialized(),
true,
paddle::platform::errors::Fatal(
"We can only accumulate initialized tensor, but we "
"got tensor: %s is empty please check you network "
......@@ -124,7 +132,7 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
if (t.is_dense_tensor()) {
if (buffer_tensor.is_dense_tensor()) {
if (create_graph) {
if (create_graph || t.is_custom_device()) {
buffer_tensor = add_final_state_dygraph_function(t, buffer_tensor);
} else {
paddle::imperative::TensorAdd<paddle::experimental::Tensor>(
......@@ -136,8 +144,8 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
// add_dygraph_function once it's supported
paddle::experimental::Tensor new_buffer(
std::make_shared<phi::DenseTensor>(), "tmp_accumulator");
paddle::imperative::SelectedRowsAddTensor(buffer_tensor, t,
&new_buffer);
paddle::imperative::SelectedRowsAddTensor(
buffer_tensor, t, &new_buffer);
buffer_tensor.set_impl(new_buffer.impl());
}
} else if (t.is_sparse_coo_tensor()) {
......@@ -151,7 +159,7 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
paddle::experimental::Tensor buffer_values(
std::make_shared<phi::DenseTensor>(
buffer_sparse->non_zero_elements()));
if (create_graph) {
if (create_graph || t.is_custom_device()) {
buffer_values =
add_final_state_dygraph_function(t_values, buffer_values);
} else {
......
......@@ -292,6 +292,32 @@ static PyObject* tensor_method_numpy(TensorObject* self,
dense_tensor->numel(),
kind);
}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
} else if (self->tensor.is_custom_device()) {
if (self->tensor.is_selected_rows()) {
VLOG(6) << "Getting SelectedRows's numpy value";
auto* selected_rows =
static_cast<phi::SelectedRows*>(self->tensor.impl().get());
auto* dense_tensor = static_cast<paddle::framework::LoDTensor*>(
selected_rows->mutable_value());
phi::DeviceManager::GetDeviceWithPlace(self->tensor.place())
->MemoryCopyD2H(
pybind11::detail::array_proxy(array)->data,
dense_tensor->data(),
paddle::framework::DataTypeSize(dense_tensor->dtype()) *
dense_tensor->numel());
} else {
VLOG(6) << "Getting DenseTensor's numpy value";
auto dense_tensor =
std::dynamic_pointer_cast<phi::DenseTensor>(self->tensor.impl());
phi::DeviceManager::GetDeviceWithPlace(self->tensor.place())
->MemoryCopyD2H(
pybind11::detail::array_proxy(array)->data,
dense_tensor->data(),
paddle::framework::DataTypeSize(dense_tensor->dtype()) *
dense_tensor->numel());
}
#endif
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
......
......@@ -286,6 +286,14 @@ class PADDLE_API Tensor final {
*/
bool is_gpu_pinned() const;
/**
* @brief Determine whether the tensor device is CustomDevice
*
* @return true
* @return false
*/
bool is_custom_device() const;
/* Part 4: Data Access methods */
/**
......
......@@ -177,6 +177,10 @@ bool Tensor::is_gpu_pinned() const {
return paddle::platform::is_cuda_pinned_place(place());
}
// Returns true when this tensor's memory lives on a plugin ("custom")
// device place, e.g. an NPU registered through the custom-device runtime.
bool Tensor::is_custom_device() const {
  const auto& tensor_place = place();
  return paddle::platform::is_custom_place(tensor_place);
}
/* Part 4: Data Access methods */
template <typename T>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册