未验证 提交 74259bac 编写于 作者: R ronnywang 提交者: GitHub

fix npu plugin hang in backward in eager mode (#43614)

上级 69e99cc7
...@@ -29,7 +29,9 @@ void GradTensorHolder::SetBufferSlotRankZeros(size_t slot_id, size_t rank) { ...@@ -29,7 +29,9 @@ void GradTensorHolder::SetBufferSlotRankZeros(size_t slot_id, size_t rank) {
} }
void GradTensorHolder::CopyValueFromTensor( void GradTensorHolder::CopyValueFromTensor(
size_t slot_id, size_t rank, const paddle::experimental::Tensor& t, size_t slot_id,
size_t rank,
const paddle::experimental::Tensor& t,
bool fill_one) { bool fill_one) {
// TODO(jiabin): We need to deal with empty input_buffer with slot size not // TODO(jiabin): We need to deal with empty input_buffer with slot size not
// empty; // empty;
...@@ -49,7 +51,9 @@ void GradTensorHolder::CopyValueFromTensor( ...@@ -49,7 +51,9 @@ void GradTensorHolder::CopyValueFromTensor(
paddle::platform::errors::Fatal( paddle::platform::errors::Fatal(
"Invalid rank for GradTensorHolder::add() which exceeds size " "Invalid rank for GradTensorHolder::add() which exceeds size "
"of buffer slot %d, got slot size is: %d rank is: %d", "of buffer slot %d, got slot size is: %d rank is: %d",
slot_id, buffer_[slot_id].size(), rank)); slot_id,
buffer_[slot_id].size(),
rank));
if (!fill_one) { if (!fill_one) {
paddle::experimental::Tensor& buffer_tensor = buffer_[slot_id][rank]; paddle::experimental::Tensor& buffer_tensor = buffer_[slot_id][rank];
if ((!buffer_tensor.defined() || !buffer_tensor.initialized())) { if ((!buffer_tensor.defined() || !buffer_tensor.initialized())) {
...@@ -83,7 +87,8 @@ void GradTensorHolder::CopyValueFromTensor( ...@@ -83,7 +87,8 @@ void GradTensorHolder::CopyValueFromTensor(
} }
} }
void GradTensorHolder::add(size_t slot_id, size_t rank, void GradTensorHolder::add(size_t slot_id,
size_t rank,
const paddle::experimental::Tensor& t, const paddle::experimental::Tensor& t,
bool create_graph) { bool create_graph) {
PADDLE_ENFORCE(slot_id < buffer_.size(), PADDLE_ENFORCE(slot_id < buffer_.size(),
...@@ -102,7 +107,9 @@ void GradTensorHolder::add(size_t slot_id, size_t rank, ...@@ -102,7 +107,9 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
paddle::platform::errors::Fatal( paddle::platform::errors::Fatal(
"Invalid rank for GradTensorHolder::add() which exceeds size " "Invalid rank for GradTensorHolder::add() which exceeds size "
"of buffer slot %d, got slot size is: %d rank is: %d", "of buffer slot %d, got slot size is: %d rank is: %d",
slot_id, buffer_[slot_id].size(), rank)); slot_id,
buffer_[slot_id].size(),
rank));
paddle::experimental::Tensor& buffer_tensor = buffer_[slot_id][rank]; paddle::experimental::Tensor& buffer_tensor = buffer_[slot_id][rank];
// TODO(jiabin): Code bellow is ugly to divide which inner var we used, // TODO(jiabin): Code bellow is ugly to divide which inner var we used,
...@@ -115,7 +122,8 @@ void GradTensorHolder::add(size_t slot_id, size_t rank, ...@@ -115,7 +122,8 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
buffer_tensor = t; buffer_tensor = t;
} else { } else {
// Accumulation // Accumulation
PADDLE_ENFORCE_EQ(t.initialized(), true, PADDLE_ENFORCE_EQ(t.initialized(),
true,
paddle::platform::errors::Fatal( paddle::platform::errors::Fatal(
"We can only accumulate initialized tensor, but we " "We can only accumulate initialized tensor, but we "
"got tensor: %s is empty please check you network " "got tensor: %s is empty please check you network "
...@@ -124,7 +132,7 @@ void GradTensorHolder::add(size_t slot_id, size_t rank, ...@@ -124,7 +132,7 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
if (t.is_dense_tensor()) { if (t.is_dense_tensor()) {
if (buffer_tensor.is_dense_tensor()) { if (buffer_tensor.is_dense_tensor()) {
if (create_graph) { if (create_graph || t.is_custom_device()) {
buffer_tensor = add_final_state_dygraph_function(t, buffer_tensor); buffer_tensor = add_final_state_dygraph_function(t, buffer_tensor);
} else { } else {
paddle::imperative::TensorAdd<paddle::experimental::Tensor>( paddle::imperative::TensorAdd<paddle::experimental::Tensor>(
...@@ -136,8 +144,8 @@ void GradTensorHolder::add(size_t slot_id, size_t rank, ...@@ -136,8 +144,8 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
// add_dygraph_function once it's supported // add_dygraph_function once it's supported
paddle::experimental::Tensor new_buffer( paddle::experimental::Tensor new_buffer(
std::make_shared<phi::DenseTensor>(), "tmp_accumulator"); std::make_shared<phi::DenseTensor>(), "tmp_accumulator");
paddle::imperative::SelectedRowsAddTensor(buffer_tensor, t, paddle::imperative::SelectedRowsAddTensor(
&new_buffer); buffer_tensor, t, &new_buffer);
buffer_tensor.set_impl(new_buffer.impl()); buffer_tensor.set_impl(new_buffer.impl());
} }
} else if (t.is_sparse_coo_tensor()) { } else if (t.is_sparse_coo_tensor()) {
...@@ -151,7 +159,7 @@ void GradTensorHolder::add(size_t slot_id, size_t rank, ...@@ -151,7 +159,7 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
paddle::experimental::Tensor buffer_values( paddle::experimental::Tensor buffer_values(
std::make_shared<phi::DenseTensor>( std::make_shared<phi::DenseTensor>(
buffer_sparse->non_zero_elements())); buffer_sparse->non_zero_elements()));
if (create_graph) { if (create_graph || t.is_custom_device()) {
buffer_values = buffer_values =
add_final_state_dygraph_function(t_values, buffer_values); add_final_state_dygraph_function(t_values, buffer_values);
} else { } else {
......
...@@ -292,6 +292,32 @@ static PyObject* tensor_method_numpy(TensorObject* self, ...@@ -292,6 +292,32 @@ static PyObject* tensor_method_numpy(TensorObject* self,
dense_tensor->numel(), dense_tensor->numel(),
kind); kind);
} }
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
} else if (self->tensor.is_custom_device()) {
if (self->tensor.is_selected_rows()) {
VLOG(6) << "Getting SelectedRows's numpy value";
auto* selected_rows =
static_cast<phi::SelectedRows*>(self->tensor.impl().get());
auto* dense_tensor = static_cast<paddle::framework::LoDTensor*>(
selected_rows->mutable_value());
phi::DeviceManager::GetDeviceWithPlace(self->tensor.place())
->MemoryCopyD2H(
pybind11::detail::array_proxy(array)->data,
dense_tensor->data(),
paddle::framework::DataTypeSize(dense_tensor->dtype()) *
dense_tensor->numel());
} else {
VLOG(6) << "Getting DenseTensor's numpy value";
auto dense_tensor =
std::dynamic_pointer_cast<phi::DenseTensor>(self->tensor.impl());
phi::DeviceManager::GetDeviceWithPlace(self->tensor.place())
->MemoryCopyD2H(
pybind11::detail::array_proxy(array)->data,
dense_tensor->data(),
paddle::framework::DataTypeSize(dense_tensor->dtype()) *
dense_tensor->numel());
}
#endif #endif
} else { } else {
PADDLE_THROW(platform::errors::InvalidArgument( PADDLE_THROW(platform::errors::InvalidArgument(
......
...@@ -286,6 +286,14 @@ class PADDLE_API Tensor final { ...@@ -286,6 +286,14 @@ class PADDLE_API Tensor final {
*/ */
bool is_gpu_pinned() const; bool is_gpu_pinned() const;
/**
 * @brief Determine whether the tensor device is CustomDevice
 *
 * The answer is derived from the tensor's place: the implementation passes
 * place() to paddle::platform::is_custom_place and returns its result.
 *
 * @return true if the tensor's place is classified as a custom-device place
 * @return false otherwise
 */
bool is_custom_device() const;
/* Part 4: Data Access methods */ /* Part 4: Data Access methods */
/** /**
......
...@@ -177,6 +177,10 @@ bool Tensor::is_gpu_pinned() const { ...@@ -177,6 +177,10 @@ bool Tensor::is_gpu_pinned() const {
return paddle::platform::is_cuda_pinned_place(place()); return paddle::platform::is_cuda_pinned_place(place());
} }
// Reports whether this tensor's place is a CustomDevice place. The decision
// is delegated entirely to paddle::platform::is_custom_place, applied to the
// value returned by place().
bool Tensor::is_custom_device() const {
  const auto& tensor_place = place();
  return paddle::platform::is_custom_place(tensor_place);
}
/* Part 4: Data Access methods */ /* Part 4: Data Access methods */
template <typename T> template <typename T>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册