Unverified commit b106c424, authored by wanghuancoder, committed by GitHub

[Eager] refine gil use (#46452)

* refine gil use
Parent a02eb143
@@ -27,6 +27,11 @@
 #include "pybind11/pytypes.h"
 
 namespace egr {
 
+GradNodePyLayer::~GradNodePyLayer() {
+  pybind11::gil_scoped_acquire gil;
+  Py_XDECREF(ctx_);
+}
+
 paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                      kSlotSmallVectorSize>
 GradNodePyLayer::operator()(
......
@@ -34,7 +34,7 @@ class GradNodePyLayer : public GradNodeBase {
     Py_INCREF(ctx_);
   }
 
-  ~GradNodePyLayer() override { Py_XDECREF(ctx_); };
+  ~GradNodePyLayer() override;
 
   virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                                kSlotSmallVectorSize>
......
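The destructor moves out of the header so its definition can take the GIL before touching a Python object: grad nodes can be destroyed from threads that do not currently hold the GIL (for example, during autograd graph teardown in C++), and calling Py_XDECREF without the GIL is undefined behavior. A minimal sketch of the same ownership pattern, using a hypothetical holder class rather than Paddle's GradNodePyLayer:

```cpp
#include <Python.h>

#include "pybind11/pybind11.h"

// Sketch of the pattern this hunk enforces: a C++ object that owns a
// PyObject* must hold the GIL while dropping its reference, because its
// destructor may run on a thread that does not hold the GIL.
// `PyObjectHolder` is an illustration, not Paddle's class.
class PyObjectHolder {
 public:
  // Constructed from Python-facing code, so the GIL is already held here.
  explicit PyObjectHolder(PyObject* obj) : obj_(obj) { Py_XINCREF(obj_); }

  ~PyObjectHolder() {
    pybind11::gil_scoped_acquire gil;  // refcounting below is now safe
    Py_XDECREF(obj_);
  }

 private:
  PyObject* obj_;
};
```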
@@ -156,6 +156,7 @@ static PyObject* tensor_method_numpy(TensorObject* self,
   }
 
   if (self->tensor.is_cpu() || self->tensor.is_gpu_pinned()) {
+    eager_gil_scoped_release guard;
     platform::CPUPlace place;
     if (self->tensor.is_selected_rows()) {
       VLOG(6) << "Getting SelectedRows's numpy value";
@@ -186,6 +187,7 @@ static PyObject* tensor_method_numpy(TensorObject* self,
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   } else if (self->tensor.is_gpu()) {
+    eager_gil_scoped_release guard;
 #if defined(PADDLE_WITH_CUDA)
     gpuMemcpyKind kind = cudaMemcpyDeviceToHost;
 #elif defined(PADDLE_WITH_HIP)
@@ -244,6 +246,7 @@ static PyObject* tensor_method_numpy(TensorObject* self,
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
   } else if (self->tensor.is_custom_device()) {
+    eager_gil_scoped_release guard;
     if (self->tensor.is_selected_rows()) {
       VLOG(6) << "Getting SelectedRows's numpy value";
       auto* selected_rows =
@@ -311,8 +314,8 @@ static PyObject* tensor_method_numpy_for_string_tensor(TensorObject* self,
   const auto* st_ptr = string_tensor->data();
   auto numel = self->tensor.numel();
   auto tensor_dims = self->tensor.shape();
-  // Get the max unicode length of StringTensor to create numpy unicode string
-  // array.
+  // Get the max unicode length of StringTensor to create numpy unicode
+  // string array.
   auto* longest_pstring = std::max_element(
       st_ptr, st_ptr + numel, [](const auto& a, const auto& b) {
         auto a_unicode_len =
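`eager_gil_scoped_release` is the RAII guard this commit threads through the bindings: it drops the GIL while the method does Python-independent work (device-to-host copies, kernel launches), letting other Python threads run in the meantime. The real helper lives in Paddle's pybind sources; a minimal sketch of such a guard on top of the raw CPython API, with an illustrative name, might look like this:

```cpp
#include <Python.h>

// Sketch of a scoped GIL-release guard: release the GIL on construction,
// restore it on scope exit. Name and details are illustrative, not
// Paddle's actual definition.
class ScopedGILRelease {
 public:
  ScopedGILRelease() : state_(PyEval_SaveThread()) {}
  ~ScopedGILRelease() { PyEval_RestoreThread(state_); }

  // Non-copyable: the saved thread state must be restored exactly once.
  ScopedGILRelease(const ScopedGILRelease&) = delete;
  ScopedGILRelease& operator=(const ScopedGILRelease&) = delete;

 private:
  PyThreadState* state_;  // thread state saved while the GIL is dropped
};
```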
@@ -394,14 +397,18 @@ static PyObject* tensor_method__copy_to(TensorObject* self,
   EAGER_TRY
   auto place = CastPyArg2Place(PyTuple_GET_ITEM(args, 0), 0);
   bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 1), 1);
-  auto cp_tensor = self->tensor.copy_to(place, blocking);
-  if (!blocking) {
-    IncreaseTensorReferenceCountUntilCopyComplete(self->tensor, place);
+  paddle::experimental::Tensor cp_tensor;
+  {
+    eager_gil_scoped_release guard;
+    cp_tensor = self->tensor.copy_to(place, blocking);
+    if (!blocking) {
+      IncreaseTensorReferenceCountUntilCopyComplete(self->tensor, place);
+    }
+    egr::EagerUtils::autograd_meta(&cp_tensor)->SetStopGradient(true);
+    egr::EagerUtils::autograd_meta(&cp_tensor)
+        ->SetPersistable(
+            egr::EagerUtils::autograd_meta(&(self->tensor))->Persistable());
   }
-  egr::EagerUtils::autograd_meta(&cp_tensor)->SetStopGradient(true);
-  egr::EagerUtils::autograd_meta(&cp_tensor)
-      ->SetPersistable(
-          egr::EagerUtils::autograd_meta(&(self->tensor))->Persistable());
   return ToPyObject(cp_tensor);
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
@@ -410,11 +417,15 @@ static PyObject* tensor_method_cpu(TensorObject* self,
                                    PyObject* args,
                                    PyObject* kwargs) {
   EAGER_TRY
-  auto cp_tensor = self->tensor.copy_to(phi::CPUPlace(), true);
-  egr::EagerUtils::autograd_meta(&cp_tensor)->SetStopGradient(true);
-  egr::EagerUtils::autograd_meta(&cp_tensor)
-      ->SetPersistable(
-          egr::EagerUtils::autograd_meta(&(self->tensor))->Persistable());
+  paddle::experimental::Tensor cp_tensor;
+  {
+    eager_gil_scoped_release guard;
+    cp_tensor = self->tensor.copy_to(phi::CPUPlace(), true);
+    egr::EagerUtils::autograd_meta(&cp_tensor)->SetStopGradient(true);
+    egr::EagerUtils::autograd_meta(&cp_tensor)
+        ->SetPersistable(
+            egr::EagerUtils::autograd_meta(&(self->tensor))->Persistable());
+  }
   return ToPyObject(cp_tensor);
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
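Note the shape shared by `_copy_to` and `cpu`: the result tensor is declared outside the block, all C++ work runs inside the release scope, and only after the guard's destructor re-acquires the GIL does `ToPyObject` touch the CPython API. A hedged sketch of that skeleton (the helper name is illustrative):

```cpp
// Sketch of the scoping pattern used throughout this commit: heavy,
// Python-independent work runs with the GIL released; conversion back to
// a PyObject happens only after the scope ends and the GIL is held again.
PyObject* some_binding(TensorObject* self) {  // hypothetical binding
  paddle::experimental::Tensor result;        // declared outside the scope
  {
    eager_gil_scoped_release guard;           // GIL released here
    result = ExpensiveDeviceWork(self);       // hypothetical C++-only work
  }                                           // ~guard(): GIL re-acquired
  return ToPyObject(result);                  // CPython API needs the GIL
}
```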
@@ -450,6 +461,7 @@ static PyObject* tensor_method_copy_(TensorObject* self,
   VLOG(6) << "Start Copy Tensor " << src_tensor.name() << " to "
           << self->tensor.name();
   if (!self->tensor.initialized()) {
+    eager_gil_scoped_release guard;
     egr::EagerUtils::autograd_meta(&(self->tensor))
         ->SetStopGradient(
             egr::EagerUtils::autograd_meta(&(src_tensor))->StopGradient());
@@ -461,6 +473,7 @@ static PyObject* tensor_method_copy_(TensorObject* self,
     }
   } else {
     if (src_tensor.initialized()) {
+      eager_gil_scoped_release guard;
       self->tensor.copy_(src_tensor, self->tensor.place(), blocking);
     }
   }
@@ -476,16 +489,19 @@ static PyObject* tensor_method_clone(TensorObject* self,
                                      PyObject* args,
                                      PyObject* kwargs) {
   EAGER_TRY
-
-  PADDLE_ENFORCE_EQ(
-      self->tensor.initialized(),
-      true,
-      paddle::platform::errors::InvalidArgument(
-          "We can only support initialized tensor in clone, however we got "
-          "uninitialized tensor %s, please check your code.",
-          self->tensor.name()));
-
-  auto out = assign_ad_func(self->tensor);
+  paddle::experimental::Tensor out;
+  {
+    eager_gil_scoped_release guard;
+    PADDLE_ENFORCE_EQ(
+        self->tensor.initialized(),
+        true,
+        paddle::platform::errors::InvalidArgument(
+            "We can only support initialized tensor in clone, however we got "
+            "uninitialized tensor %s, please check your code.",
+            self->tensor.name()));
+
+    out = assign_ad_func(self->tensor);
+  }
   return ToPyObject(out);
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
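`clone` keeps `PADDLE_ENFORCE_EQ` inside the release scope, which is safe because the guard is an RAII object: if the check throws, stack unwinding runs the guard's destructor and re-acquires the GIL before `EAGER_CATCH_AND_THROW_RETURN_NULL` converts the C++ exception into a Python one. A small sketch of that exception path (the function is hypothetical):

```cpp
#include <Python.h>

#include <stdexcept>

// Sketch: a C++ exception thrown while the GIL is released still unwinds
// through the RAII guard, re-acquiring the GIL, so the catch block can
// safely raise a Python error.
PyObject* checked_binding() {
  try {
    {
      eager_gil_scoped_release guard;              // GIL dropped
      throw std::runtime_error("enforce failed");  // unwinds through ~guard()
    }
  } catch (const std::exception& e) {
    PyErr_SetString(PyExc_RuntimeError, e.what());  // GIL held again here
    return nullptr;
  }
  Py_RETURN_NONE;
}
```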
@@ -495,6 +511,7 @@ static PyObject* tensor_retain_grads(TensorObject* self,
                                      PyObject* kwargs) {
   EAGER_TRY
   if (egr::Controller::Instance().HasGrad()) {
+    eager_gil_scoped_release guard;
     auto meta = egr::EagerUtils::autograd_meta(&(self->tensor));
     if (!meta->GetMutableGradNode()) {
       VLOG(6) << "Make grad node of tensor: " << self->tensor.name()
@@ -535,6 +552,7 @@ static PyObject* tensor_clear_gradient(TensorObject* self,
   }
 
   if (grad->impl()) {
+    eager_gil_scoped_release guard;
     if (grad->is_selected_rows()) {
       auto selected_rows =
           std::dynamic_pointer_cast<phi::SelectedRows>(grad->impl());
@@ -577,6 +595,7 @@ static PyObject* tensor__zero_grads(TensorObject* self,
   VLOG(4) << "ZeroGrads " << self->tensor.name();
 
   if (egr::egr_utils_api::IsLeafTensor(self->tensor)) {
+    eager_gil_scoped_release guard;
     // Add RetainGrad as PostHook to AccumulationNode
     paddle::experimental::Tensor* grad =
         egr::EagerUtils::mutable_grad(self->tensor);
@@ -595,6 +614,7 @@ static PyObject* tensor__zero_grads(TensorObject* self,
       }
     }
   } else {
+    eager_gil_scoped_release guard;
     auto meta = egr::EagerUtils::unsafe_autograd_meta(self->tensor);
     if (meta->MutableGrad()->initialized()) {
       if (meta->MutableGrad()->is_dense_tensor()) {
@@ -855,6 +875,7 @@ static PyObject* tensor__getitem_index_not_tensor(TensorObject* self,
                          decrease_axis.end());
 
     if (op_type == "slice") {
+      eager_gil_scoped_release guard;
       out = slice_ad_func(self->tensor,
                           slice_axes_tmp,
                           slice_starts,
@@ -862,6 +883,7 @@ static PyObject* tensor__getitem_index_not_tensor(TensorObject* self,
                           infer_flags_tmp,
                           decrease_axis_tmp);
     } else if (op_type == "strided_slice") {
+      eager_gil_scoped_release guard;
       out = strided_slice_ad_func(
           self->tensor, slice_axes, slice_starts, slice_ends, slice_strides);
     } else {
@@ -886,28 +908,31 @@ static PyObject* tensor__getitem_index_not_tensor(TensorObject* self,
       none_axes.pop_back();
     }
     if (!none_axes.empty()) {
-      // Deal with cases that decrease_axes is not empty
-      // For example:
-      // # x.shape: (2,3,4)
-      // out = x[0, 0:2, None] # out.shape : (2, 1, 4)
-      for (auto& axis : none_axes) {
-        int len = 0;
-        for (int da : decrease_axis) {
-          if (da < axis) {
-            len++;
+      paddle::experimental::Tensor new_out;
+      {
+        eager_gil_scoped_release guard;
+        // Deal with cases that decrease_axes is not empty
+        // For example:
+        // # x.shape: (2,3,4)
+        // out = x[0, 0:2, None] # out.shape : (2, 1, 4)
+        for (auto& axis : none_axes) {
+          int len = 0;
+          for (int da : decrease_axis) {
+            if (da < axis) {
+              len++;
+            }
           }
+          axis -= len;
         }
-        axis -= len;
+        new_out = unsqueeze_ad_func(out, none_axes);
       }
-      paddle::experimental::Tensor new_out;
-      new_out = unsqueeze_ad_func(out, none_axes);
       return ToPyObject(new_out);
     }
   }
 
   // the index is a list
   if (list_select_flag) {
+    eager_gil_scoped_release guard;
     auto select_index = paddle::experimental::Tensor(
         egr::Controller::Instance().GenerateUniqueName());
     auto idx_tensor = std::make_shared<phi::DenseTensor>();
......
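For comparison, pybind11 ships the same pair of RAII helpers: `gil_scoped_acquire`, used above in the `GradNodePyLayer` destructor, and `gil_scoped_release`, the built-in counterpart of `eager_gil_scoped_release`. A sketch of the release side in plain pybind11 (the worker function is a placeholder):

```cpp
#include "pybind11/pybind11.h"

void heavy_cpp_work();  // placeholder for blocking C++ work, no PyObjects

// Sketch: pybind11's built-in equivalent of the release guard, assuming a
// pybind11-based binding.
void heavy_binding() {
  {
    pybind11::gil_scoped_release release;  // drop the GIL
    heavy_cpp_work();                      // other Python threads may run
  }                                        // GIL re-acquired on scope exit
}
```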