From bba13e219f9586ab52ef26d630c65144b2dfcf85 Mon Sep 17 00:00:00 2001
From: wanghuancoder
Date: Sat, 20 Aug 2022 20:39:51 +0800
Subject: [PATCH] [Eager] pylayer detach output tensor if it is equal with
 input (#45065)

* pylayer detach output tensor if it is equal with input

* pylayer detach output tensor if it is equal with input
---
 paddle/fluid/pybind/eager.h                   |   2 +-
 paddle/fluid/pybind/eager_py_layer.cc         | 166 ++++++++++++------
 python/paddle/autograd/py_layer.py            |  47 ++++-
 .../fluid/tests/unittests/test_pylayer_op.py  |   9 +-
 4 files changed, 157 insertions(+), 67 deletions(-)

diff --git a/paddle/fluid/pybind/eager.h b/paddle/fluid/pybind/eager.h
index 5560744ae1d..f617ead08e2 100644
--- a/paddle/fluid/pybind/eager.h
+++ b/paddle/fluid/pybind/eager.h
@@ -29,7 +29,7 @@ typedef struct {
 typedef struct {
   PyObject_HEAD PyObject* container;
   PyObject* non_differentiable;
-  PyObject* dirty_tensors;
+  PyObject* not_inplace_tensors;
   bool materialize_grads;
   std::vector<bool> forward_input_tensor_is_duplicable;
   std::vector<bool> forward_output_tensor_is_duplicable;
diff --git a/paddle/fluid/pybind/eager_py_layer.cc b/paddle/fluid/pybind/eager_py_layer.cc
index b841afff157..7e25b06e80a 100644
--- a/paddle/fluid/pybind/eager_py_layer.cc
+++ b/paddle/fluid/pybind/eager_py_layer.cc
@@ -92,8 +92,8 @@ static void PyLayerDealloc(PyLayerObject* self) {
   if (self->non_differentiable) {
     Py_DECREF(self->non_differentiable);
   }
-  if (self->dirty_tensors) {
-    Py_DECREF(self->dirty_tensors);
+  if (self->not_inplace_tensors) {
+    Py_DECREF(self->not_inplace_tensors);
   }
   self->grad_node.~weak_ptr();
   self->forward_input_tensor_is_duplicable.~vector();
@@ -108,6 +108,20 @@ PyObject* pylayer_method_name(PyObject* self, PyObject* noargs) {
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
+PyObject* new_tensor_with_impl(paddle::experimental::Tensor* tensor) {
+  PyObject* obj = p_tensor_type->tp_alloc(p_tensor_type, 0);
+  if (obj) {
+    auto v = reinterpret_cast<TensorObject*>(obj);
+    new (&(v->tensor)) paddle::experimental::Tensor();
+    v->tensor.set_impl(tensor->impl());
+    v->tensor.set_name(egr::Controller::Instance().GenerateUniqueName());
+  } else {
+    PADDLE_THROW(platform::errors::Fatal(
+        "tp_alloc return null, can not new a PyObject."));
+  }
+  return obj;
+}
+
 PyObject* pylayer_method_apply(PyObject* cls,
                                PyObject* args,
                                PyObject* kwargs) {
@@ -151,6 +165,7 @@ PyObject* pylayer_method_apply(PyObject* cls,
   inputs_tensor.reserve(inputs_size);
   ctx->forward_input_tensor_is_duplicable.clear();
   ctx->forward_input_tensor_is_duplicable.reserve(inputs_size);
+  std::set<phi::TensorBase*> input_tensorbases;
   for (size_t i = 0; i < inputs_size; i++) {
     PyObject* obj = nullptr;
     if (i >= args_size) {
@@ -159,6 +174,8 @@ PyObject* pylayer_method_apply(PyObject* cls,
       obj = PyTuple_GET_ITEM(args, i);
     }
     if (IsEagerTensor(obj)) {
+      input_tensorbases.insert(
+          reinterpret_cast<TensorObject*>(obj)->tensor.impl().get());
       auto autograd_meta = egr::EagerUtils::nullable_autograd_meta(
           reinterpret_cast<TensorObject*>(obj)->tensor);
       inputs_autograd_meta.push_back({autograd_meta});
@@ -173,10 +190,12 @@ PyObject* pylayer_method_apply(PyObject* cls,
     } else if (PyList_Check(obj)) {
       std::vector<paddle::experimental::Tensor*> tensors;
       Py_ssize_t len = PyList_Size(obj);
-      for (Py_ssize_t i = 0; i < len; i++) {
-        if (IsEagerTensor(PyList_GetItem(obj, i))) {
-          tensors.push_back(&(
-              reinterpret_cast<TensorObject*>(PyList_GetItem(obj, i))->tensor));
+      for (Py_ssize_t j = 0; j < len; j++) {
+        PyObject* o = PyList_GetItem(obj, j);
+        if (IsEagerTensor(o)) {
+          input_tensorbases.insert(
+              reinterpret_cast<TensorObject*>(o)->tensor.impl().get());
+          tensors.push_back(&(reinterpret_cast<TensorObject*>(o)->tensor));
         }
       }
       if (!tensors.empty()) {
@@ -194,11 +213,12 @@ PyObject* pylayer_method_apply(PyObject* cls,
     } else if (PyTuple_Check(obj)) {
       std::vector<paddle::experimental::Tensor*> tensors;
       Py_ssize_t len = PyTuple_Size(obj);
-      for (Py_ssize_t i = 0; i < len; i++) {
-        if (IsEagerTensor(PyTuple_GetItem(obj, i))) {
-          tensors.push_back(
-              &(reinterpret_cast<TensorObject*>(PyTuple_GetItem(obj, i))
-                    ->tensor));
+      for (Py_ssize_t j = 0; j < len; j++) {
+        PyObject* o = PyTuple_GetItem(obj, j);
+        if (IsEagerTensor(o)) {
+          input_tensorbases.insert(
+              reinterpret_cast<TensorObject*>(o)->tensor.impl().get());
+          tensors.push_back(&(reinterpret_cast<TensorObject*>(o)->tensor));
         }
       }
       if (!tensors.empty()) {
@@ -252,6 +272,13 @@ PyObject* pylayer_method_apply(PyObject* cls,
     PyTuple_SET_ITEM(outputs_tuple, 0, outputs);
   }
 
+  std::set<paddle::experimental::Tensor*> inplace_tensors;
+  std::set<phi::TensorBase*> not_inplace_tensorbases;
+  auto not_inplace_tensors = GetTensorsFromPyObject(ctx->not_inplace_tensors);
+  for (auto it : not_inplace_tensors) {
+    not_inplace_tensorbases.insert(it->impl().get());
+  }
+
   auto outputs_size = PyTuple_GET_SIZE(outputs_tuple);
   std::vector<std::vector<paddle::experimental::Tensor*>> outputs_tensor;
   outputs_tensor.reserve(outputs_size);
@@ -267,13 +294,39 @@ PyObject* pylayer_method_apply(PyObject* cls,
       outputs_autograd_meta.push_back({egr::EagerUtils::autograd_meta(
           &(reinterpret_cast<TensorObject*>(obj)->tensor))});
       ctx->forward_output_tensor_is_duplicable.push_back(false);
+      if (input_tensorbases.count(
+              reinterpret_cast<TensorObject*>(obj)->tensor.impl().get())) {
+        if (not_inplace_tensorbases.count(
+                reinterpret_cast<TensorObject*>(obj)->tensor.impl().get())) {
+          PyTuple_SET_ITEM(outputs_tuple,
+                           i,
+                           new_tensor_with_impl(&(
+                               reinterpret_cast<TensorObject*>(obj)->tensor)));
+        } else {
+          inplace_tensors.insert(
+              &(reinterpret_cast<TensorObject*>(obj)->tensor));
+        }
+      }
     } else if (PyList_Check(obj)) {
       std::vector<paddle::experimental::Tensor*> tensors;
       Py_ssize_t len = PyList_Size(obj);
-      for (Py_ssize_t i = 0; i < len; i++) {
-        if (IsEagerTensor(PyList_GetItem(obj, i))) {
-          tensors.push_back(&(
-              reinterpret_cast<TensorObject*>(PyList_GetItem(obj, i))->tensor));
+      for (Py_ssize_t j = 0; j < len; j++) {
+        PyObject* o = PyList_GetItem(obj, j);
+        if (IsEagerTensor(o)) {
+          tensors.push_back(&(reinterpret_cast<TensorObject*>(o)->tensor));
+          if (input_tensorbases.count(
+                  reinterpret_cast<TensorObject*>(o)->tensor.impl().get())) {
+            if (not_inplace_tensorbases.count(
+                    reinterpret_cast<TensorObject*>(o)->tensor.impl().get())) {
+              PyTuple_SetItem(obj,
+                              j,
+                              new_tensor_with_impl(&(
+                                  reinterpret_cast<TensorObject*>(o)->tensor)));
+            } else {
+              inplace_tensors.insert(
+                  &(reinterpret_cast<TensorObject*>(o)->tensor));
+            }
+          }
         }
       }
       if (!tensors.empty()) {
@@ -285,11 +338,23 @@ PyObject* pylayer_method_apply(PyObject* cls,
     } else if (PyTuple_Check(obj)) {
       std::vector<paddle::experimental::Tensor*> tensors;
       Py_ssize_t len = PyTuple_Size(obj);
-      for (Py_ssize_t i = 0; i < len; i++) {
-        if (IsEagerTensor(PyTuple_GetItem(obj, i))) {
-          tensors.push_back(
-              &(reinterpret_cast<TensorObject*>(PyTuple_GetItem(obj, i))
-                    ->tensor));
+      for (Py_ssize_t j = 0; j < len; j++) {
+        PyObject* o = PyTuple_GetItem(obj, j);
+        if (IsEagerTensor(o)) {
+          tensors.push_back(&(reinterpret_cast<TensorObject*>(o)->tensor));
+          if (input_tensorbases.count(
+                  reinterpret_cast<TensorObject*>(o)->tensor.impl().get())) {
+            if (not_inplace_tensorbases.count(
+                    reinterpret_cast<TensorObject*>(o)->tensor.impl().get())) {
+              PyTuple_SetItem(obj,
+                              j,
+                              new_tensor_with_impl(&(
+                                  reinterpret_cast<TensorObject*>(o)->tensor)));
+            } else {
+              inplace_tensors.insert(
+                  &(reinterpret_cast<TensorObject*>(o)->tensor));
+            }
+          }
         }
       }
       if (!tensors.empty()) {
@@ -320,21 +385,19 @@ PyObject* pylayer_method_apply(PyObject* cls,
     }
   }
 
-  // add inplace strategy, inplaced tensor is ctx->dirty_tensors
-  auto dirty_tensors = GetTensorsFromPyObject(ctx->dirty_tensors);
-  for (auto it = dirty_tensors.begin(); it != dirty_tensors.end(); ++it) {
-    auto dirty_tensor = *it;
-    auto dirty_tensor_autograd_meta =
-        egr::EagerUtils::autograd_meta(dirty_tensor);
-    PADDLE_ENFORCE_EQ(!dirty_tensor_autograd_meta->StopGradient() &&
-                          egr::egr_utils_api::IsLeafTensor(*dirty_tensor),
+  for (auto it = inplace_tensors.begin(); it != inplace_tensors.end(); ++it) {
+    auto inplace_tensor = *it;
+    auto inplace_tensor_autograd_meta =
+        egr::EagerUtils::autograd_meta(inplace_tensor);
+    PADDLE_ENFORCE_EQ(!inplace_tensor_autograd_meta->StopGradient() &&
+                          egr::egr_utils_api::IsLeafTensor(*inplace_tensor),
                       false,
                       paddle::platform::errors::InvalidArgument(
                           "Leaf Var (%s) that doesn't stop gradient "
                           "can't use inplace strategy.",
-                          dirty_tensor->name()));
-    dirty_tensor->bump_inplace_version();
-    VLOG(3) << "Tensor(" << dirty_tensor->name()
+                          inplace_tensor->name()));
+    inplace_tensor->bump_inplace_version();
+    VLOG(3) << "Tensor(" << inplace_tensor->name()
             << ") uses Inplace Strategy.";
   }
 
@@ -376,7 +439,10 @@ PyObject* pylayer_method_apply(PyObject* cls,
     VLOG(6) << "PyLayer construct backward node finish...";
   }
 
-  if (!PyTuple_Check(outputs)) {
+  if (outputs_size == 1) {
+    Py_XDECREF(outputs);
+    outputs = PyTuple_GetItem(outputs_tuple, 0);
+    Py_INCREF(outputs);
     Py_XDECREF(outputs_tuple);
   }
   Py_XDECREF(forward_args);
@@ -389,12 +455,6 @@ PyObject* pylayer_method_apply(PyObject* cls,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
-PyObject* pylayer_method_register_hook(PyObject* _self, PyObject* hook) {
-  EAGER_TRY
-  return nullptr;
-  EAGER_CATCH_AND_THROW_RETURN_NULL
-}
-
 PyObject* tensor_properties_get_container(PyLayerObject* self, void* closure) {
   EAGER_TRY
   if (self->container == nullptr) {
@@ -438,24 +498,24 @@ int tensor_properties_set_non_differentiable(PyLayerObject* self,
   EAGER_CATCH_AND_THROW_RETURN_NEG
 }
 
-PyObject* tensor_properties_get_dirty_tensors(PyLayerObject* self,
-                                              void* closure) {
+PyObject* tensor_properties_get_not_inplace_tensors(PyLayerObject* self,
+                                                    void* closure) {
   EAGER_TRY
-  if (self->dirty_tensors == nullptr) {
+  if (self->not_inplace_tensors == nullptr) {
     RETURN_PY_NONE;
   }
-  Py_INCREF(self->dirty_tensors);
-  return self->dirty_tensors;
+  Py_INCREF(self->not_inplace_tensors);
+  return self->not_inplace_tensors;
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
-int tensor_properties_set_dirty_tensors(PyLayerObject* self,
-                                        PyObject* value,
-                                        void* closure) {
+int tensor_properties_set_not_inplace_tensors(PyLayerObject* self,
+                                              PyObject* value,
+                                              void* closure) {
   EAGER_TRY
   Py_XINCREF(value);
-  Py_XDECREF(self->dirty_tensors);
-  self->dirty_tensors = value;
+  Py_XDECREF(self->not_inplace_tensors);
+  self->not_inplace_tensors = value;
   return 0;
   EAGER_CATCH_AND_THROW_RETURN_NEG
 }
@@ -478,10 +538,6 @@ PyMethodDef pylayer_methods[] = {
      (PyCFunction)(void (*)(void))pylayer_method_apply,
      METH_CLASS | METH_VARARGS | METH_KEYWORDS,
      NULL},
-    {"register_hook",
-     (PyCFunction)(void (*)(void))pylayer_method_register_hook,
-     METH_O,
-     NULL},
     {NULL, NULL, 0, NULL}};
 
 struct PyGetSetDef pylayer_properties[] {
     {"container",
      (getter)tensor_properties_get_container,
      (setter)tensor_properties_set_container,
      nullptr,
      nullptr},
     {"non_differentiable",
      (getter)tensor_properties_get_non_differentiable,
      (setter)tensor_properties_set_non_differentiable,
      nullptr,
      nullptr},
-    {"dirty_tensors",
-     (getter)tensor_properties_get_dirty_tensors,
-     (setter)tensor_properties_set_dirty_tensors,
+    {"not_inplace_tensors",
+     (getter)tensor_properties_get_not_inplace_tensors,
+     (setter)tensor_properties_set_not_inplace_tensors,
      nullptr,
      nullptr},
     {"materialize_grads",
diff --git a/python/paddle/autograd/py_layer.py b/python/paddle/autograd/py_layer.py
index 22fc8bf47c1..673b047d5a3 100644
--- a/python/paddle/autograd/py_layer.py
+++ b/python/paddle/autograd/py_layer.py
@@ -407,13 +407,50 @@ class EagerPyLayerContext(object):
         """
         return self.container
 
-    def mark_dirty(self, *args):
-        self.dirty_tensors = args
+    def mark_not_inplace(self, *args):
+        """
+        Marks inputs as not inplace.
+        This should be called at most once, only from inside the `forward` method,
+        and all arguments should be Tensor inputs.
+
+        If the Tensor returned by `forward` method is the same as the Tensor input of forward,
+        and this Tensor is marked as not_inplace, then Paddle will help the user create a new Tensor as output.
+        Thereby preventing the auto grad information of the input Tensor from being overwritten.
+
+        Examples:
+            .. code-block:: python
+
+                import paddle
+
+                class Exp(paddle.autograd.PyLayer):
+                    @staticmethod
+                    def forward(ctx, x):
+                        ctx.mark_not_inplace(x)
+                        return x
+
+                    @staticmethod
+                    def backward(ctx, grad_output):
+                        out = grad_output.exp()
+                        return out
+
+                x = paddle.randn((1, 1))
+                x.stop_gradient = False
+                attn_layers = []
+                for idx in range(0, 2):
+                    attn_layers.append(Exp())
+
+                for step in range(0, 2):
+                    a = x
+                    for j in range(0,2):
+                        a = attn_layers[j].apply(x)
+                    a.backward()
+        """
+        self.not_inplace_tensors = args
 
     def mark_non_differentiable(self, *args):
         """
         Marks outputs as non-differentiable.
-        This should be called at most once, only from inside thethe `forward` method,
+        This should be called at most once, only from inside the `forward` method,
         and all arguments should be tensor outputs.
 
         This will mark outputs as not requiring gradients, increasing the
@@ -475,7 +512,7 @@ class EagerPyLayerContext(object):
                 class Tanh(PyLayer):
                     @staticmethod
                     def forward(ctx, x):
-                        return x, x+x
+                        return x+x+x, x+x
 
                     @staticmethod
                     def backward(ctx, grad, grad2):
@@ -486,7 +523,7 @@ class EagerPyLayerContext(object):
                     @staticmethod
                     def forward(ctx, x):
                         ctx.set_materialize_grads(False)
-                        return x, x+x
+                        return x+x+x, x+x
 
                     @staticmethod
                     def backward(ctx, grad, grad2):
diff --git a/python/paddle/fluid/tests/unittests/test_pylayer_op.py b/python/paddle/fluid/tests/unittests/test_pylayer_op.py
index f7f5e81b841..eb6502a97a6 100644
--- a/python/paddle/fluid/tests/unittests/test_pylayer_op.py
+++ b/python/paddle/fluid/tests/unittests/test_pylayer_op.py
@@ -480,7 +480,7 @@ class TestPyLayer(unittest.TestCase):
                 super(Layer, self).__init__()
 
             def forward(self, data):
-                data = paddle.nn.functional.relu(data)
+                data = data**2
                 z = paddle.tanh(data)
                 z = cus_tanh.apply(data)
                 return z.mean()
@@ -506,7 +506,6 @@ class TestPyLayer(unittest.TestCase):
 
             @staticmethod
             def forward(ctx, x):
-                ctx.mark_dirty(x)
                 return x
 
             @staticmethod
@@ -543,7 +542,6 @@ class TestPyLayer(unittest.TestCase):
 
             @staticmethod
             def forward(ctx, x):
-                ctx.mark_dirty(x)
                 return x
 
             @staticmethod
@@ -578,7 +576,6 @@ class TestPyLayer(unittest.TestCase):
 
             @staticmethod
             def forward(ctx, x):
-                ctx.mark_dirty(x)
                 return x
 
             @staticmethod
@@ -612,8 +609,6 @@ class TestPyLayer(unittest.TestCase):
 
             @staticmethod
             def forward(ctx, x):
-                if in_dygraph_mode():
-                    ctx.mark_dirty(x)
                 return x
 
             @staticmethod
@@ -710,6 +705,7 @@ class TestPyLayer(unittest.TestCase):
 
             @staticmethod
             def forward(ctx, x):
+                ctx.mark_not_inplace(x)
                 return x, x + x
 
             @staticmethod
@@ -728,6 +724,7 @@ class TestPyLayer(unittest.TestCase):
 
             @staticmethod
             def forward(ctx, x):
+                ctx.mark_not_inplace(x)
                 ctx.set_materialize_grads(False)
                 return x, x + x
 
-- 
GitLab
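For quick reference, a minimal usage sketch of the mark_not_inplace API introduced by this patch (not part of the patch itself). It mirrors the docstring example added above; the Identity class, tensor shape, and variable names are illustrative only, and it assumes the eager-mode paddle.autograd.PyLayer shown in that docstring.

# Illustrative sketch, not part of the patch.
# ctx.mark_not_inplace(x) asks PyLayer to hand back a fresh output Tensor that
# shares x's storage instead of treating "return x" as an inplace update of x,
# so x's autograd information is not overwritten. Without it, returning a leaf
# input that requires grad would hit the "Leaf Var ... can't use inplace
# strategy" check in pylayer_method_apply above.
import paddle


class Identity(paddle.autograd.PyLayer):
    @staticmethod
    def forward(ctx, x):
        ctx.mark_not_inplace(x)
        return x

    @staticmethod
    def backward(ctx, grad):
        return grad


x = paddle.randn([2, 3])
x.stop_gradient = False   # leaf tensor that requires grad
y = Identity.apply(x)     # new Tensor object sharing x's underlying storage
y.sum().backward()
print(x.grad)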