Unverified commit bba13e21, authored by wanghuancoder, committed by GitHub

[Eager] pylayer: detach output tensor if it is the same as the input (#45065)

* pylayer detach output tensor if it is equal with input

* pylayer detach output tensor if it is equal with input
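
A minimal usage sketch of the behavior this change enables, assuming the public paddle.autograd.PyLayer API; the Identity layer name and tensor shape are illustrative, and the pattern follows the mark_not_inplace docstring added in this change:

import paddle

class Identity(paddle.autograd.PyLayer):
    @staticmethod
    def forward(ctx, x):
        # Ask PyLayer not to treat x as an in-place output: apply() will then
        # return a new Tensor that shares x's underlying storage (impl), so
        # x's autograd information is not overwritten.
        ctx.mark_not_inplace(x)
        return x

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output

x = paddle.randn((1, 1))
x.stop_gradient = False
y = Identity.apply(x)  # y is a distinct Tensor object backed by x's storage
y.backward()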
Parent 197f4048
@@ -29,7 +29,7 @@ typedef struct {
 typedef struct {
   PyObject_HEAD PyObject* container;
   PyObject* non_differentiable;
-  PyObject* dirty_tensors;
+  PyObject* not_inplace_tensors;
   bool materialize_grads;
   std::vector<bool> forward_input_tensor_is_duplicable;
   std::vector<bool> forward_output_tensor_is_duplicable;
...
@@ -92,8 +92,8 @@ static void PyLayerDealloc(PyLayerObject* self) {
   if (self->non_differentiable) {
     Py_DECREF(self->non_differentiable);
   }
-  if (self->dirty_tensors) {
-    Py_DECREF(self->dirty_tensors);
+  if (self->not_inplace_tensors) {
+    Py_DECREF(self->not_inplace_tensors);
   }
   self->grad_node.~weak_ptr<egr::GradNodePyLayer>();
   self->forward_input_tensor_is_duplicable.~vector();
@@ -108,6 +108,20 @@ PyObject* pylayer_method_name(PyObject* self, PyObject* noargs) {
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
+PyObject* new_tensor_with_impl(paddle::experimental::Tensor* tensor) {
+  PyObject* obj = p_tensor_type->tp_alloc(p_tensor_type, 0);
+  if (obj) {
+    auto v = reinterpret_cast<TensorObject*>(obj);
+    new (&(v->tensor)) paddle::experimental::Tensor();
+    v->tensor.set_impl(tensor->impl());
+    v->tensor.set_name(egr::Controller::Instance().GenerateUniqueName());
+  } else {
+    PADDLE_THROW(platform::errors::Fatal(
+        "tp_alloc return null, can not new a PyObject."));
+  }
+  return obj;
+}
+
 PyObject* pylayer_method_apply(PyObject* cls,
                                PyObject* args,
                                PyObject* kwargs) {
@@ -151,6 +165,7 @@ PyObject* pylayer_method_apply(PyObject* cls,
   inputs_tensor.reserve(inputs_size);
   ctx->forward_input_tensor_is_duplicable.clear();
   ctx->forward_input_tensor_is_duplicable.reserve(inputs_size);
+  std::set<phi::TensorBase*> input_tensorbases;
   for (size_t i = 0; i < inputs_size; i++) {
     PyObject* obj = nullptr;
     if (i >= args_size) {
@@ -159,6 +174,8 @@ PyObject* pylayer_method_apply(PyObject* cls,
       obj = PyTuple_GET_ITEM(args, i);
     }
     if (IsEagerTensor(obj)) {
+      input_tensorbases.insert(
+          reinterpret_cast<TensorObject*>(obj)->tensor.impl().get());
       auto autograd_meta = egr::EagerUtils::nullable_autograd_meta(
           reinterpret_cast<TensorObject*>(obj)->tensor);
       inputs_autograd_meta.push_back({autograd_meta});
@@ -173,10 +190,12 @@ PyObject* pylayer_method_apply(PyObject* cls,
     } else if (PyList_Check(obj)) {
       std::vector<paddle::experimental::Tensor*> tensors;
       Py_ssize_t len = PyList_Size(obj);
-      for (Py_ssize_t i = 0; i < len; i++) {
-        if (IsEagerTensor(PyList_GetItem(obj, i))) {
-          tensors.push_back(&(
-              reinterpret_cast<TensorObject*>(PyList_GetItem(obj, i))->tensor));
+      for (Py_ssize_t j = 0; j < len; j++) {
+        PyObject* o = PyList_GetItem(obj, j);
+        if (IsEagerTensor(o)) {
+          input_tensorbases.insert(
+              reinterpret_cast<TensorObject*>(o)->tensor.impl().get());
+          tensors.push_back(&(reinterpret_cast<TensorObject*>(o)->tensor));
         }
       }
       if (!tensors.empty()) {
@@ -194,11 +213,12 @@ PyObject* pylayer_method_apply(PyObject* cls,
     } else if (PyTuple_Check(obj)) {
       std::vector<paddle::experimental::Tensor*> tensors;
       Py_ssize_t len = PyTuple_Size(obj);
-      for (Py_ssize_t i = 0; i < len; i++) {
-        if (IsEagerTensor(PyTuple_GetItem(obj, i))) {
-          tensors.push_back(
-              &(reinterpret_cast<TensorObject*>(PyTuple_GetItem(obj, i))
-                    ->tensor));
+      for (Py_ssize_t j = 0; j < len; j++) {
+        PyObject* o = PyTuple_GetItem(obj, j);
+        if (IsEagerTensor(o)) {
+          input_tensorbases.insert(
+              reinterpret_cast<TensorObject*>(o)->tensor.impl().get());
+          tensors.push_back(&(reinterpret_cast<TensorObject*>(o)->tensor));
         }
       }
       if (!tensors.empty()) {
@@ -252,6 +272,13 @@ PyObject* pylayer_method_apply(PyObject* cls,
     PyTuple_SET_ITEM(outputs_tuple, 0, outputs);
   }
 
+  std::set<paddle::experimental::Tensor*> inplace_tensors;
+  std::set<phi::TensorBase*> not_inplace_tensorbases;
+  auto not_inplace_tensors = GetTensorsFromPyObject(ctx->not_inplace_tensors);
+  for (auto it : not_inplace_tensors) {
+    not_inplace_tensorbases.insert(it->impl().get());
+  }
+
   auto outputs_size = PyTuple_GET_SIZE(outputs_tuple);
   std::vector<std::vector<paddle::experimental::Tensor*>> outputs_tensor;
   outputs_tensor.reserve(outputs_size);
@@ -267,13 +294,39 @@ PyObject* pylayer_method_apply(PyObject* cls,
       outputs_autograd_meta.push_back({egr::EagerUtils::autograd_meta(
           &(reinterpret_cast<TensorObject*>(obj)->tensor))});
       ctx->forward_output_tensor_is_duplicable.push_back(false);
+      if (input_tensorbases.count(
+              reinterpret_cast<TensorObject*>(obj)->tensor.impl().get())) {
+        if (not_inplace_tensorbases.count(
+                reinterpret_cast<TensorObject*>(obj)->tensor.impl().get())) {
+          PyTuple_SET_ITEM(outputs_tuple,
+                           i,
+                           new_tensor_with_impl(&(
+                               reinterpret_cast<TensorObject*>(obj)->tensor)));
+        } else {
+          inplace_tensors.insert(
+              &(reinterpret_cast<TensorObject*>(obj)->tensor));
+        }
+      }
     } else if (PyList_Check(obj)) {
       std::vector<paddle::experimental::Tensor*> tensors;
       Py_ssize_t len = PyList_Size(obj);
-      for (Py_ssize_t i = 0; i < len; i++) {
-        if (IsEagerTensor(PyList_GetItem(obj, i))) {
-          tensors.push_back(&(
-              reinterpret_cast<TensorObject*>(PyList_GetItem(obj, i))->tensor));
+      for (Py_ssize_t j = 0; j < len; j++) {
+        PyObject* o = PyList_GetItem(obj, j);
+        if (IsEagerTensor(o)) {
+          tensors.push_back(&(reinterpret_cast<TensorObject*>(o)->tensor));
+          if (input_tensorbases.count(
+                  reinterpret_cast<TensorObject*>(o)->tensor.impl().get())) {
+            if (not_inplace_tensorbases.count(
+                    reinterpret_cast<TensorObject*>(o)->tensor.impl().get())) {
+              PyTuple_SetItem(obj,
+                              j,
+                              new_tensor_with_impl(&(
+                                  reinterpret_cast<TensorObject*>(o)->tensor)));
+            } else {
+              inplace_tensors.insert(
+                  &(reinterpret_cast<TensorObject*>(o)->tensor));
+            }
+          }
        }
      }
      if (!tensors.empty()) {
@@ -285,11 +338,23 @@ PyObject* pylayer_method_apply(PyObject* cls,
     } else if (PyTuple_Check(obj)) {
       std::vector<paddle::experimental::Tensor*> tensors;
       Py_ssize_t len = PyTuple_Size(obj);
-      for (Py_ssize_t i = 0; i < len; i++) {
-        if (IsEagerTensor(PyTuple_GetItem(obj, i))) {
-          tensors.push_back(
-              &(reinterpret_cast<TensorObject*>(PyTuple_GetItem(obj, i))
-                    ->tensor));
+      for (Py_ssize_t j = 0; j < len; j++) {
+        PyObject* o = PyTuple_GetItem(obj, j);
+        if (IsEagerTensor(o)) {
+          tensors.push_back(&(reinterpret_cast<TensorObject*>(o)->tensor));
+          if (input_tensorbases.count(
+                  reinterpret_cast<TensorObject*>(o)->tensor.impl().get())) {
+            if (not_inplace_tensorbases.count(
+                    reinterpret_cast<TensorObject*>(o)->tensor.impl().get())) {
+              PyTuple_SetItem(obj,
+                              j,
+                              new_tensor_with_impl(&(
+                                  reinterpret_cast<TensorObject*>(o)->tensor)));
+            } else {
+              inplace_tensors.insert(
+                  &(reinterpret_cast<TensorObject*>(o)->tensor));
+            }
+          }
        }
      }
      if (!tensors.empty()) {
@@ -320,21 +385,19 @@ PyObject* pylayer_method_apply(PyObject* cls,
     }
   }
 
-  // add inplace strategy, inplaced tensor is ctx->dirty_tensors
-  auto dirty_tensors = GetTensorsFromPyObject(ctx->dirty_tensors);
-  for (auto it = dirty_tensors.begin(); it != dirty_tensors.end(); ++it) {
-    auto dirty_tensor = *it;
-    auto dirty_tensor_autograd_meta =
-        egr::EagerUtils::autograd_meta(dirty_tensor);
-    PADDLE_ENFORCE_EQ(!dirty_tensor_autograd_meta->StopGradient() &&
-                          egr::egr_utils_api::IsLeafTensor(*dirty_tensor),
+  for (auto it = inplace_tensors.begin(); it != inplace_tensors.end(); ++it) {
+    auto inplace_tensor = *it;
+    auto inplace_tensor_autograd_meta =
+        egr::EagerUtils::autograd_meta(inplace_tensor);
+    PADDLE_ENFORCE_EQ(!inplace_tensor_autograd_meta->StopGradient() &&
+                          egr::egr_utils_api::IsLeafTensor(*inplace_tensor),
                       false,
                       paddle::platform::errors::InvalidArgument(
                           "Leaf Var (%s) that doesn't stop gradient "
                           "can't use inplace strategy.",
-                          dirty_tensor->name()));
-    dirty_tensor->bump_inplace_version();
-    VLOG(3) << "Tensor(" << dirty_tensor->name()
+                          inplace_tensor->name()));
+    inplace_tensor->bump_inplace_version();
+    VLOG(3) << "Tensor(" << inplace_tensor->name()
             << ") uses Inplace Strategy.";
   }
@@ -376,7 +439,10 @@ PyObject* pylayer_method_apply(PyObject* cls,
     VLOG(6) << "PyLayer construct backward node finish...";
   }
 
-  if (!PyTuple_Check(outputs)) {
+  if (outputs_size == 1) {
+    Py_XDECREF(outputs);
+    outputs = PyTuple_GetItem(outputs_tuple, 0);
+    Py_INCREF(outputs);
     Py_XDECREF(outputs_tuple);
   }
   Py_XDECREF(forward_args);
@@ -389,12 +455,6 @@ PyObject* pylayer_method_apply(PyObject* cls,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
-PyObject* pylayer_method_register_hook(PyObject* _self, PyObject* hook) {
-  EAGER_TRY
-  return nullptr;
-  EAGER_CATCH_AND_THROW_RETURN_NULL
-}
-
 PyObject* tensor_properties_get_container(PyLayerObject* self, void* closure) {
   EAGER_TRY
   if (self->container == nullptr) {
@@ -438,24 +498,24 @@ int tensor_properties_set_non_differentiable(PyLayerObject* self,
   EAGER_CATCH_AND_THROW_RETURN_NEG
 }
 
-PyObject* tensor_properties_get_dirty_tensors(PyLayerObject* self,
-                                              void* closure) {
+PyObject* tensor_properties_get_not_inplace_tensors(PyLayerObject* self,
+                                                    void* closure) {
   EAGER_TRY
-  if (self->dirty_tensors == nullptr) {
+  if (self->not_inplace_tensors == nullptr) {
     RETURN_PY_NONE;
   }
-  Py_INCREF(self->dirty_tensors);
-  return self->dirty_tensors;
+  Py_INCREF(self->not_inplace_tensors);
+  return self->not_inplace_tensors;
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
-int tensor_properties_set_dirty_tensors(PyLayerObject* self,
-                                        PyObject* value,
-                                        void* closure) {
+int tensor_properties_set_not_inplace_tensors(PyLayerObject* self,
+                                              PyObject* value,
+                                              void* closure) {
   EAGER_TRY
   Py_XINCREF(value);
-  Py_XDECREF(self->dirty_tensors);
-  self->dirty_tensors = value;
+  Py_XDECREF(self->not_inplace_tensors);
+  self->not_inplace_tensors = value;
   return 0;
   EAGER_CATCH_AND_THROW_RETURN_NEG
 }
@@ -478,10 +538,6 @@ PyMethodDef pylayer_methods[] = {
      (PyCFunction)(void (*)(void))pylayer_method_apply,
      METH_CLASS | METH_VARARGS | METH_KEYWORDS,
      NULL},
-    {"register_hook",
-     (PyCFunction)(void (*)(void))pylayer_method_register_hook,
-     METH_O,
-     NULL},
     {NULL, NULL, 0, NULL}};
 
 struct PyGetSetDef pylayer_properties[] {
@@ -495,9 +551,9 @@ struct PyGetSetDef pylayer_properties[] {
      (setter)tensor_properties_set_non_differentiable,
      nullptr,
      nullptr},
-    {"dirty_tensors",
-     (getter)tensor_properties_get_dirty_tensors,
-     (setter)tensor_properties_set_dirty_tensors,
+    {"not_inplace_tensors",
+     (getter)tensor_properties_get_not_inplace_tensors,
+     (setter)tensor_properties_set_not_inplace_tensors,
      nullptr,
      nullptr},
     {"materialize_grads",
...
@@ -407,13 +407,50 @@ class EagerPyLayerContext(object):
         """
         return self.container
 
-    def mark_dirty(self, *args):
-        self.dirty_tensors = args
+    def mark_not_inplace(self, *args):
+        """
+        Marks inputs as not inplace.
+        This should be called at most once, only from inside the `forward` method,
+        and all arguments should be Tensor inputs.
+
+        If the Tensor returned by `forward` is the same as a Tensor input of `forward`,
+        and this Tensor is marked as not_inplace, then Paddle will help the user
+        create a new Tensor as the output, thereby preventing the autograd
+        information of the input Tensor from being overwritten.
+
+        Examples:
+            .. code-block:: python
+
+                import paddle
+
+                class Exp(paddle.autograd.PyLayer):
+                    @staticmethod
+                    def forward(ctx, x):
+                        ctx.mark_not_inplace(x)
+                        return x
+
+                    @staticmethod
+                    def backward(ctx, grad_output):
+                        out = grad_output.exp()
+                        return out
+
+                x = paddle.randn((1, 1))
+                x.stop_gradient = False
+                attn_layers = []
+                for idx in range(0, 2):
+                    attn_layers.append(Exp())
+
+                for step in range(0, 2):
+                    a = x
+                    for j in range(0, 2):
+                        a = attn_layers[j].apply(x)
+                    a.backward()
+        """
+        self.not_inplace_tensors = args
     def mark_non_differentiable(self, *args):
         """
         Marks outputs as non-differentiable.
-        This should be called at most once, only from inside thethe `forward` method,
+        This should be called at most once, only from inside the `forward` method,
         and all arguments should be tensor outputs.
 
         This will mark outputs as not requiring gradients, increasing the
@@ -475,7 +512,7 @@ class EagerPyLayerContext(object):
                 class Tanh(PyLayer):
                     @staticmethod
                     def forward(ctx, x):
-                        return x, x+x
+                        return x+x+x, x+x
 
                     @staticmethod
                     def backward(ctx, grad, grad2):
@@ -486,7 +523,7 @@ class EagerPyLayerContext(object):
                     @staticmethod
                     def forward(ctx, x):
                         ctx.set_materialize_grads(False)
-                        return x, x+x
+                        return x+x+x, x+x
 
                     @staticmethod
                     def backward(ctx, grad, grad2):
...
@@ -480,7 +480,7 @@ class TestPyLayer(unittest.TestCase):
                 super(Layer, self).__init__()
 
             def forward(self, data):
-                data = paddle.nn.functional.relu(data)
+                data = data**2
                 z = paddle.tanh(data)
                 z = cus_tanh.apply(data)
                 return z.mean()
@@ -506,7 +506,6 @@ class TestPyLayer(unittest.TestCase):
             @staticmethod
             def forward(ctx, x):
-                ctx.mark_dirty(x)
                 return x
 
             @staticmethod
@@ -543,7 +542,6 @@ class TestPyLayer(unittest.TestCase):
             @staticmethod
             def forward(ctx, x):
-                ctx.mark_dirty(x)
                 return x
 
             @staticmethod
@@ -578,7 +576,6 @@ class TestPyLayer(unittest.TestCase):
             @staticmethod
             def forward(ctx, x):
-                ctx.mark_dirty(x)
                 return x
 
             @staticmethod
@@ -612,8 +609,6 @@ class TestPyLayer(unittest.TestCase):
             @staticmethod
             def forward(ctx, x):
-                if in_dygraph_mode():
-                    ctx.mark_dirty(x)
                 return x
 
             @staticmethod
@@ -710,6 +705,7 @@ class TestPyLayer(unittest.TestCase):
             @staticmethod
             def forward(ctx, x):
+                ctx.mark_not_inplace(x)
                 return x, x + x
 
             @staticmethod
@@ -728,6 +724,7 @@ class TestPyLayer(unittest.TestCase):
             @staticmethod
             def forward(ctx, x):
+                ctx.mark_not_inplace(x)
                 ctx.set_materialize_grads(False)
                 return x, x + x
...