Unverified commit bba13e21 authored by wanghuancoder, committed by GitHub

[Eager] pylayer detach output tensor if it is equal with input (#45065)

* pylayer detach output tensor if it is equal with input

* pylayer detach output tensor if it is equal with input
Parent 197f4048
@@ -29,7 +29,7 @@ typedef struct {
typedef struct {
PyObject_HEAD PyObject* container;
PyObject* non_differentiable;
PyObject* dirty_tensors;
PyObject* not_inplace_tensors;
bool materialize_grads;
std::vector<bool> forward_input_tensor_is_duplicable;
std::vector<bool> forward_output_tensor_is_duplicable;
@@ -92,8 +92,8 @@ static void PyLayerDealloc(PyLayerObject* self) {
if (self->non_differentiable) {
Py_DECREF(self->non_differentiable);
}
if (self->dirty_tensors) {
Py_DECREF(self->dirty_tensors);
if (self->not_inplace_tensors) {
Py_DECREF(self->not_inplace_tensors);
}
self->grad_node.~weak_ptr<egr::GradNodePyLayer>();
self->forward_input_tensor_is_duplicable.~vector();
@@ -108,6 +108,20 @@ PyObject* pylayer_method_name(PyObject* self, PyObject* noargs) {
EAGER_CATCH_AND_THROW_RETURN_NULL
}
PyObject* new_tensor_with_impl(paddle::experimental::Tensor* tensor) {
PyObject* obj = p_tensor_type->tp_alloc(p_tensor_type, 0);
if (obj) {
auto v = reinterpret_cast<TensorObject*>(obj);
new (&(v->tensor)) paddle::experimental::Tensor();
v->tensor.set_impl(tensor->impl());
v->tensor.set_name(egr::Controller::Instance().GenerateUniqueName());
} else {
PADDLE_THROW(platform::errors::Fatal(
"tp_alloc return null, can not new a PyObject."));
}
return obj;
}
PyObject* pylayer_method_apply(PyObject* cls,
PyObject* args,
PyObject* kwargs) {
@@ -151,6 +165,7 @@ PyObject* pylayer_method_apply(PyObject* cls,
inputs_tensor.reserve(inputs_size);
ctx->forward_input_tensor_is_duplicable.clear();
ctx->forward_input_tensor_is_duplicable.reserve(inputs_size);
std::set<phi::TensorBase*> input_tensorbases;
for (size_t i = 0; i < inputs_size; i++) {
PyObject* obj = nullptr;
if (i >= args_size) {
@@ -159,6 +174,8 @@ PyObject* pylayer_method_apply(PyObject* cls,
obj = PyTuple_GET_ITEM(args, i);
}
if (IsEagerTensor(obj)) {
input_tensorbases.insert(
reinterpret_cast<TensorObject*>(obj)->tensor.impl().get());
auto autograd_meta = egr::EagerUtils::nullable_autograd_meta(
reinterpret_cast<TensorObject*>(obj)->tensor);
inputs_autograd_meta.push_back({autograd_meta});
@@ -173,10 +190,12 @@ PyObject* pylayer_method_apply(PyObject* cls,
} else if (PyList_Check(obj)) {
std::vector<paddle::experimental::Tensor*> tensors;
Py_ssize_t len = PyList_Size(obj);
for (Py_ssize_t i = 0; i < len; i++) {
if (IsEagerTensor(PyList_GetItem(obj, i))) {
tensors.push_back(&(
reinterpret_cast<TensorObject*>(PyList_GetItem(obj, i))->tensor));
for (Py_ssize_t j = 0; j < len; j++) {
PyObject* o = PyList_GetItem(obj, j);
if (IsEagerTensor(o)) {
input_tensorbases.insert(
reinterpret_cast<TensorObject*>(o)->tensor.impl().get());
tensors.push_back(&(reinterpret_cast<TensorObject*>(o)->tensor));
}
}
if (!tensors.empty()) {
@@ -194,11 +213,12 @@ PyObject* pylayer_method_apply(PyObject* cls,
} else if (PyTuple_Check(obj)) {
std::vector<paddle::experimental::Tensor*> tensors;
Py_ssize_t len = PyTuple_Size(obj);
for (Py_ssize_t i = 0; i < len; i++) {
if (IsEagerTensor(PyTuple_GetItem(obj, i))) {
tensors.push_back(
&(reinterpret_cast<TensorObject*>(PyTuple_GetItem(obj, i))
->tensor));
for (Py_ssize_t j = 0; j < len; j++) {
PyObject* o = PyTuple_GetItem(obj, j);
if (IsEagerTensor(o)) {
input_tensorbases.insert(
reinterpret_cast<TensorObject*>(o)->tensor.impl().get());
tensors.push_back(&(reinterpret_cast<TensorObject*>(o)->tensor));
}
}
if (!tensors.empty()) {
@@ -252,6 +272,13 @@ PyObject* pylayer_method_apply(PyObject* cls,
PyTuple_SET_ITEM(outputs_tuple, 0, outputs);
}
std::set<paddle::experimental::Tensor*> inplace_tensors;
std::set<phi::TensorBase*> not_inplace_tensorbases;
auto not_inplace_tensors = GetTensorsFromPyObject(ctx->not_inplace_tensors);
for (auto it : not_inplace_tensors) {
not_inplace_tensorbases.insert(it->impl().get());
}
auto outputs_size = PyTuple_GET_SIZE(outputs_tuple);
std::vector<std::vector<paddle::experimental::Tensor*>> outputs_tensor;
outputs_tensor.reserve(outputs_size);
@@ -267,13 +294,39 @@ PyObject* pylayer_method_apply(PyObject* cls,
outputs_autograd_meta.push_back({egr::EagerUtils::autograd_meta(
&(reinterpret_cast<TensorObject*>(obj)->tensor))});
ctx->forward_output_tensor_is_duplicable.push_back(false);
if (input_tensorbases.count(
reinterpret_cast<TensorObject*>(obj)->tensor.impl().get())) {
if (not_inplace_tensorbases.count(
reinterpret_cast<TensorObject*>(obj)->tensor.impl().get())) {
PyTuple_SET_ITEM(outputs_tuple,
i,
new_tensor_with_impl(&(
reinterpret_cast<TensorObject*>(obj)->tensor)));
} else {
inplace_tensors.insert(
&(reinterpret_cast<TensorObject*>(obj)->tensor));
}
}
} else if (PyList_Check(obj)) {
std::vector<paddle::experimental::Tensor*> tensors;
Py_ssize_t len = PyList_Size(obj);
for (Py_ssize_t i = 0; i < len; i++) {
if (IsEagerTensor(PyList_GetItem(obj, i))) {
tensors.push_back(&(
reinterpret_cast<TensorObject*>(PyList_GetItem(obj, i))->tensor));
for (Py_ssize_t j = 0; j < len; j++) {
PyObject* o = PyList_GetItem(obj, j);
if (IsEagerTensor(o)) {
tensors.push_back(&(reinterpret_cast<TensorObject*>(o)->tensor));
if (input_tensorbases.count(
reinterpret_cast<TensorObject*>(o)->tensor.impl().get())) {
if (not_inplace_tensorbases.count(
reinterpret_cast<TensorObject*>(o)->tensor.impl().get())) {
PyTuple_SetItem(obj,
j,
new_tensor_with_impl(&(
reinterpret_cast<TensorObject*>(o)->tensor)));
} else {
inplace_tensors.insert(
&(reinterpret_cast<TensorObject*>(o)->tensor));
}
}
}
}
if (!tensors.empty()) {
@@ -285,11 +338,23 @@ PyObject* pylayer_method_apply(PyObject* cls,
} else if (PyTuple_Check(obj)) {
std::vector<paddle::experimental::Tensor*> tensors;
Py_ssize_t len = PyTuple_Size(obj);
for (Py_ssize_t i = 0; i < len; i++) {
if (IsEagerTensor(PyTuple_GetItem(obj, i))) {
tensors.push_back(
&(reinterpret_cast<TensorObject*>(PyTuple_GetItem(obj, i))
->tensor));
for (Py_ssize_t j = 0; j < len; j++) {
PyObject* o = PyTuple_GetItem(obj, j);
if (IsEagerTensor(o)) {
tensors.push_back(&(reinterpret_cast<TensorObject*>(o)->tensor));
if (input_tensorbases.count(
reinterpret_cast<TensorObject*>(o)->tensor.impl().get())) {
if (not_inplace_tensorbases.count(
reinterpret_cast<TensorObject*>(o)->tensor.impl().get())) {
PyTuple_SetItem(obj,
j,
new_tensor_with_impl(&(
reinterpret_cast<TensorObject*>(o)->tensor)));
} else {
inplace_tensors.insert(
&(reinterpret_cast<TensorObject*>(o)->tensor));
}
}
}
}
if (!tensors.empty()) {
@@ -320,21 +385,19 @@ PyObject* pylayer_method_apply(PyObject* cls,
}
}
// add inplace strategy, inplaced tensor is ctx->dirty_tensors
auto dirty_tensors = GetTensorsFromPyObject(ctx->dirty_tensors);
for (auto it = dirty_tensors.begin(); it != dirty_tensors.end(); ++it) {
auto dirty_tensor = *it;
auto dirty_tensor_autograd_meta =
egr::EagerUtils::autograd_meta(dirty_tensor);
PADDLE_ENFORCE_EQ(!dirty_tensor_autograd_meta->StopGradient() &&
egr::egr_utils_api::IsLeafTensor(*dirty_tensor),
for (auto it = inplace_tensors.begin(); it != inplace_tensors.end(); ++it) {
auto inplace_tensor = *it;
auto inplace_tensor_autograd_meta =
egr::EagerUtils::autograd_meta(inplace_tensor);
PADDLE_ENFORCE_EQ(!inplace_tensor_autograd_meta->StopGradient() &&
egr::egr_utils_api::IsLeafTensor(*inplace_tensor),
false,
paddle::platform::errors::InvalidArgument(
"Leaf Var (%s) that doesn't stop gradient "
"can't use inplace strategy.",
dirty_tensor->name()));
dirty_tensor->bump_inplace_version();
VLOG(3) << "Tensor(" << dirty_tensor->name()
inplace_tensor->name()));
inplace_tensor->bump_inplace_version();
VLOG(3) << "Tensor(" << inplace_tensor->name()
<< ") uses Inplace Strategy.";
}
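Per the loop above, any `forward` output that reuses an input's storage and was not marked via `mark_not_inplace` is collected into `inplace_tensors`, and the enforce rejects leaf Tensors that require gradients. A hedged Python sketch of the failing case (the exact exception type surfaced to Python is an assumption, so it is caught broadly):

import paddle

class PassThrough(paddle.autograd.PyLayer):
    @staticmethod
    def forward(ctx, x):
        # No ctx.mark_not_inplace(x): returning x unchanged makes it an
        # inplace output of this PyLayer.
        return x

    @staticmethod
    def backward(ctx, grad):
        return grad

x = paddle.randn([2, 2])
x.stop_gradient = False  # x is a leaf Tensor that requires grad
try:
    y = PassThrough.apply(x)
except Exception as err:
    # Expected per the enforce above: a leaf Var that doesn't stop gradient
    # can't use the inplace strategy.
    print(err)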
@@ -376,7 +439,10 @@ PyObject* pylayer_method_apply(PyObject* cls,
VLOG(6) << "PyLayer construct backward node finish...";
}
if (!PyTuple_Check(outputs)) {
if (outputs_size == 1) {
Py_XDECREF(outputs);
outputs = PyTuple_GetItem(outputs_tuple, 0);
Py_INCREF(outputs);
Py_XDECREF(outputs_tuple);
}
Py_XDECREF(forward_args);
@@ -389,12 +455,6 @@ PyObject* pylayer_method_apply(PyObject* cls,
EAGER_CATCH_AND_THROW_RETURN_NULL
}
PyObject* pylayer_method_register_hook(PyObject* _self, PyObject* hook) {
EAGER_TRY
return nullptr;
EAGER_CATCH_AND_THROW_RETURN_NULL
}
PyObject* tensor_properties_get_container(PyLayerObject* self, void* closure) {
EAGER_TRY
if (self->container == nullptr) {
@@ -438,24 +498,24 @@ int tensor_properties_set_non_differentiable(PyLayerObject* self,
EAGER_CATCH_AND_THROW_RETURN_NEG
}
PyObject* tensor_properties_get_dirty_tensors(PyLayerObject* self,
PyObject* tensor_properties_get_not_inplace_tensors(PyLayerObject* self,
void* closure) {
EAGER_TRY
if (self->dirty_tensors == nullptr) {
if (self->not_inplace_tensors == nullptr) {
RETURN_PY_NONE;
}
Py_INCREF(self->dirty_tensors);
return self->dirty_tensors;
Py_INCREF(self->not_inplace_tensors);
return self->not_inplace_tensors;
EAGER_CATCH_AND_THROW_RETURN_NULL
}
int tensor_properties_set_dirty_tensors(PyLayerObject* self,
int tensor_properties_set_not_inplace_tensors(PyLayerObject* self,
PyObject* value,
void* closure) {
EAGER_TRY
Py_XINCREF(value);
Py_XDECREF(self->dirty_tensors);
self->dirty_tensors = value;
Py_XDECREF(self->not_inplace_tensors);
self->not_inplace_tensors = value;
return 0;
EAGER_CATCH_AND_THROW_RETURN_NEG
}
@@ -478,10 +538,6 @@ PyMethodDef pylayer_methods[] = {
(PyCFunction)(void (*)(void))pylayer_method_apply,
METH_CLASS | METH_VARARGS | METH_KEYWORDS,
NULL},
{"register_hook",
(PyCFunction)(void (*)(void))pylayer_method_register_hook,
METH_O,
NULL},
{NULL, NULL, 0, NULL}};
struct PyGetSetDef pylayer_properties[] {
@@ -495,9 +551,9 @@ struct PyGetSetDef pylayer_properties[] {
(setter)tensor_properties_set_non_differentiable,
nullptr,
nullptr},
{"dirty_tensors",
(getter)tensor_properties_get_dirty_tensors,
(setter)tensor_properties_set_dirty_tensors,
{"not_inplace_tensors",
(getter)tensor_properties_get_not_inplace_tensors,
(setter)tensor_properties_set_not_inplace_tensors,
nullptr,
nullptr},
{"materialize_grads",
@@ -407,13 +407,50 @@ class EagerPyLayerContext(object):
"""
return self.container
def mark_dirty(self, *args):
self.dirty_tensors = args
def mark_not_inplace(self, *args):
"""
Marks inputs as not inplace.
This should be called at most once, only from inside the `forward` method,
and all arguments should be Tensor inputs.
If a Tensor returned by the `forward` method is the same as a Tensor passed into `forward`,
and that Tensor is marked as not_inplace, Paddle will create a new Tensor as the output on
the user's behalf, thereby preventing the autograd information of the input Tensor from
being overwritten.
Examples:
.. code-block:: python
import paddle
class Exp(paddle.autograd.PyLayer):
@staticmethod
def forward(ctx, x):
ctx.mark_not_inplace(x)
return x
@staticmethod
def backward(ctx, grad_output):
out = grad_output.exp()
return out
x = paddle.randn((1, 1))
x.stop_gradient = False
attn_layers = []
for idx in range(0, 2):
attn_layers.append(Exp())
for step in range(0, 2):
a = x
for j in range(0, 2):
a = attn_layers[j].apply(x)
a.backward()
"""
self.not_inplace_tensors = args
def mark_non_differentiable(self, *args):
"""
Marks outputs as non-differentiable.
This should be called at most once, only from inside thethe `forward` method,
This should be called at most once, only from inside the `forward` method,
and all arguments should be tensor outputs.
This will mark outputs as not requiring gradients, increasing the
@@ -475,7 +512,7 @@ class EagerPyLayerContext(object):
class Tanh(PyLayer):
@staticmethod
def forward(ctx, x):
return x, x+x
return x+x+x, x+x
@staticmethod
def backward(ctx, grad, grad2):
@@ -486,7 +523,7 @@ class EagerPyLayerContext(object):
@staticmethod
def forward(ctx, x):
ctx.set_materialize_grads(False)
return x, x+x
return x+x+x, x+x
@staticmethod
def backward(ctx, grad, grad2):
@@ -480,7 +480,7 @@ class TestPyLayer(unittest.TestCase):
super(Layer, self).__init__()
def forward(self, data):
data = paddle.nn.functional.relu(data)
data = data**2
z = paddle.tanh(data)
z = cus_tanh.apply(data)
return z.mean()
@@ -506,7 +506,6 @@ class TestPyLayer(unittest.TestCase):
@staticmethod
def forward(ctx, x):
ctx.mark_dirty(x)
return x
@staticmethod
@@ -543,7 +542,6 @@ class TestPyLayer(unittest.TestCase):
@staticmethod
def forward(ctx, x):
ctx.mark_dirty(x)
return x
@staticmethod
@@ -578,7 +576,6 @@ class TestPyLayer(unittest.TestCase):
@staticmethod
def forward(ctx, x):
ctx.mark_dirty(x)
return x
@staticmethod
@@ -612,8 +609,6 @@ class TestPyLayer(unittest.TestCase):
@staticmethod
def forward(ctx, x):
if in_dygraph_mode():
ctx.mark_dirty(x)
return x
@staticmethod
@@ -710,6 +705,7 @@ class TestPyLayer(unittest.TestCase):
@staticmethod
def forward(ctx, x):
ctx.mark_not_inplace(x)
return x, x + x
@staticmethod
@@ -728,6 +724,7 @@ class TestPyLayer(unittest.TestCase):
@staticmethod
def forward(ctx, x):
ctx.mark_not_inplace(x)
ctx.set_materialize_grads(False)
return x, x + x