diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc b/paddle/fluid/eager/accumulation/accumulation_node.cc
index 69628d9b40021d092fcb65add7088e7df7fcd18e..ed1146eed0fb02df6e5c01381f2224d0b0d069b8 100644
--- a/paddle/fluid/eager/accumulation/accumulation_node.cc
+++ b/paddle/fluid/eager/accumulation/accumulation_node.cc
@@ -28,6 +28,9 @@
 static void CopyOrAddTensor(egr::EagerTensor* tensor,
                             const egr::EagerTensor& t) {
+  if (t.Var().IsInitialized()) {
+    const_cast<egr::EagerTensor*>(&t)->SyncToTensor();
+  }
   if (!tensor->defined() || !tensor->initialized()) {
     // Simply copy tensor->impl
     *tensor = t;
diff --git a/paddle/fluid/eager/accumulation/accumulation_node.h b/paddle/fluid/eager/accumulation/accumulation_node.h
index a2683db75e92cc345affba3912135ccc75963e3f..9578924b783f5288b1d7a1432b656aeef494ee14 100644
--- a/paddle/fluid/eager/accumulation/accumulation_node.h
+++ b/paddle/fluid/eager/accumulation/accumulation_node.h
@@ -32,7 +32,7 @@ class GradNodeAccumulation : public GradNodeBase {
   void RetainGrad(
       const std::function<egr::EagerTensor(const egr::EagerTensor&)>& hook);

-  egr::EagerTensor Grad() { return accumulated_grad; }
+  egr::EagerTensor* Grad() { return &accumulated_grad; }

  private:
   egr::EagerTensor accumulated_grad;
diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h
index 0bcef2253f9938f479cc847ad07a2af74427d24c..72fe5732e9620af8e008aa96ca751a51f3ae4af7 100644
--- a/paddle/fluid/eager/eager_tensor.h
+++ b/paddle/fluid/eager/eager_tensor.h
@@ -239,8 +239,8 @@ class EagerTensor final {
       auto tensor_dense =
           std::dynamic_pointer_cast<pten::DenseTensor>(tensor_->impl());
       if (tensor_dense) {
-        paddle::experimental::MovesStorage(tensor_dense.get(),
-                                           framework_tensor);
+        paddle::experimental::SharesStorage(tensor_dense.get(),
+                                            framework_tensor);
       } else {
         PADDLE_THROW(paddle::platform::errors::Fatal(
             "Unrecognized egr::EagerTensor type, only "
@@ -258,27 +258,23 @@ class EagerTensor final {
   /** Part 11: Sync paddle::framework::Variable with pten::Tensor **/
   void SyncToTensor() {
     // Synchronize allocation only once.
-    if (!this->defined() || !this->initialized()) {
-      // TODO(jiabin): Support selected rows later.
-      if (var_.IsInitialized()) {
-        if (var_.IsType<paddle::framework::LoDTensor>()) {
-          SetImplWithLegacyTensor<paddle::framework::LoDTensor>();
-        } else if (var_.IsType<paddle::framework::Tensor>()) {
-          SetImplWithLegacyTensor<paddle::framework::Tensor>();
-        } else {
-          PADDLE_THROW(paddle::platform::errors::Fatal(
-              "Unable to fetch underlying tensor "
-              "from VarBase, only LoDTensor and "
-              "Tensor are supported for now"));
-        }
+    if (var_.IsInitialized()) {
+      if (var_.IsType<paddle::framework::LoDTensor>()) {
+        SetImplWithLegacyTensor<paddle::framework::LoDTensor>();
+      } else if (var_.IsType<paddle::framework::Tensor>()) {
+        SetImplWithLegacyTensor<paddle::framework::Tensor>();
       } else {
-        PADDLE_THROW(paddle::platform::errors::Fatal(
-            "Can not Sync EagerTensor %s whose paddle::framework::Variable is "
-            "not initialized!",
-            name()));
+        PADDLE_THROW(
+            paddle::platform::errors::Fatal("Unable to fetch underlying tensor "
+                                            "from VarBase, only LoDTensor and "
+                                            "Tensor are supported for now"));
       }
+    } else {
+      PADDLE_THROW(paddle::platform::errors::Fatal(
+          "Can not Sync EagerTensor %s whose paddle::framework::Variable is "
+          "not initialized!",
+          name()));
     }
   }

@@ -296,8 +292,16 @@ class EagerTensor final {
   template <typename LEGACY_TYPE>
   void SetImplWithLegacyTensor() {
     const auto& framework_tensor = var_.Get<LEGACY_TYPE>();
-    this->set_impl(
-        std::move(paddle::experimental::MakePtenDenseTensor(framework_tensor)));
+    if (this->initialized()) {
+      VLOG(8) << "Sync Var to initialized tensor for: " << name();
+      paddle::experimental::ReMakePtenDenseTensor(
+          framework_tensor,
+          static_cast<pten::DenseTensor*>(this->impl().get()));
+    } else {
+      VLOG(8) << "Sync Var to uninitialized tensor for: " << name();
+      this->set_impl(std::move(
+          paddle::experimental::MakePtenDenseTensor(framework_tensor)));
+    }
     var_.Clear();
   }

diff --git a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc
index a02f0bec456bf195d56ab3d47f7aa73c670b20a4..84daf4eac4ce694a89ab2508597a8cde1548f604 100644
--- a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc
+++ b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc
@@ -118,7 +118,7 @@ TEST(EagerTensor, MemberFunction) {
   CHECK_EQ(et3.Var().Get<paddle::framework::LoDTensor>().data<float>()[1],
            10.0f);
   VLOG(6) << "SyncToTensor";
-  CHECK(et3.initialized() == false);
+  CHECK(et3.initialized() == true);
   et3.SyncToTensor();
   CHECK(et3.initialized() == true);
   VLOG(6) << "Check Tensor";
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
index baa99dc93c2dd3396d1e6f30423efd87d6b5bc28..e05a63a69d002d80bcdfd17413944cac7a480acc 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
@@ -87,8 +87,8 @@ void benchmark_eager_intermediate_matmul(const EagerTensor& X,
     // Examine Forward Grad (w.r.t max_num_runs = 2)
     eager_test::CompareVariableWithValue<float>(input_tensor0, 16);
     // Examine Backward Grad (w.r.t max_num_runs = 2)
-    eager_test::CompareGradVariableWithValue<float>(X, 16);
-    eager_test::CompareGradVariableWithValue<float>(Y, 16);
+    eager_test::CompareGradTensorWithValue<float>(X, 16);
+    eager_test::CompareGradTensorWithValue<float>(Y, 16);
   }
 }

@@ -121,8 +121,8 @@ void benchmark_eager_intermediate_mlp(const EagerTensor& X,
     eager_test::CompareVariableWithValue<float>(Out, result["Out"]);

     // Examine Backward Grad (w.r.t max_num_runs = 2)
-    eager_test::CompareGradVariableWithValue<float>(X, result["GradX"]);
-    eager_test::CompareGradVariableWithValue<float>(Ws[0], result["GradW"]);
+    eager_test::CompareGradTensorWithValue<float>(X, result["GradX"]);
+    eager_test::CompareGradTensorWithValue<float>(Ws[0], result["GradW"]);
   }
 }

diff --git a/paddle/fluid/eager/tests/task_tests/generated_test.cc b/paddle/fluid/eager/tests/task_tests/generated_test.cc
index a06091247bf7ae7d5469f4538d5bbc308e04f99e..b5ce9223f6c97ee254bdd0c7cfd1ba9229ba9b3a 100644
--- a/paddle/fluid/eager/tests/task_tests/generated_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/generated_test.cc
@@ -54,7 +54,7 @@ TEST(Generated, Sigmoid) {
   RunBackward(target_tensors, {});
   VLOG(6) << "Finish Backward";

-  eager_test::CompareGradVariableWithValue<float>(tensor, 0.25);
+  eager_test::CompareGradTensorWithValue<float>(tensor, 0.25);
 }

 TEST(Generated, Matmul_v2) {
@@ -85,8 +85,8 @@ TEST(Generated, Matmul_v2) {
   std::vector<egr::EagerTensor> target_tensors = {output_tensor};
   RunBackward(target_tensors, {});

-  eager_test::CompareGradVariableWithValue<float>(X, 2.0 * 20);
-  eager_test::CompareGradVariableWithValue<float>(Y, 3.0 * 4);
+  eager_test::CompareGradTensorWithValue<float>(X, 2.0 * 20);
+  eager_test::CompareGradTensorWithValue<float>(Y, 3.0 * 4);
 }

 TEST(Generated, ElementwiseAdd) {
@@ -116,8 +116,8 @@ TEST(Generated, ElementwiseAdd) {
   std::vector<egr::EagerTensor> target_tensors = {output_tensor};
   RunBackward(target_tensors, {});

-  eager_test::CompareGradVariableWithValue<float>(X, 1.0);
-  eager_test::CompareGradVariableWithValue<float>(Y, 1.0);
+  eager_test::CompareGradTensorWithValue<float>(X, 1.0);
+  eager_test::CompareGradTensorWithValue<float>(Y, 1.0);
 }

 }  // namespace egr
diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index 7f131f9ccd742516aa89d650ad440e9f15ae5df4..c56fe5be4da6982cc9e9c776093198ff8f9dd5c2 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -35,7 +35,7 @@ limitations under the License. */
 namespace paddle {
 namespace pybind {

-extern PyTypeObject* pEagerTensorType;
+extern PyTypeObject* p_eager_tensor_type;

 static PyObject* eager_tensor_method_numpy(EagerTensorObject* self,
                                            PyObject* args, PyObject* kwargs) {
@@ -167,7 +167,7 @@ static PyObject* eager_tensor__clear_gradient(EagerTensorObject* self,
   EAGER_SYNC_TRY
   VLOG(4) << "ClearGradient " << self->eager_tensor.name();

-  egr::EagerTensor grad;
+  egr::EagerTensor* grad;
   if (egr::egr_utils_api::IsLeafTensor(self->eager_tensor)) {
     // Add RetainGrad as PostHook to AccumulationNode
     std::shared_ptr<egr::GradNodeBase> grad_node =
@@ -182,14 +182,14 @@ static PyObject* eager_tensor__clear_gradient(EagerTensorObject* self,
     grad = accumulation_grad_node->Grad();
   } else {
     auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor);
-    grad = meta->Grad();
+    grad = meta->MutableGrad();
   }

-  if (grad.initialized()) {
+  if (grad->initialized()) {
     VLOG(4) << "Gradient of " << self->eager_tensor.name()
             << " is initialized, will be released.";
     auto dense_tensor =
-        std::dynamic_pointer_cast<pten::DenseTensor>(grad.impl());
+        std::dynamic_pointer_cast<pten::DenseTensor>(grad->impl());
     dense_tensor->release();
   }
   Py_INCREF(Py_None);
@@ -202,7 +202,6 @@ static PyObject* eager_tensor__zero_grads(EagerTensorObject* self,
   EAGER_TRY
   VLOG(4) << "ZeroGrads " << self->eager_tensor.name();

-  egr::EagerTensor grad;
   if (egr::egr_utils_api::IsLeafTensor(self->eager_tensor)) {
     // Add RetainGrad as PostHook to AccumulationNode
     std::shared_ptr<egr::GradNodeBase> grad_node =
@@ -214,21 +213,54 @@ static PyObject* eager_tensor__zero_grads(EagerTensorObject* self,
         "with type: GradNodeAccumulation"));
     auto accumulation_grad_node =
         std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
-    grad = accumulation_grad_node->Grad();
+    if (accumulation_grad_node->Grad()->initialized()) {
+      accumulation_grad_node->Grad()->set_tensor(
+          std::make_shared<paddle::experimental::Tensor>(
+              paddle::experimental::zeros_like(
+                  *(accumulation_grad_node->Grad()->Tensor().get()))));
+    }
   } else {
     auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor);
-    grad = meta->Grad();
+    if (meta->MutableGrad()->initialized()) {
+      meta->MutableGrad()->set_tensor(
+          std::make_shared<paddle::experimental::Tensor>(
+              paddle::experimental::zeros_like(
+                  *(meta->MutableGrad()->Tensor().get()))));
+    }
   }

-  if (grad.initialized()) {
-    grad.set_tensor(std::make_shared<paddle::experimental::Tensor>(
-        paddle::experimental::zeros_like(*(grad.Tensor().get()))));
-  }
   Py_INCREF(Py_None);
   return Py_None;
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }

+static PyObject* eager_tensor_method_detach(EagerTensorObject* self,
+                                            PyObject* args, PyObject* kwargs) {
+  EAGER_SYNC_TRY
+  PADDLE_ENFORCE_EQ(
+      self->eager_tensor.initialized(), true,
+      platform::errors::InvalidArgument("Tensor %s has not been initialized!",
+                                        self->eager_tensor.name()));
+
+  PyObject* obj = p_eager_tensor_type->tp_alloc(p_eager_tensor_type, 0);
+  if (obj) {
+    auto v = reinterpret_cast<EagerTensorObject*>(obj);
+    new (&(v->eager_tensor)) egr::EagerTensor();
+    v->eager_tensor.set_impl(self->eager_tensor.impl());
+    v->eager_tensor.set_name(egr::Controller::Instance().GenerateUniqueName());
+    auto autograd_meta_src =
+        egr::EagerUtils::autograd_meta(&(self->eager_tensor));
+    auto autograd_meta = egr::EagerUtils::autograd_meta(&(v->eager_tensor));
+    autograd_meta->SetPersistable(autograd_meta_src->Persistable());
+  } else {
+    PADDLE_THROW(platform::errors::Fatal(
+        "tp_alloc return null, can not new a PyObject."));
+  }
+
+  return obj;
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
 PyMethodDef variable_methods[] = {
     {"numpy", (PyCFunction)(void (*)(void))eager_tensor_method_numpy,
      METH_VARARGS | METH_KEYWORDS, NULL},
@@ -246,6 +278,8 @@ PyMethodDef variable_methods[] = {
      METH_VARARGS | METH_KEYWORDS, NULL},
     {"_zero_grads", (PyCFunction)(void (*)(void))eager_tensor__zero_grads,
      METH_VARARGS | METH_KEYWORDS, NULL},
+    {"detach", (PyCFunction)(void (*)(void))eager_tensor_method_detach,
+     METH_VARARGS | METH_KEYWORDS, NULL},
     {NULL, NULL, 0, NULL}};

 }  // namespace pybind
diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc
index b147d5fbad0ed80cdb84ce37240819e4e5f8ee56..71b8bbbb1a283be084ae33c87f128cb7f1e05769 100644
--- a/paddle/fluid/pybind/eager_properties.cc
+++ b/paddle/fluid/pybind/eager_properties.cc
@@ -63,7 +63,6 @@ PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self,
                                            void* closure) {
   EAGER_SYNC_TRY
   if (egr::egr_utils_api::IsLeafTensor(self->eager_tensor)) {
-    // Add RetainGrad as PostHook to AccumulationNode
     std::shared_ptr<egr::GradNodeBase> grad_node =
         egr::EagerUtils::grad_node(self->eager_tensor);
     PADDLE_ENFORCE(
@@ -73,7 +72,7 @@ PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self,
             "with type: GradNodeAccumulation"));
     auto accumulation_grad_node =
         std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
-    return ToPyObject(accumulation_grad_node->Grad());
+    return ToPyObject(*accumulation_grad_node->Grad());
   } else {
     VLOG(6) << "Get grad for tensor: " << self->eager_tensor.name();
     auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor);
@@ -82,6 +81,27 @@ PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }

+int eager_tensor_properties_set_grad(EagerTensorObject* self, PyObject* value,
+                                     void* closure) {
+  EAGER_SYNC_TRY
+  auto src = CastPyArg2EagerTensor(value, 0);
+  PADDLE_ENFORCE(
+      egr::egr_utils_api::IsLeafTensor(self->eager_tensor),
+      paddle::platform::errors::Fatal("Only leaf Tensor can be set grad."));
+  std::shared_ptr<egr::GradNodeBase> grad_node =
+      egr::EagerUtils::grad_node(self->eager_tensor);
+  PADDLE_ENFORCE(
+      grad_node.get() != nullptr,
+      paddle::platform::errors::Fatal("Detected NULL grad_node"
+                                      "Leaf tensor should have had grad_node "
+                                      "with type: GradNodeAccumulation"));
+  auto accumulation_grad_node =
+      std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
+  accumulation_grad_node->Grad()->copy_(src, true);
+  return 0;
+  EAGER_CATCH_AND_THROW_RETURN_ZERO
+}
+
 int eager_tensor_properties_set_stop_gradient(EagerTensorObject* self,
                                               PyObject* value, void* closure) {
   EAGER_SYNC_TRY
@@ -147,8 +167,8 @@ PyObject* eager_tensor_properties_get_dtype(EagerTensorObject* self,
 }

 struct PyGetSetDef variable_properties[] = {
-    {"grad", (getter)eager_tensor_properties_get_grad, nullptr, nullptr,
-     nullptr},
+    {"grad", (getter)eager_tensor_properties_get_grad,
+     (setter)eager_tensor_properties_set_grad, nullptr, nullptr},
     {"name", (getter)eager_tensor_properties_get_name,
      (setter)eager_tensor_properties_set_name, nullptr, nullptr},
     {"stop_gradient", (getter)eager_tensor_properties_get_stop_gradient,
diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py
index c61f87ccf9089bc1c4977cd996b76b0b6fbec717..e06e7f52dd671bd59075ddb2ea89df588ce6daec 100644
--- a/python/paddle/fluid/dygraph/varbase_patch_methods.py
+++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py
@@ -22,7 +22,7 @@ import paddle
 from .. import framework
 from .. import core
 from .. import unique_name
-from ..framework import Variable, Parameter, ParamBase, _getitem_impl_, _setitem_impl_, _in_eager_mode
+from ..framework import Variable, Parameter, ParamBase, _getitem_impl_, _setitem_impl_, _in_eager_mode, EagerParamBase
 from .base import switch_to_static_graph
 from .math_op_patch import monkey_patch_math_varbase
 from .parallel import scale_loss
@@ -149,7 +149,7 @@ def monkey_patch_varbase():
                 out = linear(t)  # call with different weight

         """
-        if _in_eager_mode():
+        if core._in_eager_mode():
             base_tensor = core.eager.EagerTensor
         else:
             base_tensor = core.VarBase
@@ -238,7 +238,7 @@ def monkey_patch_varbase():
         """
         if framework.in_dygraph_mode():
             if grad_tensor is not None:
-                if _in_eager_mode():
+                if core._in_eager_mode():
                     assert isinstance(
                         grad_tensor, core.eager.EagerTensor
                     ), "The type of grad_tensor must be paddle.Tensor"
@@ -250,7 +250,7 @@ def monkey_patch_varbase():
                     "Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format(
                         grad_tensor.name, grad_tensor.shape, self.name, self.shape)

-            if _in_eager_mode():
+            if core._in_eager_mode():
                 if grad_tensor is None:
                     grad_tensor = []
                 else:
@@ -258,7 +258,7 @@ def monkey_patch_varbase():
             if paddle.is_compiled_with_xpu() or paddle.is_compiled_with_npu():
                 # TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
                 scaled_loss = scale_loss(self)
-                if _in_eager_mode():
+                if core._in_eager_mode():
                     core.eager.run_backward([scaled_loss], grad_tensor,
                                             retain_graph)
                 else:
@@ -266,7 +266,7 @@ def monkey_patch_varbase():
                                                 retain_graph,
                                                 framework._dygraph_tracer())
             else:
-                if _in_eager_mode():
+                if core._in_eager_mode():
                     core.eager.run_backward([self], grad_tensor, retain_graph)
                 else:
                     core.dygraph_run_backward([self], [grad_tensor],
@@ -305,7 +305,7 @@ def monkey_patch_varbase():
                 # [500.]

""" - if _in_eager_mode(): + if core._in_eager_mode(): if not self.grad._is_initialized(): return None # TODO(wanghuancoder) support SELECTED_ROWS @@ -587,7 +587,7 @@ def monkey_patch_varbase(): # [[0.30574632, 0.55739117, 0.30902600, 0.39413780, 0.44830436], # [0.79010487, 0.53972793, 0.09495186, 0.44267157, 0.72112119]]) """ - if _in_eager_mode(): + if core._in_eager_mode(): from paddle.tensor.to_string import eager_tensor_to_string return eager_tensor_to_string(self) else: @@ -619,7 +619,7 @@ def monkey_patch_varbase(): raise RuntimeError( "Only Leaf Tensor support the deepcopy at the moment, non-Leaf Tensors contains graph information that does't support deepcopy" ) - if _in_eager_mode(): + if core._in_eager_mode(): new_varbase = core.eager.EagerTensor() else: new_varbase = core.VarBase() @@ -763,6 +763,14 @@ def monkey_patch_varbase(): else: return None + @framework.dygraph_only + def _set_grad_ivar(self, value): + if isinstance(self, EagerParamBase): + self.grad = value + else: + raise TypeError( + "_set_grad_ivar is only supported for Parameter Tensor") + @framework.dygraph_only def clear_gradient(self, set_to_zero=True): if set_to_zero: @@ -770,6 +778,10 @@ def monkey_patch_varbase(): else: self._clear_gradient() + @framework.dygraph_only + def clone(self): + return _C_ops_.assign(self) + if core._in_eager_mode() and not hasattr(core, "eager"): return @@ -790,7 +802,9 @@ def monkey_patch_varbase(): if core._in_eager_mode(): setattr(core.eager.EagerTensor, "_grad_ivar", _grad_ivar) + setattr(core.eager.EagerTensor, "_set_grad_ivar", _set_grad_ivar) setattr(core.eager.EagerTensor, "clear_gradient", clear_gradient) + setattr(core.eager.EagerTensor, "clone", clone) else: setattr(core.VarBase, "__name__", "Tensor") setattr(core.VarBase, "grad", grad)