diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index f47bd3350e30f46acfc05b5fd1b6f58a7b87828c..7f131f9ccd742516aa89d650ad440e9f15ae5df4 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -27,6 +27,7 @@ limitations under the License. */
 #include "paddle/fluid/pybind/eager.h"
 #include "paddle/fluid/pybind/eager_utils.h"
 #include "paddle/fluid/pybind/exception.h"
+#include "paddle/pten/api/include/api.h"
 #include "paddle/pten/common/data_type.h"
 #include "paddle/pten/core/convert_utils.h"
 #include "paddle/pten/core/dense_tensor.h"
@@ -125,13 +126,17 @@ static PyObject* eager_tensor_method_copy_(EagerTensorObject* self,
   bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 1), 1);
   VLOG(6) << "Start Copy Tensor " << src_tensor.name() << " to "
           << self->eager_tensor.name();
+  if (!self->eager_tensor.defined()) {
+    egr::EagerUtils::autograd_meta(&(self->eager_tensor))
+        ->SetStopGradient(
+            egr::EagerUtils::autograd_meta(&(src_tensor))->StopGradient());
+    egr::EagerUtils::autograd_meta(&(self->eager_tensor))
+        ->SetPersistable(
+            egr::EagerUtils::autograd_meta(&(src_tensor))->Persistable());
+  }
+
   self->eager_tensor.copy_(src_tensor, blocking);
-  egr::EagerUtils::autograd_meta(&(self->eager_tensor))
-      ->SetStopGradient(
-          egr::EagerUtils::autograd_meta(&(src_tensor))->StopGradient());
-  egr::EagerUtils::autograd_meta(&(self->eager_tensor))
-      ->SetPersistable(
-          egr::EagerUtils::autograd_meta(&(src_tensor))->Persistable());
+
   VLOG(6) << "Finish Copy Tensor " << src_tensor.name() << " to "
           << self->eager_tensor.name();
   Py_INCREF(Py_None);
@@ -156,6 +161,74 @@ static PyObject* eager_tensor_retain_grads(EagerTensorObject* self,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
+static PyObject* eager_tensor__clear_gradient(EagerTensorObject* self,
+                                              PyObject* args,
+                                              PyObject* kwargs) {
+  EAGER_SYNC_TRY
+  VLOG(4) << "ClearGradient " << self->eager_tensor.name();
+
+  egr::EagerTensor grad;
+  if (egr::egr_utils_api::IsLeafTensor(self->eager_tensor)) {
+    // Add RetainGrad as PostHook to AccumulationNode
+    std::shared_ptr<egr::GradNodeBase> grad_node =
+        egr::EagerUtils::grad_node(self->eager_tensor);
+    PADDLE_ENFORCE(
+        grad_node.get() != nullptr,
+        paddle::platform::errors::Fatal("Detected NULL grad_node"
+                                        "Leaf tensor should have had grad_node "
+                                        "with type: GradNodeAccumulation"));
+    auto accumulation_grad_node =
+        std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
+    grad = accumulation_grad_node->Grad();
+  } else {
+    auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor);
+    grad = meta->Grad();
+  }
+
+  if (grad.initialized()) {
+    VLOG(4) << "Gradient of " << self->eager_tensor.name()
+            << " is initialized, will be released.";
+    auto dense_tensor =
+        std::dynamic_pointer_cast<pten::DenseTensor>(grad.impl());
+    dense_tensor->release();
+  }
+  Py_INCREF(Py_None);
+  return Py_None;
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
+static PyObject* eager_tensor__zero_grads(EagerTensorObject* self,
+                                          PyObject* args, PyObject* kwargs) {
+  EAGER_TRY
+  VLOG(4) << "ZeroGrads " << self->eager_tensor.name();
+
+  egr::EagerTensor grad;
+  if (egr::egr_utils_api::IsLeafTensor(self->eager_tensor)) {
+    // Add RetainGrad as PostHook to AccumulationNode
+    std::shared_ptr<egr::GradNodeBase> grad_node =
+        egr::EagerUtils::grad_node(self->eager_tensor);
+    PADDLE_ENFORCE(
+        grad_node.get() != nullptr,
+        paddle::platform::errors::Fatal("Detected NULL grad_node"
+                                        "Leaf tensor should have had grad_node "
+                                        "with type: GradNodeAccumulation"));
+    auto accumulation_grad_node =
+        std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
+    grad = accumulation_grad_node->Grad();
+  } else {
+    auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor);
+    grad = meta->Grad();
+  }
+
+  if (grad.initialized()) {
+    grad.set_tensor(std::make_shared<paddle::experimental::Tensor>(
+        paddle::experimental::zeros_like(*(grad.Tensor().get()))));
+  }
+  Py_INCREF(Py_None);
+  return Py_None;
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
 PyMethodDef variable_methods[] = {
     {"numpy", (PyCFunction)(void (*)(void))eager_tensor_method_numpy,
      METH_VARARGS | METH_KEYWORDS, NULL},
@@ -168,6 +241,11 @@ PyMethodDef variable_methods[] = {
      METH_VARARGS | METH_KEYWORDS, NULL},
     {"retain_grads", (PyCFunction)(void (*)(void))eager_tensor_retain_grads,
      METH_VARARGS | METH_KEYWORDS, NULL},
+    {"_clear_gradient",
+     (PyCFunction)(void (*)(void))eager_tensor__clear_gradient,
+     METH_VARARGS | METH_KEYWORDS, NULL},
+    {"_zero_grads", (PyCFunction)(void (*)(void))eager_tensor__zero_grads,
+     METH_VARARGS | METH_KEYWORDS, NULL},
     {NULL, NULL, 0, NULL}};
 
 }  // namespace pybind
diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc
index 879ea2b5d264e2c3727293749583e21ba580bfc9..9849d0d41611bfbe0c3458dcc08476227e09820d 100644
--- a/paddle/fluid/pybind/eager_utils.cc
+++ b/paddle/fluid/pybind/eager_utils.cc
@@ -222,6 +222,8 @@ std::vector<egr::EagerTensor> CastPyArg2VectorOfEagerTensor(PyObject* obj,
             reinterpret_cast<PyTypeObject*>(item->ob_type)->tp_name, i));
       }
     }
+  } else if (obj == Py_None) {
+    return {};
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "argument (position %d) must be "
@@ -263,6 +265,8 @@ std::vector<int> CastPyArg2VectorOfInt(PyObject* obj, size_t arg_pos) {
             reinterpret_cast<PyTypeObject*>(item->ob_type)->tp_name, i));
       }
     }
+  } else if (obj == Py_None) {
+    return {};
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "argument (position %d) must be "
@@ -557,6 +561,8 @@ std::vector<egr::EagerTensor> GetEagerTensorListFromArgs(
           reinterpret_cast<EagerTensorObject*>(PyTuple_GetItem(list, i))
              ->eager_tensor);
     }
+  } else if (list == Py_None) {
+    return {};
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "%s(): argument '%s' (position %d) must be list of Tensors, but got "
@@ -634,6 +640,8 @@ std::vector<egr::EagerTensor*> GetEagerTensorPtrListFromArgs(
           &(reinterpret_cast<EagerTensorObject*>(PyTuple_GetItem(list, i))
                ->eager_tensor));
     }
+  } else if (list == Py_None) {
+    return {};
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "%s(): argument '%s' (position %d) must be list of Tensors, but got "
@@ -644,6 +652,5 @@ std::vector<egr::EagerTensor*> GetEagerTensorPtrListFromArgs(
 
   return result;
 }
-
 }  // namespace pybind
 }  // namespace paddle
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index cd8f9f8545847d1c08588cbba1524e2f56331116..ec589b40e907ff510830e1e02dc222143d3d2104 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -55,7 +55,6 @@ from . import initializer
 from .initializer import set_global_initializer
 from . import layers
 from . import dygraph
-from . import eager
 from . import contrib
 from . import nets
 from . import optimizer
@@ -91,7 +90,6 @@ from .dygraph.base import enable_dygraph, disable_dygraph
 from .io import save, load, load_program_state, set_program_state
 from .dygraph.checkpoint import save_dygraph, load_dygraph
 from .dygraph.varbase_patch_methods import monkey_patch_varbase
-from .eager.eager_tensor_patch_methods import monkey_patch_eagertensor
 from . import generator
 from .core import _cuda_synchronize
 from .generator import Generator
@@ -115,7 +113,6 @@ __all__ = framework.__all__ + executor.__all__ + \
     'contrib',
     'data',
     'dygraph',
-    'eager',
     'enable_dygraph',
     'disable_dygraph',
     'enable_imperative',
@@ -221,7 +218,6 @@ def __bootstrap__():
 monkey_patch_variable()
 __bootstrap__()
 monkey_patch_varbase()
-monkey_patch_eagertensor()
 
 # NOTE(zhiqiu): register npu_finalize on the exit of Python,
 # do some clean up manually.
diff --git a/python/paddle/fluid/dygraph/math_op_patch.py b/python/paddle/fluid/dygraph/math_op_patch.py
index 92fbc89a46e327ad4306e4956f750bc2957a2cdd..64c418fabb11f6a82ca328aa74ac540480477fba 100644
--- a/python/paddle/fluid/dygraph/math_op_patch.py
+++ b/python/paddle/fluid/dygraph/math_op_patch.py
@@ -60,6 +60,7 @@ _complex_dtypes = [
 ]
 
 _already_patch_varbase = False
+_already_patch_eager_tensor = False
 
 
 def monkey_patch_math_varbase():
@@ -220,7 +221,11 @@ def monkey_patch_math_varbase():
 
         # 2. create varbase for scalar
         lhs_dtype = self.dtype
-        if not isinstance(other_var, core.VarBase):
+        if _in_eager_mode():
+            other_var_should_be = core.eager.EagerTensor
+        else:
+            other_var_should_be = core.VarBase
+        if not isinstance(other_var, other_var_should_be):
             if isinstance(other_var, complex):
                 import paddle
                 other_var = paddle.to_tensor(other_var, dtype='complex64')
@@ -333,22 +338,30 @@
     ]
 
     global _already_patch_varbase
+    global _already_patch_eager_tensor
+
+    if core._in_eager_mode():
+        local_already_patch = _already_patch_eager_tensor
+        _already_patch_eager_tensor = True
+        local_tensor = core.eager.EagerTensor
+    else:
+        local_already_patch = _already_patch_varbase
+        _already_patch_varbase = True
+        local_tensor = core.VarBase
 
-    if not _already_patch_varbase:
+    if not local_already_patch:
         for method in varbase_methods:
             method_name = method[0]
             method_impl = method[1]
-            setattr(core.VarBase, method_name, method_impl)
+            setattr(local_tensor, method_name, method_impl)
     else:
         import paddle.tensor
         # Tensor method from module paddle.tensor
         for method_name in paddle.tensor.tensor_method_func:
-            if hasattr(core.VarBase, method_name): continue
+            if hasattr(local_tensor, method_name): continue
             method_impl = getattr(paddle.tensor, method_name, None)
-            if method_impl: setattr(core.VarBase, method_name, method_impl)
+            if method_impl: setattr(local_tensor, method_name, method_impl)
 
         for magic_method, origin_method in paddle.tensor.magic_method_func:
             impl = getattr(paddle.tensor, origin_method, None)
-            if impl: setattr(core.VarBase, magic_method, impl)
-
-    _already_patch_varbase = True
+            if impl: setattr(local_tensor, magic_method, impl)
diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py
index a2cecb8030db59067c3b5bd5b89a67e4f16e4b5a..c61f87ccf9089bc1c4977cd996b76b0b6fbec717 100644
--- a/python/paddle/fluid/dygraph/varbase_patch_methods.py
+++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py
@@ -22,7 +22,7 @@ import paddle
 from .. import framework
 from .. import core
 from .. import unique_name
-from ..framework import Variable, Parameter, ParamBase, _getitem_impl_, _setitem_impl_
+from ..framework import Variable, Parameter, ParamBase, _getitem_impl_, _setitem_impl_, _in_eager_mode
 from .base import switch_to_static_graph
 from .math_op_patch import monkey_patch_math_varbase
 from .parallel import scale_loss
@@ -58,6 +58,9 @@ class TensorHookRemoveHelper(object):
         return False
 
 
+_already_patch_repr = False
+
+
 def monkey_patch_varbase():
     @switch_to_static_graph
     def _to_static_var(self, to_parameter=False, **kwargs):
@@ -146,7 +149,11 @@
                 out = linear(t)  # call with different weight
 
         """
-        assert isinstance(value, (np.ndarray, core.VarBase, dict, str)), \
+        if _in_eager_mode():
+            base_tensor = core.eager.EagerTensor
+        else:
+            base_tensor = core.VarBase
+        assert isinstance(value, (np.ndarray, base_tensor, dict, str)), \
             "Variable set_value function, arguments type only support Variable, numpy, VarBase, dict, string."
 
         if isinstance(value, (dict, str)):
@@ -160,7 +167,7 @@
             self.value().set_string_list(value)
         else:
             value_np = value
-            if isinstance(value, core.VarBase):
+            if isinstance(value, base_tensor):
                 value_np = value.numpy()
 
             self_tensor_np = self.numpy()
@@ -231,22 +238,40 @@
         """
         if framework.in_dygraph_mode():
             if grad_tensor is not None:
-                assert isinstance(
-                    grad_tensor, paddle.
-                    Tensor), "The type of grad_tensor must be paddle.Tensor"
+                if _in_eager_mode():
+                    assert isinstance(
+                        grad_tensor, core.eager.EagerTensor
+                    ), "The type of grad_tensor must be paddle.Tensor"
+                else:
+                    assert isinstance(
+                        grad_tensor, paddle.
+                        Tensor), "The type of grad_tensor must be paddle.Tensor"
                 assert grad_tensor.shape == self.shape, \
                     "Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format(
                     grad_tensor.name, grad_tensor.shape, self.name, self.shape)
 
+            if _in_eager_mode():
+                if grad_tensor is None:
+                    grad_tensor = []
+                else:
+                    grad_tensor = [grad_tensor]
             if paddle.is_compiled_with_xpu() or paddle.is_compiled_with_npu():
                 # TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
                 scaled_loss = scale_loss(self)
-                core.dygraph_run_backward([scaled_loss], [grad_tensor],
-                                          retain_graph,
-                                          framework._dygraph_tracer())
+                if _in_eager_mode():
+                    core.eager.run_backward([scaled_loss], grad_tensor,
+                                            retain_graph)
+                else:
+                    core.dygraph_run_backward([scaled_loss], [grad_tensor],
+                                              retain_graph,
+                                              framework._dygraph_tracer())
             else:
-                core.dygraph_run_backward([self], [grad_tensor], retain_graph,
-                                          framework._dygraph_tracer())
+                if _in_eager_mode():
+                    core.eager.run_backward([self], grad_tensor, retain_graph)
+                else:
+                    core.dygraph_run_backward([self], [grad_tensor],
+                                              retain_graph,
+                                              framework._dygraph_tracer())
         else:
             raise ValueError(
                 "Variable.backward() is only available in DyGraph mode")
@@ -280,15 +305,22 @@
                 # [500.]
 
         """
-        if self._grad_ivar() is None:
-            return None
+        if _in_eager_mode():
+            if not self.grad._is_initialized():
+                return None
+            # TODO(wanghuancoder) support SELECTED_ROWS
+            return self.grad.numpy()
+        else:
+            if self._grad_ivar() is None:
+                return None
 
-        new_ivar = self._grad_ivar()._copy_to(core.CPUPlace(), True)
-        if self._grad_ivar().type == core.VarDesc.VarType.SELECTED_ROWS:
-            return (np.array(new_ivar.value().get_selected_rows().get_tensor()),
+            new_ivar = self._grad_ivar()._copy_to(core.CPUPlace(), True)
+            if self._grad_ivar().type == core.VarDesc.VarType.SELECTED_ROWS:
+                return (
+                    np.array(new_ivar.value().get_selected_rows().get_tensor()),
                     np.array(new_ivar.value().get_selected_rows().rows()))
-        else:
-            return np.array(new_ivar.value().get_tensor())
+            else:
+                return np.array(new_ivar.value().get_tensor())
 
     @framework.dygraph_only
     def register_hook(self, hook):
@@ -555,8 +587,12 @@
                 #        [[0.30574632, 0.55739117, 0.30902600, 0.39413780, 0.44830436],
                 #         [0.79010487, 0.53972793, 0.09495186, 0.44267157, 0.72112119]])
         """
-        from paddle.tensor.to_string import to_string
-        return to_string(self)
+        if _in_eager_mode():
+            from paddle.tensor.to_string import eager_tensor_to_string
+            return eager_tensor_to_string(self)
+        else:
+            from paddle.tensor.to_string import to_string
+            return to_string(self)
 
     def __deepcopy__(self, memo):
         """
@@ -583,7 +619,10 @@
             raise RuntimeError(
                 "Only Leaf Tensor support the deepcopy at the moment, non-Leaf Tensors contains graph information that does't support deepcopy"
             )
-        new_varbase = core.VarBase()
+        if _in_eager_mode():
+            new_varbase = core.eager.EagerTensor()
+        else:
+            new_varbase = core.VarBase()
         new_varbase.name = self.name + unique_name.generate("_deepcopy")
         memo[id(self)] = new_varbase
         new_varbase.copy_(self, True)
@@ -717,33 +756,62 @@
             # Call c++ func __setitem_varbase__ to speedup.
             return self.__setitem_varbase__(item, value)
 
+    @framework.dygraph_only
+    def _grad_ivar(self):
+        if self.grad._is_initialized():
+            return self.grad
+        else:
+            return None
+
+    @framework.dygraph_only
+    def clear_gradient(self, set_to_zero=True):
+        if set_to_zero:
+            self._zero_grads()
+        else:
+            self._clear_gradient()
+
+    if core._in_eager_mode() and not hasattr(core, "eager"):
+        return
+
     for method_name, method in (
             ("__bool__", __bool__), ("__nonzero__", __nonzero__),
             ("_to_static_var", _to_static_var), ("set_value", set_value),
            ("block", block), ("backward", backward), ("clear_grad", clear_grad),
-            ("inplace_version", inplace_version), ("grad", grad),
-            ("gradient", gradient), ("register_hook", register_hook),
-            ("__str__", __str__), ("__repr__", __str__),
-            ("__deepcopy__", __deepcopy__), ("__module__", "paddle"),
-            ("__name__", "Tensor"), ("__array__", __array__),
+            ("inplace_version", inplace_version), ("gradient", gradient),
+            ("register_hook", register_hook), ("__str__", __str__),
+            ("__repr__", __str__), ("__deepcopy__", __deepcopy__),
+            ("__module__", "paddle"), ("__array__", __array__),
             ("__getitem__", __getitem__), ("item", item),
             ("__setitem__", __setitem__), ("_to", _to)):
-        setattr(core.VarBase, method_name, method)
-
-    # NOTE(zhiqiu): pybind11 will set a default __str__ method of enum class.
-    # So, we need to overwrite it to a more readable one.
-    # See details in https://github.com/pybind/pybind11/issues/2537.
-    origin = getattr(core.VarDesc.VarType, "__repr__")
-
-    def dtype_str(dtype):
-        if dtype in _PADDLE_DTYPE_2_NUMPY_DTYPE:
-            prefix = 'paddle.'
-            return prefix + _PADDLE_DTYPE_2_NUMPY_DTYPE[dtype]
+        if core._in_eager_mode():
+            setattr(core.eager.EagerTensor, method_name, method)
         else:
-            # for example, paddle.fluid.core.VarDesc.VarType.LOD_TENSOR
-            return origin(dtype)
+            setattr(core.VarBase, method_name, method)
+
+    if core._in_eager_mode():
+        setattr(core.eager.EagerTensor, "_grad_ivar", _grad_ivar)
+        setattr(core.eager.EagerTensor, "clear_gradient", clear_gradient)
+    else:
+        setattr(core.VarBase, "__name__", "Tensor")
+        setattr(core.VarBase, "grad", grad)
+
+    global _already_patch_repr
+    if not _already_patch_repr:
+        # NOTE(zhiqiu): pybind11 will set a default __str__ method of enum class.
+        # So, we need to overwrite it to a more readable one.
+        # See details in https://github.com/pybind/pybind11/issues/2537.
+        origin = getattr(core.VarDesc.VarType, "__repr__")
+
+        def dtype_str(dtype):
+            if dtype in _PADDLE_DTYPE_2_NUMPY_DTYPE:
+                prefix = 'paddle.'
+                return prefix + _PADDLE_DTYPE_2_NUMPY_DTYPE[dtype]
+            else:
+                # for example, paddle.fluid.core.VarDesc.VarType.LOD_TENSOR
+                return origin(dtype)
 
-    setattr(core.VarDesc.VarType, "__repr__", dtype_str)
+        setattr(core.VarDesc.VarType, "__repr__", dtype_str)
+        _already_patch_repr = True
 
     # patch math methods for varbase
     monkey_patch_math_varbase()
diff --git a/python/paddle/fluid/eager/__init__.py b/python/paddle/fluid/eager/__init__.py
deleted file mode 100644
index 1dc82ef69979c16135b4f4c9fbb56f0f5b680fb3..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/eager/__init__.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-
-# incubate directory is mainly for internal use
-# after we have tested incubate APIs in industrial application for a period
-# we will move stable functions into fluid
-
-from . import eager_tensor_patch_methods
-
-__all__ = []
diff --git a/python/paddle/fluid/eager/eager_tensor_patch_methods.py b/python/paddle/fluid/eager/eager_tensor_patch_methods.py
deleted file mode 100644
index 2586685ec1adad9b16d99702a509ba4ece12bbaa..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/eager/eager_tensor_patch_methods.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .. import core as core
-from .. import framework as framework
-from ..dygraph.parallel import scale_loss
-import numpy as np
-
-
-def monkey_patch_eagertensor():
-    def __str__(self):
-        from paddle.tensor.to_string import eager_tensor_to_string
-        return eager_tensor_to_string(self)
-
-    @framework.dygraph_only
-    def backward(self, grad_tensor=None, retain_graph=False):
-        """
-        Run backward of current Graph which starts from current Tensor.
-
-        The new gradient will accumulat on previous gradient.
-
-        You can clear gradient by ``Tensor.clear_grad()`` .
-
-        Args:
-            grad_tensor(Tensor, optional): initial gradient values of the current Tensor. If `grad_tensor` is None,
-            the initial gradient values of the current Tensor would be Tensor filled with 1.0;
-            if `grad_tensor` is not None, it must have the same length as the current Tensor.
-            Teh default value is None.
-
-            retain_graph(bool, optional): If False, the graph used to compute grads will be freed. If you would
-                like to add more ops to the built graph after calling this method( :code:`backward` ), set the parameter
-                :code:`retain_graph` to True, then the grads will be retained. Thus, seting it to False is much more memory-efficient.
-                Defaults to False.
-        Returns:
-            NoneType: None
-
-        Examples:
-            .. code-block:: python
-
-                import paddle
-                x = paddle.to_tensor(5., stop_gradient=False)
-                for i in range(5):
-                    y = paddle.pow(x, 4.0)
-                    y.backward()
-                    print("{}: {}".format(i, x.grad))
-                # 0: [500.]
-                # 1: [1000.]
-                # 2: [1500.]
-                # 3: [2000.]
-                # 4: [2500.]
-
-                x.clear_grad()
-                print("{}".format(x.grad))
-                # 0.
-
-                grad_tensor=paddle.to_tensor(2.)
-                for i in range(5):
-                    y = paddle.pow(x, 4.0)
-                    y.backward(grad_tensor)
-                    print("{}: {}".format(i, x.grad))
-                # 0: [1000.]
-                # 1: [2000.]
-                # 2: [3000.]
-                # 3: [4000.]
-                # 4: [5000.]
-
-        """
-        if framework.in_dygraph_mode():
-            if grad_tensor is not None:
-                assert isinstance(
-                    grad_tensor, core.eager.EagerTensor
-                ), "The type of grad_tensor must be paddle.Tensor"
-                assert grad_tensor.shape == self.shape, \
-                    "Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format(
-                    grad_tensor.name, grad_tensor.shape, self.name, self.shape)
-                grad_tensor = [grad_tensor]
-            else:
-                grad_tensor = []
-
-            if core.is_compiled_with_xpu() or core.is_compiled_with_npu():
-                # TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
-                scaled_loss = scale_loss(self)
-                core.eager.run_backward([scaled_loss], grad_tensor,
-                                        retain_graph)
-            else:
-                core.eager.run_backward([self], grad_tensor, retain_graph)
-        else:
-            raise ValueError(
-                "Variable.backward() is only available in DyGraph mode")
-
-    @framework.dygraph_only
-    def gradient(self):
-        """
-        .. warning::
-          This API will be deprecated in the future, it is recommended to use
-          :code:`x.grad` which returns the tensor value of the gradient.
-
-        Get the Gradient of Current Tensor.
-
-        Returns:
-            ndarray: Numpy value of the gradient of current Tensor
-
-        Examples:
-            .. code-block:: python
-
-                import paddle
-
-                x = paddle.to_tensor(5., stop_gradient=False)
-                y = paddle.pow(x, 4.0)
-                y.backward()
-                print("grad of x: {}".format(x.gradient()))
-                # [500.]
-
-        """
-        if self.grad._is_initialized():
-            return self.grad.numpy()
-        else:
-            return None
-        # TODO(wanghuancoder) support SELECTED_ROWS
-
-    if hasattr(core, "eager"):
-        setattr(core.eager.EagerTensor, "__str__", __str__)
-        setattr(core.eager.EagerTensor, "backward", backward)
-        setattr(core.eager.EagerTensor, "gradient", gradient)
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 73407ef834e228f918a0d0bd488b7cc17e685077..3d8cd1142cf3a5fb1b91c91cc654806ca39acea6 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -77,6 +77,7 @@ _global_expected_place_ = None
 _current_device = None
 global_prog_seed = 0
 _current_pipeline_stage = None
+_already_patch_eager_tensor = False
 _global_flags_ = core.globals()
 core._disable_eager_mode()
 
@@ -85,6 +86,11 @@ core._disable_eager_mode()
 def _test_eager_guard(tracer=None):
     core._enable_eager_mode()
     _C_ops.switch_to_eager_ops()
+    global _already_patch_eager_tensor
+    if not _already_patch_eager_tensor:
+        from .dygraph.varbase_patch_methods import monkey_patch_varbase
+        monkey_patch_varbase()
+        _already_patch_eager_tensor = True
     if tracer is None:
         core._set_eager_tracer(_dygraph_tracer_)
     else:
diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index 5d0b56ed537d84174d94817aca45381bc04f802e..fd1562d609a1d514ef6e2bf197e0ba3ed735aeff 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -604,7 +604,7 @@ class XavierInitializer(Initializer):
         if framework.in_dygraph_mode():
             if self._uniform:
                 limit = np.sqrt(6.0 / float(fan_in + fan_out))
-                out_var = _C_ops.uniform_random('shape', var.shape, 'min',
+                out_var = _C_ops.uniform_random('shape', out_var.shape, 'min',
                                                 -limit, 'max', limit, 'seed',
                                                 self._seed, 'dtype', out_dtype)
             else:
diff --git a/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py b/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py
index 3bf2be3d64beed570234bc6464a1530f9971bcd8..45cb7e785bc5ed794074988d7e64cec4d73df5a7 100644
--- a/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py
+++ b/python/paddle/fluid/tests/unittests/test_egr_code_generate_api.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import paddle.fluid.core as core
-import paddle.fluid.eager.eager_tensor_patch_methods as eager_tensor_patch_methods
 import paddle
 import numpy as np
 from paddle.fluid.framework import _test_eager_guard
diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py
index 6d81a27882ff0adcb249179d924c6666b591396d..e4576fe2ea8bda9dcbcda6b206053b61f22fb4c2 100644
--- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py
+++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import paddle.fluid.core as core
-import paddle.fluid.eager.eager_tensor_patch_methods as eager_tensor_patch_methods
 import paddle
 import numpy as np
 from paddle.fluid.framework import _test_eager_guard, EagerParamBase, _in_eager_mode
@@ -621,7 +620,7 @@ class EagerTensorPropertiesTestCase(unittest.TestCase):
         self.assertTrue(np.array_equal(tensor.numpy(), arr))
         print("Test copy_")
         tensor.copy_(tensor1, True)
-        self.assertEqual(tensor.persistable, True)
+        self.assertEqual(tensor.persistable, False)
         self.assertEqual(tensor.shape, [4, 16])
         self.assertEqual(tensor.dtype, core.VarDesc.VarType.FP32)
         self.assertTrue(np.array_equal(tensor.numpy(), arr1))
@@ -764,20 +763,21 @@ class EagerParamBaseUsageTestCase(unittest.TestCase):
         self.assertTrue(np.array_equal(res3, res4))
 
     def test_backward_with_single_tensor(self):
-        arr4 = np.random.rand(4, 16, 16, 32).astype('float32')
-        egr_tensor12 = core.eager.EagerTensor(arr4, core.CPUPlace())
-        egr_tensor12.retain_grads()
-        arr = np.ones([4, 16, 16, 32]).astype('float32')
-        self.assertEqual(egr_tensor12.persistable, False)
-        self.assertTrue("generated_tensor" in egr_tensor12.name)
-        self.assertEqual(egr_tensor12.shape, [4, 16, 16, 32])
-        self.assertEqual(egr_tensor12.dtype, core.VarDesc.VarType.FP32)
-        self.assertEqual(egr_tensor12.stop_gradient, True)
-        self.assertTrue(egr_tensor12.place._equals(paddle.fluid.CPUPlace()))
-        self.assertTrue(np.array_equal(egr_tensor12.numpy(), arr4))
-        self.assertTrue(np.array_equal(egr_tensor12.gradient(), None))
-        egr_tensor12.backward()
-        self.assertTrue(np.array_equal(egr_tensor12.gradient(), arr))
+        with _test_eager_guard():
+            arr4 = np.random.rand(4, 16, 16, 32).astype('float32')
+            egr_tensor12 = core.eager.EagerTensor(arr4, core.CPUPlace())
+            egr_tensor12.retain_grads()
+            arr = np.ones([4, 16, 16, 32]).astype('float32')
+            self.assertEqual(egr_tensor12.persistable, False)
+            self.assertTrue("generated_tensor" in egr_tensor12.name)
+            self.assertEqual(egr_tensor12.shape, [4, 16, 16, 32])
+            self.assertEqual(egr_tensor12.dtype, core.VarDesc.VarType.FP32)
+            self.assertEqual(egr_tensor12.stop_gradient, True)
+            self.assertTrue(egr_tensor12.place._equals(paddle.fluid.CPUPlace()))
+            self.assertTrue(np.array_equal(egr_tensor12.numpy(), arr4))
+            self.assertTrue(np.array_equal(egr_tensor12.gradient(), None))
+            egr_tensor12.backward()
+            self.assertTrue(np.array_equal(egr_tensor12.gradient(), arr))
 
 
 class EagerGuardTestCase(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
index 5b9e9ab8373abd4943ccc4a23ea92dfe5ff79de7..262d07336de08f89d6a40276d86299b8381b1d09 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
@@ -267,7 +267,7 @@ class TestImperative(unittest.TestCase):
             tmp = l1.weight * 2
             self.assertTrue(tmp.stop_gradient)
             x = paddle.to_tensor(data)
-            y = l0(x) + tmp
+            y = paddle.add(l0(x), tmp)
             o = l1(y)
             o.backward()
 
@@ -285,7 +285,7 @@ class TestImperative(unittest.TestCase):
             tmp = l1.weight * 2
             self.assertTrue(tmp.stop_gradient)
             x = paddle.to_tensor(data)
-            y = l0(x) + tmp
+            y = paddle.add(l0(x), tmp)
             o = l1(y)
             o.backward()
 
@@ -306,7 +306,7 @@ class TestImperative(unittest.TestCase):
             self.assertTrue(tmp.stop_gradient)
             self.assertTrue(tmp2.stop_gradient is False)
             x = paddle.to_tensor(data)
-            y = l0(x) + tmp2
+            y = paddle.add(l0(x), tmp2)
             o = l1(y)
             o.backward()
 
@@ -329,7 +329,7 @@ class TestImperative(unittest.TestCase):
                 tmp = paddle.to_tensor(x)
                 tmp.stop_gradient = False
                 inputs.append(tmp)
-            ret = fluid.layers.sums(inputs)
+            ret = paddle.add_n(inputs)
             loss = fluid.layers.reduce_sum(ret)
             loss.backward()
         with fluid.dygraph.guard():
diff --git a/python/setup.py.in b/python/setup.py.in
index f14111c7dabb98fda9a04a333feb5a3c33fb4bd5..6b38facb5fd5e6ee83319e35504bebcddf46b447 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -308,7 +308,6 @@ packages=['paddle',
           'paddle.fluid.dygraph',
           'paddle.fluid.dygraph.dygraph_to_static',
           'paddle.fluid.dygraph.amp',
-          'paddle.fluid.eager',
           'paddle.fluid.proto',
           'paddle.fluid.proto.profiler',
           'paddle.fluid.distributed',
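
Usage note (not part of the patch): the sketch below is a minimal, hypothetical example of exercising the eager-mode gradient path this diff wires up, modeled on the updated test_backward_with_single_tensor test. It assumes a build of this development branch in which core.eager, _test_eager_guard, and the newly patched clear_gradient(set_to_zero=...) binding are available; the printed values are expectations, not verified output.

import numpy as np
import paddle.fluid.core as core
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    # Build a leaf EagerTensor directly, as the updated unit test does.
    arr = np.random.rand(4, 16).astype('float32')
    t = core.eager.EagerTensor(arr, core.CPUPlace())
    t.retain_grads()

    # backward() is routed to core.eager.run_backward for EagerTensor.
    t.backward()
    print(t.gradient())               # expected: ones with shape [4, 16]

    # clear_gradient(set_to_zero=True) calls the new _zero_grads binding,
    # which overwrites the gradient with zeros_like(grad).
    t.clear_gradient(set_to_zero=True)
    print(t.gradient())               # expected: zeros with shape [4, 16]

    # clear_gradient(set_to_zero=False) calls _clear_gradient, which releases
    # the gradient's storage; gradient() should then return None.
    t.clear_gradient(set_to_zero=False)
    print(t.gradient())               # expected: None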