Unverified commit 9108e777, authored by wanghuancoder, committed by GitHub

[Eager] Support test imperative basic in eager test_empty_grad (#38376)

* Rearranged Eager AutoCodeGen directory structure

* Removed USE_OP in Eager AutoCodeGen

* Enabled generation for Operators without Grad/Inputs/Outputs

* Resolved operators without input

* Fixed merge conflicts

* Enabled Eager AutoCodeGen for 10+ more operators

* Refactored Eager AutoCodeGen with more organized helper objects

* Enabled Eager AutoCodeGen for operators with multiple OpBases

* Adjusted Eager AutoCodeGen to Enable Passing Output Tensor as Input Argument

* Handled Dispensable Inputs/Outputs in Eager AutoCodeGen

* Adjusted function generation/call between Python-C API & Dygraph API

* Synchronized auto-generated Python-C API with Dygraph Forward Functions

* support more eager tensor api

* fix merge compile error

* fix compile error and fit develop code

* support pure CPU

* fix some logic error in eager_mode

* support _varbase_creator in eager mode

* Added safe_initialized interface to EagerTensor for use in processing dispensable inputs

* for eager mode

* refine

* support multiple constructor for eager tensor

* add place related code

* polish code

* specific randint with dtype of int64

* Support pure cpu test

* eager logic

* refine test in pure cpu

* eager logic

* eager logic

* eager logic, test=develop

* skip core.eager when in inference, test=develop

* refine, test=develop

* refine, test=develop

* call RetainGrad after run forward kernel, test=develop

* refine, test=develop

* support dygraph util, meta, guard test

* eager test case

* support inference test

* refine test and fix initializer failed

* modify eagertensor patch method

* add eagertensor.clear_grandint, test=develop

* refine, test=develop

* refine, test=develop

* refine, test=develop

* call monkey_patch_varbase in _test_eager_guard, test=develop

* split clear_gradient to clear_gradient and zero_grads, test=develop

* refine, test=develop

* refine, test=develop

* refine, test=develop
Co-authored-by: Njim19930609 <jim19930609@gmail.com>
Co-authored-by: NJiabinYang <360788950@qq.com>
Parent commit: e1cc2236
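Note: the diffs below remove the paddle.fluid.eager package, fold its EagerTensor patching into monkey_patch_varbase (now invoked lazily by _test_eager_guard), and split gradient clearing into _clear_gradient (release the grad buffer) and _zero_grads (fill it with zeros), exposed through the patched clear_gradient(set_to_zero=...). The sketch below is illustrative only and is not part of this commit; it assumes a build of this branch where core.eager is available, and shows how the patched eager-mode methods are expected to be exercised.

import numpy as np
import paddle
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    # Entering the guard triggers monkey_patch_varbase() once, so the methods
    # patched in this commit land on core.eager.EagerTensor.
    x = paddle.to_tensor(np.ones([2, 2], dtype='float32'), stop_gradient=False)
    x.retain_grads()                      # mirrors the updated unit test
    y = x * 2                             # math ops come from monkey_patch_math_varbase
    y.backward()                          # routed to core.eager.run_backward in eager mode
    print(x.gradient())                   # eager-mode gradient() returns self.grad.numpy()
    x.clear_gradient(set_to_zero=True)    # zeroes the grad buffer via the new _zero_grads
    x.clear_gradient(set_to_zero=False)   # releases the grad storage via the new _clear_gradient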
@@ -27,6 +27,7 @@ limitations under the License. */
#include "paddle/fluid/pybind/eager.h"
#include "paddle/fluid/pybind/eager_utils.h"
#include "paddle/fluid/pybind/exception.h"
+#include "paddle/pten/api/include/api.h"
#include "paddle/pten/common/data_type.h"
#include "paddle/pten/core/convert_utils.h"
#include "paddle/pten/core/dense_tensor.h"
@@ -125,13 +126,17 @@ static PyObject* eager_tensor_method_copy_(EagerTensorObject* self,
  bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 1), 1);
  VLOG(6) << "Start Copy Tensor " << src_tensor.name() << " to "
          << self->eager_tensor.name();
-  self->eager_tensor.copy_(src_tensor, blocking);
+  if (!self->eager_tensor.defined()) {
  egr::EagerUtils::autograd_meta(&(self->eager_tensor))
      ->SetStopGradient(
          egr::EagerUtils::autograd_meta(&(src_tensor))->StopGradient());
  egr::EagerUtils::autograd_meta(&(self->eager_tensor))
      ->SetPersistable(
          egr::EagerUtils::autograd_meta(&(src_tensor))->Persistable());
+  }
+  self->eager_tensor.copy_(src_tensor, blocking);
  VLOG(6) << "Finish Copy Tensor " << src_tensor.name() << " to "
          << self->eager_tensor.name();
  Py_INCREF(Py_None);
@@ -156,6 +161,74 @@ static PyObject* eager_tensor_retain_grads(EagerTensorObject* self,
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

+static PyObject* eager_tensor__clear_gradient(EagerTensorObject* self,
+                                              PyObject* args,
+                                              PyObject* kwargs) {
+  EAGER_SYNC_TRY
+  VLOG(4) << "ClearGradient " << self->eager_tensor.name();
+  egr::EagerTensor grad;
+  if (egr::egr_utils_api::IsLeafTensor(self->eager_tensor)) {
+    // Add RetainGrad as PostHook to AccumulationNode
+    std::shared_ptr<egr::GradNodeBase> grad_node =
+        egr::EagerUtils::grad_node(self->eager_tensor);
+    PADDLE_ENFORCE(
+        grad_node.get() != nullptr,
+        paddle::platform::errors::Fatal("Detected NULL grad_node"
+                                        "Leaf tensor should have had grad_node "
+                                        "with type: GradNodeAccumulation"));
+    auto accumulation_grad_node =
+        std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
+    grad = accumulation_grad_node->Grad();
+  } else {
+    auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor);
+    grad = meta->Grad();
+  }
+  if (grad.initialized()) {
+    VLOG(4) << "Gradient of " << self->eager_tensor.name()
+            << " is initialized, will be released.";
+    auto dense_tensor =
+        std::dynamic_pointer_cast<pten::DenseTensor>(grad.impl());
+    dense_tensor->release();
+  }
+  Py_INCREF(Py_None);
+  return Py_None;
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
+static PyObject* eager_tensor__zero_grads(EagerTensorObject* self,
+                                          PyObject* args, PyObject* kwargs) {
+  EAGER_TRY
+  VLOG(4) << "ZeroGrads " << self->eager_tensor.name();
+  egr::EagerTensor grad;
+  if (egr::egr_utils_api::IsLeafTensor(self->eager_tensor)) {
+    // Add RetainGrad as PostHook to AccumulationNode
+    std::shared_ptr<egr::GradNodeBase> grad_node =
+        egr::EagerUtils::grad_node(self->eager_tensor);
+    PADDLE_ENFORCE(
+        grad_node.get() != nullptr,
+        paddle::platform::errors::Fatal("Detected NULL grad_node"
+                                        "Leaf tensor should have had grad_node "
+                                        "with type: GradNodeAccumulation"));
+    auto accumulation_grad_node =
+        std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
+    grad = accumulation_grad_node->Grad();
+  } else {
+    auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor);
+    grad = meta->Grad();
+  }
+  if (grad.initialized()) {
+    grad.set_tensor(std::make_shared<paddle::experimental::Tensor>(
+        paddle::experimental::zeros_like(*(grad.Tensor().get()))));
+  }
+  Py_INCREF(Py_None);
+  return Py_None;
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
PyMethodDef variable_methods[] = {
    {"numpy", (PyCFunction)(void (*)(void))eager_tensor_method_numpy,
     METH_VARARGS | METH_KEYWORDS, NULL},
@@ -168,6 +241,11 @@ PyMethodDef variable_methods[] = {
     METH_VARARGS | METH_KEYWORDS, NULL},
    {"retain_grads", (PyCFunction)(void (*)(void))eager_tensor_retain_grads,
     METH_VARARGS | METH_KEYWORDS, NULL},
+    {"_clear_gradient",
+     (PyCFunction)(void (*)(void))eager_tensor__clear_gradient,
+     METH_VARARGS | METH_KEYWORDS, NULL},
+    {"_zero_grads", (PyCFunction)(void (*)(void))eager_tensor__zero_grads,
+     METH_VARARGS | METH_KEYWORDS, NULL},
    {NULL, NULL, 0, NULL}};
} // namespace pybind
......
@@ -222,6 +222,8 @@ std::vector<egr::EagerTensor> CastPyArg2VectorOfEagerTensor(PyObject* obj,
            reinterpret_cast<PyTypeObject*>(item->ob_type)->tp_name, i));
      }
    }
+  } else if (obj == Py_None) {
+    return {};
  } else {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "argument (position %d) must be "
@@ -263,6 +265,8 @@ std::vector<int> CastPyArg2VectorOfInt(PyObject* obj, size_t arg_pos) {
            reinterpret_cast<PyTypeObject*>(item->ob_type)->tp_name, i));
      }
    }
+  } else if (obj == Py_None) {
+    return {};
  } else {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "argument (position %d) must be "
@@ -557,6 +561,8 @@ std::vector<egr::EagerTensor> GetEagerTensorListFromArgs(
          reinterpret_cast<EagerTensorObject*>(PyTuple_GetItem(list, i))
              ->eager_tensor);
    }
+  } else if (list == Py_None) {
+    return {};
  } else {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "%s(): argument '%s' (position %d) must be list of Tensors, but got "
@@ -634,6 +640,8 @@ std::vector<egr::EagerTensor*> GetEagerTensorPtrListFromArgs(
          &(reinterpret_cast<EagerTensorObject*>(PyTuple_GetItem(list, i))
                ->eager_tensor));
    }
+  } else if (list == Py_None) {
+    return {};
  } else {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "%s(): argument '%s' (position %d) must be list of Tensors, but got "
@@ -644,6 +652,5 @@ std::vector<egr::EagerTensor*> GetEagerTensorPtrListFromArgs(
  return result;
}
} // namespace pybind
} // namespace paddle
@@ -55,7 +55,6 @@ from . import initializer
from .initializer import set_global_initializer
from . import layers
from . import dygraph
-from . import eager
from . import contrib
from . import nets
from . import optimizer
@@ -91,7 +90,6 @@ from .dygraph.base import enable_dygraph, disable_dygraph
from .io import save, load, load_program_state, set_program_state
from .dygraph.checkpoint import save_dygraph, load_dygraph
from .dygraph.varbase_patch_methods import monkey_patch_varbase
-from .eager.eager_tensor_patch_methods import monkey_patch_eagertensor
from . import generator
from .core import _cuda_synchronize
from .generator import Generator
@@ -115,7 +113,6 @@ __all__ = framework.__all__ + executor.__all__ + \
    'contrib',
    'data',
    'dygraph',
-    'eager',
    'enable_dygraph',
    'disable_dygraph',
    'enable_imperative',
@@ -221,7 +218,6 @@ def __bootstrap__():
monkey_patch_variable()
__bootstrap__()
monkey_patch_varbase()
-monkey_patch_eagertensor()
# NOTE(zhiqiu): register npu_finalize on the exit of Python,
# do some clean up manually.
......
@@ -60,6 +60,7 @@ _complex_dtypes = [
]

_already_patch_varbase = False
+_already_patch_eager_tensor = False


def monkey_patch_math_varbase():
@@ -220,7 +221,11 @@ def monkey_patch_math_varbase():
        # 2. create varbase for scalar
        lhs_dtype = self.dtype
-        if not isinstance(other_var, core.VarBase):
+        if _in_eager_mode():
+            other_var_should_be = core.eager.EagerTensor
+        else:
+            other_var_should_be = core.VarBase
+        if not isinstance(other_var, other_var_should_be):
            if isinstance(other_var, complex):
                import paddle
                other_var = paddle.to_tensor(other_var, dtype='complex64')
@@ -333,22 +338,30 @@ def monkey_patch_math_varbase():
    ]

    global _already_patch_varbase
+    global _already_patch_eager_tensor
+
+    if core._in_eager_mode():
+        local_already_patch = _already_patch_eager_tensor
+        _already_patch_eager_tensor = True
+        local_tensor = core.eager.EagerTensor
+    else:
+        local_already_patch = _already_patch_varbase
+        _already_patch_varbase = True
+        local_tensor = core.VarBase

-    if not _already_patch_varbase:
+    if not local_already_patch:
        for method in varbase_methods:
            method_name = method[0]
            method_impl = method[1]
-            setattr(core.VarBase, method_name, method_impl)
+            setattr(local_tensor, method_name, method_impl)
    else:
        import paddle.tensor
        # Tensor method from module paddle.tensor
        for method_name in paddle.tensor.tensor_method_func:
-            if hasattr(core.VarBase, method_name): continue
+            if hasattr(local_tensor, method_name): continue
            method_impl = getattr(paddle.tensor, method_name, None)
-            if method_impl: setattr(core.VarBase, method_name, method_impl)
+            if method_impl: setattr(local_tensor, method_name, method_impl)

        for magic_method, origin_method in paddle.tensor.magic_method_func:
            impl = getattr(paddle.tensor, origin_method, None)
-            if impl: setattr(core.VarBase, magic_method, impl)
+            if impl: setattr(local_tensor, magic_method, impl)
-
-    _already_patch_varbase = True
@@ -22,7 +22,7 @@ import paddle
from .. import framework
from .. import core
from .. import unique_name
-from ..framework import Variable, Parameter, ParamBase, _getitem_impl_, _setitem_impl_
+from ..framework import Variable, Parameter, ParamBase, _getitem_impl_, _setitem_impl_, _in_eager_mode
from .base import switch_to_static_graph
from .math_op_patch import monkey_patch_math_varbase
from .parallel import scale_loss
@@ -58,6 +58,9 @@ class TensorHookRemoveHelper(object):
        return False

+_already_patch_repr = False
+

def monkey_patch_varbase():
    @switch_to_static_graph
    def _to_static_var(self, to_parameter=False, **kwargs):
@@ -146,7 +149,11 @@ def monkey_patch_varbase():
                out = linear(t)  # call with different weight

        """
-        assert isinstance(value, (np.ndarray, core.VarBase, dict, str)), \
+        if _in_eager_mode():
+            base_tensor = core.eager.EagerTensor
+        else:
+            base_tensor = core.VarBase
+        assert isinstance(value, (np.ndarray, base_tensor, dict, str)), \
            "Variable set_value function, arguments type only support Variable, numpy, VarBase, dict, string."

        if isinstance(value, (dict, str)):
@@ -160,7 +167,7 @@ def monkey_patch_varbase():
                self.value().set_string_list(value)
        else:
            value_np = value
-            if isinstance(value, core.VarBase):
+            if isinstance(value, base_tensor):
                value_np = value.numpy()

            self_tensor_np = self.numpy()
@@ -231,6 +238,11 @@ def monkey_patch_varbase():
        """
        if framework.in_dygraph_mode():
            if grad_tensor is not None:
+                if _in_eager_mode():
+                    assert isinstance(
+                        grad_tensor, core.eager.EagerTensor
+                    ), "The type of grad_tensor must be paddle.Tensor"
+                else:
                assert isinstance(
                    grad_tensor, paddle.
                    Tensor), "The type of grad_tensor must be paddle.Tensor"
@@ -238,14 +250,27 @@ def monkey_patch_varbase():
                    "Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format(
                    grad_tensor.name, grad_tensor.shape, self.name, self.shape)

+            if _in_eager_mode():
+                if grad_tensor is None:
+                    grad_tensor = []
+                else:
+                    grad_tensor = [grad_tensor]
            if paddle.is_compiled_with_xpu() or paddle.is_compiled_with_npu():
                # TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
                scaled_loss = scale_loss(self)
+                if _in_eager_mode():
+                    core.eager.run_backward([scaled_loss], grad_tensor,
+                                            retain_graph)
+                else:
                core.dygraph_run_backward([scaled_loss], [grad_tensor],
                                          retain_graph,
                                          framework._dygraph_tracer())
            else:
-                core.dygraph_run_backward([self], [grad_tensor], retain_graph,
+                if _in_eager_mode():
+                    core.eager.run_backward([self], grad_tensor, retain_graph)
+                else:
+                    core.dygraph_run_backward([self], [grad_tensor],
+                                              retain_graph,
                                          framework._dygraph_tracer())
        else:
            raise ValueError(
@@ -280,12 +305,19 @@ def monkey_patch_varbase():
              # [500.]
        """
+        if _in_eager_mode():
+            if not self.grad._is_initialized():
+                return None
+            # TODO(wanghuancoder) support SELECTED_ROWS
+            return self.grad.numpy()
+        else:
            if self._grad_ivar() is None:
                return None

            new_ivar = self._grad_ivar()._copy_to(core.CPUPlace(), True)
            if self._grad_ivar().type == core.VarDesc.VarType.SELECTED_ROWS:
-                return (np.array(new_ivar.value().get_selected_rows().get_tensor()),
+                return (
+                    np.array(new_ivar.value().get_selected_rows().get_tensor()),
                    np.array(new_ivar.value().get_selected_rows().rows()))
            else:
                return np.array(new_ivar.value().get_tensor())
@@ -555,6 +587,10 @@ def monkey_patch_varbase():
              # [[0.30574632, 0.55739117, 0.30902600, 0.39413780, 0.44830436],
              # [0.79010487, 0.53972793, 0.09495186, 0.44267157, 0.72112119]])
        """
+        if _in_eager_mode():
+            from paddle.tensor.to_string import eager_tensor_to_string
+            return eager_tensor_to_string(self)
+        else:
            from paddle.tensor.to_string import to_string
            return to_string(self)
@@ -583,6 +619,9 @@ def monkey_patch_varbase():
            raise RuntimeError(
                "Only Leaf Tensor support the deepcopy at the moment, non-Leaf Tensors contains graph information that does't support deepcopy"
            )
+        if _in_eager_mode():
+            new_varbase = core.eager.EagerTensor()
+        else:
        new_varbase = core.VarBase()
        new_varbase.name = self.name + unique_name.generate("_deepcopy")
        memo[id(self)] = new_varbase
@@ -717,19 +756,47 @@ def monkey_patch_varbase():
        # Call c++ func __setitem_varbase__ to speedup.
        return self.__setitem_varbase__(item, value)

+    @framework.dygraph_only
+    def _grad_ivar(self):
+        if self.grad._is_initialized():
+            return self.grad
+        else:
+            return None
+
+    @framework.dygraph_only
+    def clear_gradient(self, set_to_zero=True):
+        if set_to_zero:
+            self._zero_grads()
+        else:
+            self._clear_gradient()
+
+    if core._in_eager_mode() and not hasattr(core, "eager"):
+        return
+
    for method_name, method in (
        ("__bool__", __bool__), ("__nonzero__", __nonzero__),
        ("_to_static_var", _to_static_var), ("set_value", set_value),
        ("block", block), ("backward", backward), ("clear_grad", clear_grad),
-        ("inplace_version", inplace_version), ("grad", grad),
-        ("gradient", gradient), ("register_hook", register_hook),
-        ("__str__", __str__), ("__repr__", __str__),
-        ("__deepcopy__", __deepcopy__), ("__module__", "paddle"),
-        ("__name__", "Tensor"), ("__array__", __array__),
+        ("inplace_version", inplace_version), ("gradient", gradient),
+        ("register_hook", register_hook), ("__str__", __str__),
+        ("__repr__", __str__), ("__deepcopy__", __deepcopy__),
+        ("__module__", "paddle"), ("__array__", __array__),
        ("__getitem__", __getitem__), ("item", item),
        ("__setitem__", __setitem__), ("_to", _to)):
+        if core._in_eager_mode():
+            setattr(core.eager.EagerTensor, method_name, method)
+        else:
            setattr(core.VarBase, method_name, method)
+
+    if core._in_eager_mode():
+        setattr(core.eager.EagerTensor, "_grad_ivar", _grad_ivar)
+        setattr(core.eager.EagerTensor, "clear_gradient", clear_gradient)
+    else:
+        setattr(core.VarBase, "__name__", "Tensor")
+        setattr(core.VarBase, "grad", grad)
+
+    global _already_patch_repr
+    if not _already_patch_repr:
        # NOTE(zhiqiu): pybind11 will set a default __str__ method of enum class.
        # So, we need to overwrite it to a more readable one.
        # See details in https://github.com/pybind/pybind11/issues/2537.
@@ -744,6 +811,7 @@ def monkey_patch_varbase():
            return origin(dtype)

        setattr(core.VarDesc.VarType, "__repr__", dtype_str)
+        _already_patch_repr = True

    # patch math methods for varbase
    monkey_patch_math_varbase()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# incubate directory is mainly for internal use
# after we have tested incubate APIs in industrial application for a period
# we will move stable functions into fluid
from . import eager_tensor_patch_methods
__all__ = []
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .. import core as core
from .. import framework as framework
from ..dygraph.parallel import scale_loss
import numpy as np
def monkey_patch_eagertensor():
    def __str__(self):
        from paddle.tensor.to_string import eager_tensor_to_string
        return eager_tensor_to_string(self)

    @framework.dygraph_only
    def backward(self, grad_tensor=None, retain_graph=False):
        """
        Run backward of current Graph which starts from current Tensor.

        The new gradient will accumulat on previous gradient.

        You can clear gradient by ``Tensor.clear_grad()`` .

        Args:
            grad_tensor(Tensor, optional): initial gradient values of the current Tensor. If `grad_tensor` is None,
                the initial gradient values of the current Tensor would be Tensor filled with 1.0;
                if `grad_tensor` is not None, it must have the same length as the current Tensor.
                Teh default value is None.
            retain_graph(bool, optional): If False, the graph used to compute grads will be freed. If you would
                like to add more ops to the built graph after calling this method( :code:`backward` ), set the parameter
                :code:`retain_graph` to True, then the grads will be retained. Thus, seting it to False is much more memory-efficient.
                Defaults to False.

        Returns:
            NoneType: None

        Examples:
            .. code-block:: python

                import paddle
                x = paddle.to_tensor(5., stop_gradient=False)
                for i in range(5):
                    y = paddle.pow(x, 4.0)
                    y.backward()
                    print("{}: {}".format(i, x.grad))
                # 0: [500.]
                # 1: [1000.]
                # 2: [1500.]
                # 3: [2000.]
                # 4: [2500.]

                x.clear_grad()
                print("{}".format(x.grad))
                # 0.

                grad_tensor=paddle.to_tensor(2.)
                for i in range(5):
                    y = paddle.pow(x, 4.0)
                    y.backward(grad_tensor)
                    print("{}: {}".format(i, x.grad))
                # 0: [1000.]
                # 1: [2000.]
                # 2: [3000.]
                # 3: [4000.]
                # 4: [5000.]
        """
        if framework.in_dygraph_mode():
            if grad_tensor is not None:
                assert isinstance(
                    grad_tensor, core.eager.EagerTensor
                ), "The type of grad_tensor must be paddle.Tensor"
                assert grad_tensor.shape == self.shape, \
                    "Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format(
                    grad_tensor.name, grad_tensor.shape, self.name, self.shape)
                grad_tensor = [grad_tensor]
            else:
                grad_tensor = []
            if core.is_compiled_with_xpu() or core.is_compiled_with_npu():
                # TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
                scaled_loss = scale_loss(self)
                core.eager.run_backward([scaled_loss], grad_tensor,
                                        retain_graph)
            else:
                core.eager.run_backward([self], grad_tensor, retain_graph)
        else:
            raise ValueError(
                "Variable.backward() is only available in DyGraph mode")

    @framework.dygraph_only
    def gradient(self):
        """
        .. warning::
          This API will be deprecated in the future, it is recommended to use
          :code:`x.grad` which returns the tensor value of the gradient.

        Get the Gradient of Current Tensor.

        Returns:
            ndarray: Numpy value of the gradient of current Tensor

        Examples:
            .. code-block:: python

                import paddle

                x = paddle.to_tensor(5., stop_gradient=False)
                y = paddle.pow(x, 4.0)
                y.backward()
                print("grad of x: {}".format(x.gradient()))
                # [500.]
        """
        if self.grad._is_initialized():
            return self.grad.numpy()
        else:
            return None
        # TODO(wanghuancoder) support SELECTED_ROWS

    if hasattr(core, "eager"):
        setattr(core.eager.EagerTensor, "__str__", __str__)
        setattr(core.eager.EagerTensor, "backward", backward)
        setattr(core.eager.EagerTensor, "gradient", gradient)
@@ -77,6 +77,7 @@ _global_expected_place_ = None
_current_device = None
global_prog_seed = 0
_current_pipeline_stage = None
+_already_patch_eager_tensor = False
_global_flags_ = core.globals()
core._disable_eager_mode()
@@ -85,6 +86,11 @@ core._disable_eager_mode()
def _test_eager_guard(tracer=None):
    core._enable_eager_mode()
    _C_ops.switch_to_eager_ops()
+    global _already_patch_eager_tensor
+    if not _already_patch_eager_tensor:
+        from .dygraph.varbase_patch_methods import monkey_patch_varbase
+        monkey_patch_varbase()
+        _already_patch_eager_tensor = True
    if tracer is None:
        core._set_eager_tracer(_dygraph_tracer_)
    else:
......
@@ -604,7 +604,7 @@ class XavierInitializer(Initializer):
        if framework.in_dygraph_mode():
            if self._uniform:
                limit = np.sqrt(6.0 / float(fan_in + fan_out))
-                out_var = _C_ops.uniform_random('shape', var.shape, 'min',
+                out_var = _C_ops.uniform_random('shape', out_var.shape, 'min',
                                                -limit, 'max', limit, 'seed',
                                                self._seed, 'dtype', out_dtype)
            else:
......
@@ -13,7 +13,6 @@
# limitations under the License.

import paddle.fluid.core as core
-import paddle.fluid.eager.eager_tensor_patch_methods as eager_tensor_patch_methods
import paddle
import numpy as np
from paddle.fluid.framework import _test_eager_guard
......
@@ -13,7 +13,6 @@
# limitations under the License.

import paddle.fluid.core as core
-import paddle.fluid.eager.eager_tensor_patch_methods as eager_tensor_patch_methods
import paddle
import numpy as np
from paddle.fluid.framework import _test_eager_guard, EagerParamBase, _in_eager_mode
@@ -621,7 +620,7 @@ class EagerTensorPropertiesTestCase(unittest.TestCase):
        self.assertTrue(np.array_equal(tensor.numpy(), arr))
        print("Test copy_")
        tensor.copy_(tensor1, True)
-        self.assertEqual(tensor.persistable, True)
+        self.assertEqual(tensor.persistable, False)
        self.assertEqual(tensor.shape, [4, 16])
        self.assertEqual(tensor.dtype, core.VarDesc.VarType.FP32)
        self.assertTrue(np.array_equal(tensor.numpy(), arr1))
@@ -764,6 +763,7 @@ class EagerParamBaseUsageTestCase(unittest.TestCase):
        self.assertTrue(np.array_equal(res3, res4))

    def test_backward_with_single_tensor(self):
+        with _test_eager_guard():
            arr4 = np.random.rand(4, 16, 16, 32).astype('float32')
            egr_tensor12 = core.eager.EagerTensor(arr4, core.CPUPlace())
            egr_tensor12.retain_grads()
......
@@ -267,7 +267,7 @@ class TestImperative(unittest.TestCase):
            tmp = l1.weight * 2
            self.assertTrue(tmp.stop_gradient)
            x = paddle.to_tensor(data)
-            y = l0(x) + tmp
+            y = paddle.add(l0(x), tmp)
            o = l1(y)
            o.backward()
@@ -285,7 +285,7 @@ class TestImperative(unittest.TestCase):
            tmp = l1.weight * 2
            self.assertTrue(tmp.stop_gradient)
            x = paddle.to_tensor(data)
-            y = l0(x) + tmp
+            y = paddle.add(l0(x), tmp)
            o = l1(y)
            o.backward()
@@ -306,7 +306,7 @@ class TestImperative(unittest.TestCase):
            self.assertTrue(tmp.stop_gradient)
            self.assertTrue(tmp2.stop_gradient is False)
            x = paddle.to_tensor(data)
-            y = l0(x) + tmp2
+            y = paddle.add(l0(x), tmp2)
            o = l1(y)
            o.backward()
@@ -329,7 +329,7 @@ class TestImperative(unittest.TestCase):
                tmp = paddle.to_tensor(x)
                tmp.stop_gradient = False
                inputs.append(tmp)
-            ret = fluid.layers.sums(inputs)
+            ret = paddle.add_n(inputs)
            loss = fluid.layers.reduce_sum(ret)
            loss.backward()
        with fluid.dygraph.guard():
......
@@ -308,7 +308,6 @@ packages=['paddle',
          'paddle.fluid.dygraph',
          'paddle.fluid.dygraph.dygraph_to_static',
          'paddle.fluid.dygraph.amp',
-          'paddle.fluid.eager',
          'paddle.fluid.proto',
          'paddle.fluid.proto.profiler',
          'paddle.fluid.distributed',
......