diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc
index 757b5453ec2ce43d498b4650406fe656f16df8e5..493a8d0b33f1d2538e3944c7d99a0a6487f460fe 100644
--- a/paddle/fluid/pybind/eager_functions.cc
+++ b/paddle/fluid/pybind/eager_functions.cc
@@ -248,6 +248,40 @@ PyObject* eager_api_get_grads_lists(PyObject* self,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
+PyObject* eager_api_get_grads_types(PyObject* self,
+                                    PyObject* args,
+                                    PyObject* kwargs) {
+  EAGER_TRY
+  auto tensor_list = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 0), 0);
+
+  std::vector<int> ret;
+
+  for (auto& tensor : tensor_list) {
+    VLOG(6) << "Get grad for tensor: " << tensor.name();
+    auto meta = egr::EagerUtils::nullable_autograd_meta(tensor);
+    if (!meta || meta->StopGradient()) {
+      ret.emplace_back(-1);
+      continue;
+    }
+
+    auto& grad = meta->Grad();
+    if (meta && grad.initialized()) {
+      if (grad.is_dense_tensor() &&
+          (tensor.dtype() == paddle::experimental::DataType::FLOAT32 ||
+           tensor.dtype() == paddle::experimental::DataType::FLOAT16)) {
+        ret.emplace_back(
+            paddle::framework::TransToProtoVarType(tensor.dtype()));
+      }
+    } else {
+      ret.emplace_back(-1);
+    }
+  }
+
+  return ToPyObject(ret);
+
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
 static PyObject* eager_api_read_next_tensor_list(PyObject* self,
                                                  PyObject* args,
                                                  PyObject* kwargs) {
@@ -1067,6 +1101,10 @@ PyMethodDef variable_functions[] = {
      (PyCFunction)(void (*)(void))eager_api_get_grads_lists,
      METH_VARARGS | METH_KEYWORDS,
      NULL},
+    {"get_grads_types",
+     (PyCFunction)(void (*)(void))eager_api_get_grads_types,
+     METH_VARARGS | METH_KEYWORDS,
+     NULL},
     {"read_next_tensor_list",
      (PyCFunction)(void (*)(void))eager_api_read_next_tensor_list,
      METH_VARARGS | METH_KEYWORDS,
diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc
index b4f65faf64a5e6987d87c032657a319864922265..04f9e20aa284869a7d54d80d792c4c6e5477455d 100644
--- a/paddle/fluid/pybind/eager_utils.cc
+++ b/paddle/fluid/pybind/eager_utils.cc
@@ -721,11 +721,14 @@ PyObject* ToPyObject(const std::vector<paddle::experimental::Tensor>& value,
 }
 
 PyObject* ToPyObject(
-    const std::vector<std::vector<paddle::experimental::Tensor>>& value) {
+    const std::vector<std::vector<paddle::experimental::Tensor>>& value,
+    bool return_py_none_if_not_initialize) {
   PyObject* result = PyList_New((Py_ssize_t)value.size());
 
   for (size_t i = 0; i < value.size(); i++) {
-    PyList_SET_ITEM(result, static_cast<Py_ssize_t>(i), ToPyObject(value[i]));
+    PyList_SET_ITEM(result,
+                    static_cast<Py_ssize_t>(i),
+                    ToPyObject(value[i], return_py_none_if_not_initialize));
   }
 
   return result;
diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h
index 145eeacc049f91f3e2c6eee88e585382f96183e9..ea24711fabd23443e6bb237ddceee056add2a6d3 100644
--- a/paddle/fluid/pybind/eager_utils.h
+++ b/paddle/fluid/pybind/eager_utils.h
@@ -104,7 +104,8 @@ PyObject* ToPyObject(const std::vector<std::vector<size_t>>& value);
 PyObject* ToPyObject(const std::vector<paddle::experimental::Tensor>& value,
                      bool return_py_none_if_not_initialize = false);
 PyObject* ToPyObject(
-    const std::vector<std::vector<paddle::experimental::Tensor>>& value);
+    const std::vector<std::vector<paddle::experimental::Tensor>>& value,
+    bool return_py_none_if_not_initialize = false);
 PyObject* ToPyObject(const platform::Place& value);
 PyObject* ToPyObject(const phi::DenseTensor* value);
 PyObject* ToPyObject(const phi::SelectedRows* value);
diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py
index f298a358bb7e9418e6b4f9cf2f1ba163ae526eb2..74499b05f24ae5a5ceb5784d3b057565d10ce052 100644
--- a/python/paddle/optimizer/adam.py
+++ b/python/paddle/optimizer/adam.py
@@ -28,6 +28,8 @@ from paddle import _C_ops, _legacy_C_ops
 
 __all__ = []
 
+GRAD_TYPES = [int(paddle.float32), int(paddle.float16)]
+
 
 class Adam(Optimizer):
     r"""
@@ -644,26 +646,47 @@ class Adam(Optimizer):
         lr_dict = {'FP32_LODTensor': [], 'FP16_LODTensor': []}
 
         if isinstance(parameters_and_grads, list):
-            for param_and_grad in parameters_and_grads:
-                if param_and_grad[1] is None:
-                    continue
-                if param_and_grad[0].stop_gradient is False:
-                    if (
-                        param_and_grad[0].dtype == paddle.float32
-                        and param_and_grad[1].type
-                        == core.VarDesc.VarType.LOD_TENSOR
-                    ):
-                        grad_dict['FP32_LODTensor'].append(param_and_grad[1])
-                        lr = self._create_param_lr(param_and_grad)
+            if framework.in_dygraph_mode():
+                params = [pair[0] for pair in parameters_and_grads]
+                grads_types = core.eager.get_grads_types(params)
+                for index, tp in enumerate(grads_types):
+                    if tp == GRAD_TYPES[0]:
+                        grad_dict['FP32_LODTensor'].append(
+                            parameters_and_grads[index][1]
+                        )
+                        lr = self._create_param_lr(parameters_and_grads[index])
                         lr_dict['FP32_LODTensor'].append(lr)
-                    elif (
-                        param_and_grad[0].dtype == paddle.float16
-                        and param_and_grad[1].type
-                        == core.VarDesc.VarType.LOD_TENSOR
-                    ):
-                        grad_dict['FP16_LODTensor'].append(param_and_grad[1])
-                        lr = self._create_param_lr(param_and_grad)
+                    elif tp == GRAD_TYPES[1]:
+                        grad_dict['FP16_LODTensor'].append(
+                            parameters_and_grads[index][1]
+                        )
+                        lr = self._create_param_lr(parameters_and_grads[index])
                         lr_dict['FP16_LODTensor'].append(lr)
+            else:
+                for param_and_grad in parameters_and_grads:
+                    if param_and_grad[1] is None:
+                        continue
+                    if param_and_grad[0].stop_gradient is False:
+                        if (
+                            param_and_grad[0].dtype == paddle.float32
+                            and param_and_grad[1].type
+                            == core.VarDesc.VarType.LOD_TENSOR
+                        ):
+                            grad_dict['FP32_LODTensor'].append(
+                                param_and_grad[1]
+                            )
+                            lr = self._create_param_lr(param_and_grad)
+                            lr_dict['FP32_LODTensor'].append(lr)
+                        elif (
+                            param_and_grad[0].dtype == paddle.float16
+                            and param_and_grad[1].type
+                            == core.VarDesc.VarType.LOD_TENSOR
+                        ):
+                            grad_dict['FP16_LODTensor'].append(
+                                param_and_grad[1]
+                            )
+                            lr = self._create_param_lr(param_and_grad)
+                            lr_dict['FP16_LODTensor'].append(lr)
         else:
            for param_and_grad in parameters_and_grads['params']:
                if param_and_grad[1] is None:
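
Usage sketch (not part of the patch above). The new core.eager.get_grads_types binding returns proto VarType ids for parameters whose dense gradients are FP32/FP16, and -1 where the gradient is stopped or uninitialized, which is what lets the dygraph branch in adam.py bucket gradients with one batched C++ call instead of per-parameter dtype checks in Python. The snippet below mirrors that branch as a standalone helper; split_grads_by_dtype is a hypothetical name, and it assumes an eager-mode program where core is paddle.fluid.core, as imported in adam.py.

    import paddle
    from paddle.fluid import core

    # Same constant as the adam.py change: proto VarType ids for FP32 and FP16.
    GRAD_TYPES = [int(paddle.float32), int(paddle.float16)]


    def split_grads_by_dtype(parameters_and_grads):
        """Bucket gradients by dtype using one batched call into C++.

        parameters_and_grads is a list of (param, grad) pairs, as in the
        optimizer's multi-tensor path.
        """
        grad_dict = {'FP32_LODTensor': [], 'FP16_LODTensor': []}
        params = [pair[0] for pair in parameters_and_grads]
        # One call for the whole list; -1 entries mark parameters whose
        # gradient is stopped or not yet initialized, so they are skipped.
        grads_types = core.eager.get_grads_types(params)
        for index, tp in enumerate(grads_types):
            if tp == GRAD_TYPES[0]:
                grad_dict['FP32_LODTensor'].append(parameters_and_grads[index][1])
            elif tp == GRAD_TYPES[1]:
                grad_dict['FP16_LODTensor'].append(parameters_and_grads[index][1])
        return grad_dict

Moving the dtype classification into a single C++ call appears to be the point of the eager-mode branch: it avoids looping over parameter attributes in Python on every optimizer step.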