diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc
index db0da96493c20949800865c494cf8857ee4f88bc..b1ace9c76417c8cbb2a0f7e022da18f6de8543f8 100644
--- a/paddle/fluid/pybind/eager_functions.cc
+++ b/paddle/fluid/pybind/eager_functions.cc
@@ -190,6 +190,41 @@ static PyObject* eager_api_tensor_copy(PyObject* self,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
+PyObject* eager_api_get_grads_lists(PyObject* self,
+                                    PyObject* args,
+                                    PyObject* kwargs) {
+  EAGER_TRY
+  auto tensor_list = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 0), 0);
+
+  // ret[0]: FP16 grads, ret[1]: BF16 grads, ret[2]: FP32 grads
+  std::vector<std::vector<paddle::experimental::Tensor>> ret(3);
+
+  for (auto& tensor : tensor_list) {
+    VLOG(6) << "Get grad for tensor: " << tensor.name();
+    auto meta = egr::EagerUtils::nullable_autograd_meta(tensor);
+    if (meta && meta->Grad().initialized()) {
+      VLOG(6) << meta << " initialized: " << meta->Grad().initialized();
+      auto& grad = meta->Grad();
+      switch (grad.dtype()) {
+        case paddle::experimental::DataType::FLOAT16:
+          ret[0].emplace_back(grad);
+          break;
+        case paddle::experimental::DataType::BFLOAT16:
+          ret[1].emplace_back(grad);
+          break;
+        case paddle::experimental::DataType::FLOAT32:
+          ret[2].emplace_back(grad);
+          break;
+        default:
+          break;
+      }
+    }
+  }
+
+  return ToPyObject(ret);
+
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
 static PyObject* eager_api_read_next_tensor_list(PyObject* self,
                                                  PyObject* args,
                                                  PyObject* kwargs) {
@@ -1001,6 +1036,10 @@ PyMethodDef variable_functions[] = {
      (PyCFunction)(void (*)(void))eager_api_tensor_copy,
      METH_VARARGS | METH_KEYWORDS,
      NULL},
+    {"get_grads_lists",
+     (PyCFunction)(void (*)(void))eager_api_get_grads_lists,
+     METH_VARARGS | METH_KEYWORDS,
+     NULL},
     {"read_next_tensor_list",
      (PyCFunction)(void (*)(void))eager_api_read_next_tensor_list,
      METH_VARARGS | METH_KEYWORDS,
diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc
index 895a715c8bad82c9490bede76f2b97a8f62838f3..1237e4092f02f5c1a326f614e38833643d8a63a5 100644
--- a/paddle/fluid/pybind/eager_utils.cc
+++ b/paddle/fluid/pybind/eager_utils.cc
@@ -720,6 +720,17 @@ PyObject* ToPyObject(const std::vector<paddle::experimental::Tensor>& value,
   return result;
 }
 
+PyObject* ToPyObject(
+    const std::vector<std::vector<paddle::experimental::Tensor>>& value) {
+  PyObject* result = PyList_New((Py_ssize_t)value.size());
+
+  for (size_t i = 0; i < value.size(); i++) {
+    PyList_SET_ITEM(result, static_cast<Py_ssize_t>(i), ToPyObject(value[i]));
+  }
+
+  return result;
+}
+
 PyObject* ToPyObject(const platform::Place& value) {
   auto obj = ::pybind11::cast(value);
   obj.inc_ref();
diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h
index 845b681163e0d55c48651de5ffdebcff6060c163..145eeacc049f91f3e2c6eee88e585382f96183e9 100644
--- a/paddle/fluid/pybind/eager_utils.h
+++ b/paddle/fluid/pybind/eager_utils.h
@@ -103,6 +103,8 @@ PyObject* ToPyObject(const std::vector<double>& value);
 PyObject* ToPyObject(const std::vector<std::vector<size_t>>& value);
 PyObject* ToPyObject(const std::vector<paddle::experimental::Tensor>& value,
                      bool return_py_none_if_not_initialize = false);
+PyObject* ToPyObject(
+    const std::vector<std::vector<paddle::experimental::Tensor>>& value);
 PyObject* ToPyObject(const platform::Place& value);
 PyObject* ToPyObject(const phi::DenseTensor* value);
 PyObject* ToPyObject(const phi::SelectedRows* value);
diff --git a/python/paddle/fluid/dygraph/amp/loss_scaler.py b/python/paddle/fluid/dygraph/amp/loss_scaler.py
index 6ab153e4a2e9a07cf4ed5ed50f211566300b5de0..c59588e9d03b611fe3240f6edca52b42df9d4e3f 100644
--- a/python/paddle/fluid/dygraph/amp/loss_scaler.py
+++ b/python/paddle/fluid/dygraph/amp/loss_scaler.py
@@ -26,6 +26,7 @@ import numpy as np
 from paddle import _C_ops, _legacy_C_ops
 from collections import defaultdict
 from enum import Enum
+from paddle.fluid import in_dygraph_mode
 
 __all__ = ['AmpScaler', 'OptimizerState']
 
@@ -297,26 +298,33 @@ class AmpScaler(object):
                         else:
                             param_grads_fp32.append(param._grad_ivar())
         else:
-            param_grads = [
-                param._grad_ivar()
-                for param in optimizer._parameter_list
-                if param._grad_ivar() is not None
-            ]
-            param_grads_fp16 = [
-                param
-                for param in param_grads
-                if param.dtype == core.VarDesc.VarType.FP16
-            ]
-            param_grads_bf16 = [
-                param
-                for param in param_grads
-                if param.dtype == core.VarDesc.VarType.BF16
-            ]
-            param_grads_fp32 = [
-                param
-                for param in param_grads
-                if param.dtype == core.VarDesc.VarType.FP32
-            ]
+            if in_dygraph_mode():
+                (
+                    param_grads_fp16,
+                    param_grads_bf16,
+                    param_grads_fp32,
+                ) = core.eager.get_grads_lists(optimizer._parameter_list)
+            else:
+                param_grads = [
+                    param._grad_ivar()
+                    for param in optimizer._parameter_list
+                    if param._grad_ivar() is not None
+                ]
+                param_grads_fp16 = [
+                    param
+                    for param in param_grads
+                    if param.dtype == core.VarDesc.VarType.FP16
+                ]
+                param_grads_bf16 = [
+                    param
+                    for param in param_grads
+                    if param.dtype == core.VarDesc.VarType.BF16
+                ]
+                param_grads_fp32 = [
+                    param
+                    for param in param_grads
+                    if param.dtype == core.VarDesc.VarType.FP32
+                ]
         if core.is_compiled_with_npu():
             float_status = _legacy_C_ops.alloc_float_status()
             _legacy_C_ops.clear_float_status(float_status, float_status)
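
Note: in eager (dygraph) mode the unscale path now makes a single call into core.eager.get_grads_lists, which walks the parameter list in C++ and returns three Python lists of initialized gradients in (FP16, BF16, FP32) order, replacing several Python-side list comprehensions. The sketch below is only an illustration of the equivalent Python-side behavior, not part of the patch; the helper name group_grads_by_dtype and the comparisons against paddle.float16 / paddle.bfloat16 / paddle.float32 are assumptions for the example.

    import paddle

    def group_grads_by_dtype(parameters):
        # Illustrative pure-Python equivalent of core.eager.get_grads_lists:
        # bucket each parameter's available gradient by dtype,
        # returned in (FP16, BF16, FP32) order.
        fp16_grads, bf16_grads, fp32_grads = [], [], []
        for param in parameters:
            grad = param.grad  # None when no gradient has been produced yet
            if grad is None:
                continue
            if grad.dtype == paddle.float16:
                fp16_grads.append(grad)
            elif grad.dtype == paddle.bfloat16:
                bf16_grads.append(grad)
            elif grad.dtype == paddle.float32:
                fp32_grads.append(grad)
        return fp16_grads, bf16_grads, fp32_grads

    # Usage mirroring the new fast path in AmpScaler._unscale (illustrative):
    # fp16, bf16, fp32 = paddle.fluid.core.eager.get_grads_lists(model.parameters())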