Unverified · Commit 5900129c · Authored by: WangZhen · Committed by: GitHub

Get grads types from cpp for adam to speed up (#47769)

Get grads types from cpp for adam to speed up
Parent 8d99dd0c
paddle/fluid/pybind/eager_functions.cc
@@ -248,6 +248,40 @@ PyObject* eager_api_get_grads_lists(PyObject* self,
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

PyObject* eager_api_get_grads_types(PyObject* self,
                                    PyObject* args,
                                    PyObject* kwargs) {
  EAGER_TRY
  auto tensor_list = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 0), 0);

  std::vector<int> ret;

  for (auto& tensor : tensor_list) {
    VLOG(6) << "Get grad for tensor: " << tensor.name();
    auto meta = egr::EagerUtils::nullable_autograd_meta(tensor);
    if (!meta || meta->StopGradient()) {
      ret.emplace_back(-1);
      continue;
    }

    auto& grad = meta->Grad();
    if (meta && grad.initialized()) {
      if (grad.is_dense_tensor() &&
          (tensor.dtype() == paddle::experimental::DataType::FLOAT32 ||
           tensor.dtype() == paddle::experimental::DataType::FLOAT16)) {
        ret.emplace_back(
            paddle::framework::TransToProtoVarType(tensor.dtype()));
      }
    } else {
      ret.emplace_back(-1);
    }
  }

  return ToPyObject(ret);
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

static PyObject* eager_api_read_next_tensor_list(PyObject* self,
                                                 PyObject* args,
                                                 PyObject* kwargs) {
@@ -1067,6 +1101,10 @@ PyMethodDef variable_functions[] = {
     (PyCFunction)(void (*)(void))eager_api_get_grads_lists,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    {"get_grads_types",
     (PyCFunction)(void (*)(void))eager_api_get_grads_types,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
    {"read_next_tensor_list",
     (PyCFunction)(void (*)(void))eager_api_read_next_tensor_list,
     METH_VARARGS | METH_KEYWORDS,
     NULL},
...
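For context, a minimal sketch of how the new binding might be exercised from dygraph Python, assuming a build that contains this commit; everything other than `core.eager.get_grads_types` is ordinary user code:

```python
import paddle
from paddle.fluid import core

linear = paddle.nn.Linear(4, 4)        # FP32 parameters
loss = linear(paddle.rand([2, 4])).sum()
loss.backward()                        # populates each parameter's grad

types = core.eager.get_grads_types(list(linear.parameters()))
# One entry per parameter: the framework proto dtype as an int for an
# initialized dense FP32/FP16 grad, or -1 when the grad is missing or the
# tensor has stop_gradient=True (see eager_api_get_grads_types above).
print(types)
```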
paddle/fluid/pybind/eager_utils.cc
@@ -721,11 +721,14 @@ PyObject* ToPyObject(const std::vector<paddle::experimental::Tensor>& value,
}

PyObject* ToPyObject(
-    const std::vector<std::vector<paddle::experimental::Tensor>>& value) {
+    const std::vector<std::vector<paddle::experimental::Tensor>>& value,
+    bool return_py_none_if_not_initialize) {
  PyObject* result = PyList_New((Py_ssize_t)value.size());
  for (size_t i = 0; i < value.size(); i++) {
-    PyList_SET_ITEM(result, static_cast<Py_ssize_t>(i), ToPyObject(value[i]));
+    PyList_SET_ITEM(result,
+                    static_cast<Py_ssize_t>(i),
+                    ToPyObject(value[i], return_py_none_if_not_initialize));
  }
  return result;
...
paddle/fluid/pybind/eager_utils.h
@@ -104,7 +104,8 @@ PyObject* ToPyObject(const std::vector<std::vector<size_t>>& value);
PyObject* ToPyObject(const std::vector<paddle::experimental::Tensor>& value,
                     bool return_py_none_if_not_initialize = false);
PyObject* ToPyObject(
-    const std::vector<std::vector<paddle::experimental::Tensor>>& value);
+    const std::vector<std::vector<paddle::experimental::Tensor>>& value,
+    bool return_py_none_if_not_initialize = false);
PyObject* ToPyObject(const platform::Place& value);
PyObject* ToPyObject(const phi::DenseTensor* value);
PyObject* ToPyObject(const phi::SelectedRows* value);
...
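These two hunks only thread the existing `return_py_none_if_not_initialize` flag through the nested-list `ToPyObject` overload; the `false` default keeps current callers unchanged. Below is a hedged sketch of the intended Python-side effect, assuming `get_grads_lists` is the call site that passes `true` (that call site is not part of this diff):

```python
import paddle
from paddle.fluid import core

a = paddle.rand([2, 2])
a.stop_gradient = False
b = paddle.rand([2, 2])
b.stop_gradient = False
(a * 2).sum().backward()  # only `a` ends up with an initialized grad

# get_grads_lists returns nested lists of grad tensors; with the flag
# threaded through, uninitialized entries are expected to come back as
# Python None rather than empty tensors (assumption based on the flag name).
for bucket in core.eager.get_grads_lists([a, b]):
    print([type(item).__name__ for item in bucket])
```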
python/paddle/optimizer/adam.py
@@ -28,6 +28,8 @@ from paddle import _C_ops, _legacy_C_ops

__all__ = []

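# These ints are the framework proto VarType enum values for the two grad
# dtypes; core.eager.get_grads_types returns the same encoding
# (TransToProtoVarType(dtype), or -1 for a missing/stop_gradient grad).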
GRAD_TYPES = [int(paddle.float32), int(paddle.float16)]

class Adam(Optimizer):
    r"""
@@ -644,6 +646,23 @@ class Adam(Optimizer):
        lr_dict = {'FP32_LODTensor': [], 'FP16_LODTensor': []}

        if isinstance(parameters_and_grads, list):
            if framework.in_dygraph_mode():
                params = [pair[0] for pair in parameters_and_grads]
                grads_types = core.eager.get_grads_types(params)
                for index, tp in enumerate(grads_types):
                    if tp == GRAD_TYPES[0]:
                        grad_dict['FP32_LODTensor'].append(
                            parameters_and_grads[index][1]
                        )
                        lr = self._create_param_lr(parameters_and_grads[index])
                        lr_dict['FP32_LODTensor'].append(lr)
                    elif tp == GRAD_TYPES[1]:
                        grad_dict['FP16_LODTensor'].append(
                            parameters_and_grads[index][1]
                        )
                        lr = self._create_param_lr(parameters_and_grads[index])
                        lr_dict['FP16_LODTensor'].append(lr)
            else:
                for param_and_grad in parameters_and_grads:
                    if param_and_grad[1] is None:
                        continue
@@ -653,7 +672,9 @@ class Adam(Optimizer):
                        and param_and_grad[1].type
                        == core.VarDesc.VarType.LOD_TENSOR
                    ):
-                        grad_dict['FP32_LODTensor'].append(param_and_grad[1])
+                        grad_dict['FP32_LODTensor'].append(
+                            param_and_grad[1]
+                        )
                        lr = self._create_param_lr(param_and_grad)
                        lr_dict['FP32_LODTensor'].append(lr)
                    elif (
@@ -661,7 +682,9 @@ class Adam(Optimizer):
                        and param_and_grad[1].type
                        == core.VarDesc.VarType.LOD_TENSOR
                    ):
-                        grad_dict['FP16_LODTensor'].append(param_and_grad[1])
+                        grad_dict['FP16_LODTensor'].append(
+                            param_and_grad[1]
+                        )
                        lr = self._create_param_lr(param_and_grad)
                        lr_dict['FP16_LODTensor'].append(lr)
                    else:
...
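The dygraph branch above replaces per-parameter dtype and type checks in Python with a single batched C++ call, which is the speed-up named in the commit title. A self-contained sketch of the bucketing it performs, with strings standing in for grad tensors and a hypothetical `create_param_lr` stub in place of `self._create_param_lr`:

```python
GRAD_TYPES = [5, 4]  # assumed proto enum ints for FP32 / FP16

def create_param_lr(param_and_grad):
    # Hypothetical stub for self._create_param_lr(...); returns a scalar lr.
    return 0.001

parameters_and_grads = [('w0', 'g0_fp32'), ('w1', 'g1_fp16'), ('w2', None)]
grads_types = [5, 4, -1]  # as if returned by core.eager.get_grads_types

grad_dict = {'FP32_LODTensor': [], 'FP16_LODTensor': []}
lr_dict = {'FP32_LODTensor': [], 'FP16_LODTensor': []}

for index, tp in enumerate(grads_types):
    if tp == GRAD_TYPES[0]:
        grad_dict['FP32_LODTensor'].append(parameters_and_grads[index][1])
        lr_dict['FP32_LODTensor'].append(
            create_param_lr(parameters_and_grads[index])
        )
    elif tp == GRAD_TYPES[1]:
        grad_dict['FP16_LODTensor'].append(parameters_and_grads[index][1])
        lr_dict['FP16_LODTensor'].append(
            create_param_lr(parameters_and_grads[index])
        )
    # tp == -1: the parameter has no usable grad and is skipped entirely

print(grad_dict)  # {'FP32_LODTensor': ['g0_fp32'], 'FP16_LODTensor': ['g1_fp16']}
print(lr_dict)    # {'FP32_LODTensor': [0.001], 'FP16_LODTensor': [0.001]}
```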