未验证 提交 09acc860 编写于 作者: H HongyuJia 提交者: GitHub

[phi] Transfer merged_momentum yaml to phi (#45359)

* add legacy_api.yaml

* set merged_momentum inplace only

* support inplace optional<vector<tensor>>

* add dygraph_mode api

* optimize TensorToConstDenseTensorPtr
上级 46d04b0f
......@@ -1007,6 +1007,10 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
else:
assert IsVectorTensorType(ttype)
if is_optional:
if self.is_forward_only and is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(
):
arg_str = f"paddle::optional<std::vector<paddle::experimental::Tensor>>& {name}"
else:
arg_str = f"const paddle::optional<std::vector<paddle::experimental::Tensor>>& {name}"
amp_tensors_vector_optional_list.append(
f"if ({name}) amp_tensors_vector.push_back( *{name} );\n"
......@@ -1014,6 +1018,10 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
amp_autocast_optional_list.append(
f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
)
else:
if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys(
):
arg_str = f"std::vector<paddle::experimental::Tensor>& {name}"
else:
arg_str = f"const std::vector<paddle::experimental::Tensor>& {name}"
amp_tensors_vector_list.append(f"{name}")
......
......@@ -44,6 +44,34 @@ std::unique_ptr<std::vector<phi::DenseTensor*>> TensorToDenseTensor(
return pt_tensors;
}
std::vector<const phi::DenseTensor*> TensorToConstDenseTensorPtr(
const std::vector<Tensor>& tensors) {
std::vector<const phi::DenseTensor*> pt_tensors(tensors.size());
for (size_t i = 0; i < tensors.size(); ++i) {
pt_tensors[i] = static_cast<phi::DenseTensor*>(tensors[i].impl().get());
}
return pt_tensors;
}
// Optional-input variant: an empty optional propagates as an empty optional;
// otherwise each element's backing DenseTensor pointer is borrowed
// (non-owning, same lifetime caveat as the non-optional overload).
paddle::optional<std::vector<const phi::DenseTensor*>>
TensorToConstDenseTensorPtr(
    const paddle::optional<std::vector<Tensor>>& tensors) {
  paddle::optional<std::vector<const phi::DenseTensor*>> dense_ptrs;
  if (tensors) {
    dense_ptrs = paddle::optional<std::vector<const phi::DenseTensor*>>(
        tensors->size());
    size_t idx = 0;
    for (const auto& tensor : *tensors) {
      (*dense_ptrs)[idx++] =
          static_cast<phi::DenseTensor*>(tensor.impl().get());
    }
  }
  return dense_ptrs;
}
// Downcasts the tensor's impl to SelectedRows, sharing ownership with the
// input tensor (no copy). Assumes the impl actually is a SelectedRows —
// callers are expected to have checked the tensor kind beforehand.
std::shared_ptr<phi::SelectedRows> TensorToSelectedRows(const Tensor& tensor) {
  return std::static_pointer_cast<phi::SelectedRows>(tensor.impl());
}
......@@ -140,6 +168,29 @@ std::vector<phi::DenseTensor*> SetKernelOutput(size_t out_size,
return results;
}
std::vector<phi::DenseTensor*> SetInplaceVectorKernelOutput(
size_t out_size, Backend backend, std::vector<Tensor>* out) {
std::vector<phi::DenseTensor*> results(out->size(), nullptr);
for (size_t i = 0; i < out->size(); ++i) {
results[i] = static_cast<phi::DenseTensor*>(out->at(i).impl().get());
}
return results;
}
// In-place optional<vector> output (e.g. the optional master_param of
// merged_momentum): an absent optional yields an empty pointer list;
// otherwise each Tensor's backing DenseTensor is exposed to the kernel.
// `out_size`/`backend` are unused and exist only for call-site uniformity
// in the generated code.
std::vector<phi::DenseTensor*> SetInplaceOptionalVectorKernelOutput(
    size_t out_size,
    Backend backend,
    const paddle::optional<std::vector<Tensor>>& out) {
  std::vector<phi::DenseTensor*> dense_outs;
  if (out) {
    dense_outs.reserve(out->size());
    for (const auto& tensor : *out) {
      dense_outs.push_back(
          static_cast<phi::DenseTensor*>(tensor.impl().get()));
    }
  }
  return dense_outs;
}
std::vector<phi::DenseTensor*> SetKernelOutput(std::vector<Tensor*>* out) {
std::vector<phi::DenseTensor*> results(out->size(), nullptr);
for (size_t i = 0; i < out->size(); ++i) {
......
......@@ -38,6 +38,13 @@ paddle::optional<phi::DenseTensor> TensorToDenseTensor(
std::unique_ptr<std::vector<phi::DenseTensor*>> TensorToDenseTensor(
const std::vector<Tensor>& tensors);
std::vector<const phi::DenseTensor*> TensorToConstDenseTensorPtr(
const std::vector<Tensor>& tensors);
paddle::optional<std::vector<const phi::DenseTensor*>>
TensorToConstDenseTensorPtr(
const paddle::optional<std::vector<Tensor>>& tensors);
std::shared_ptr<phi::SelectedRows> TensorToSelectedRows(const Tensor& tensor);
paddle::optional<phi::SelectedRows> TensorToSelectedRows(
......@@ -72,6 +79,14 @@ std::vector<phi::DenseTensor*> SetKernelOutput(size_t out_size,
Backend backend,
std::vector<Tensor>* out);
std::vector<phi::DenseTensor*> SetInplaceVectorKernelOutput(
size_t out_size, Backend backend, std::vector<Tensor>* out);
std::vector<phi::DenseTensor*> SetInplaceOptionalVectorKernelOutput(
size_t out_size,
Backend backend,
const paddle::optional<std::vector<Tensor>>& out);
// For backward api
std::vector<phi::DenseTensor*> SetKernelOutput(std::vector<Tensor*>* out);
......
......@@ -58,9 +58,14 @@ class BaseAPI(object):
def get_input_tensor_args(self, inplace_flag=False):
input_args = []
inplace_type_map = {
"const Tensor&": "Tensor&",
"const paddle::optional<Tensor>&": "paddle::optional<Tensor>&",
"const std::vector<Tensor>&": "std::vector<Tensor>&"
"const Tensor&":
"Tensor&",
"const paddle::optional<Tensor>&":
"paddle::optional<Tensor>&",
"const std::vector<Tensor>&":
"std::vector<Tensor>&",
"const paddle::optional<std::vector<Tensor>>&":
"paddle::optional<std::vector<Tensor>>&"
}
for name in self.inputs['names']:
name = name.split('@')[0]
......@@ -595,8 +600,15 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
if input_name in self.optional_vars:
if self.inputs['input_info'][
input_name] == "const paddle::optional<std::vector<Tensor>>&":
if input_name in self.inplace_map.values():
input_name_tensor_map[input_name].append(
(f"{PREFIX_TENSOR_NAME}{input_name}_vec", True))
(f"{PREFIX_TENSOR_NAME}{input_name}", True))
input_tensor_code = input_tensor_code + f"""
{code_indent} paddle::optional<std::vector<const phi::DenseTensor*>> {PREFIX_TENSOR_NAME}{input_name} = TensorToConstDenseTensorPtr({input_name});"""
else:
input_name_tensor_map[input_name].append(
(f"{PREFIX_TENSOR_NAME}{input_name}_vec",
True))
input_tensor_code = input_tensor_code + f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag});
{code_indent} paddle::optional<std::vector<const phi::DenseTensor*>> {PREFIX_TENSOR_NAME}{input_name};
......@@ -622,8 +634,15 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
elif self.inputs['input_info'][
input_name] == "const std::vector<Tensor>&":
if input_name in self.inplace_map.values():
input_name_tensor_map[input_name].append(
(f"{PREFIX_TENSOR_NAME}{input_name}_vec", True))
(f"{PREFIX_TENSOR_NAME}{input_name}", True))
input_tensor_code = input_tensor_code + f"""
{code_indent} std::vector<const phi::DenseTensor*> {PREFIX_TENSOR_NAME}{input_name} = TensorToConstDenseTensorPtr({input_name});"""
else:
input_name_tensor_map[input_name].append(
(f"{PREFIX_TENSOR_NAME}{input_name}_vec",
True))
input_tensor_code = input_tensor_code + f"""
{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag});
{code_indent} std::vector<const phi::DenseTensor*> {PREFIX_TENSOR_NAME}{input_name}({PREFIX_TENSOR_NAME}{input_name}_vec->size());
......@@ -699,19 +718,23 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
{code_indent} ddims_vec.clear();"""
for input_tensor, is_vector in input_name_tensor_map[input_name]:
if is_vector:
input_tensor_truncate = input_tensor[:-4]
if input_name in self.inplace_map.values():
input_tensor_truncate = input_tensor
if input_name in self.optional_vars:
input_tensor_code = input_tensor_code + f"""
{code_indent} if ({input_tensor[:-4]}){{
{code_indent} ddims_vec.reserve({input_tensor[:-4]}->size());
{code_indent} for (size_t i = 0; i < {input_tensor[:-4]}->size(); ++i) {{
{code_indent} ddims_vec.emplace_back((*{input_tensor[:-4]}->at(i)).dims());
{code_indent} if ({input_tensor_truncate}){{
{code_indent} ddims_vec.reserve({input_tensor_truncate}->size());
{code_indent} for (size_t i = 0; i < {input_tensor_truncate}->size(); ++i) {{
{code_indent} ddims_vec.emplace_back((*{input_tensor_truncate}->at(i)).dims());
{code_indent} }}
{code_indent} }}"""
else:
input_tensor_code = input_tensor_code + f"""
{code_indent} ddims_vec.reserve({input_tensor[:-4]}.size());
{code_indent} for (size_t i = 0; i < {input_tensor[:-4]}.size(); ++i) {{
{code_indent} ddims_vec.emplace_back((*{input_tensor[:-4]}[i]).dims());
{code_indent} ddims_vec.reserve({input_tensor_truncate}.size());
{code_indent} for (size_t i = 0; i < {input_tensor_truncate}.size(); ++i) {{
{code_indent} ddims_vec.emplace_back((*{input_tensor_truncate}[i]).dims());
{code_indent} }}"""
else:
input_tensor_code = input_tensor_code + f"""
......
......@@ -179,8 +179,7 @@ class ForwardAPI(BaseAPI):
for out_name in self.outputs['names']:
if out_name in self.inplace_map:
output_create = output_create + self.inplace_map[
out_name] + ', '
output_create += self.inplace_map[out_name] + ', '
else:
output_create += 'Tensor(), '
output_create = output_create[:-2] + '};'
......@@ -200,6 +199,13 @@ class ForwardAPI(BaseAPI):
if out_dtype_list[i] == 'std::vector<Tensor>':
assert self.outputs['out_size_expr'][i] is not None, \
f"{self.api}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api."
# Special case for inplace vector and inplace optional<vector>
if self.outputs['names'][i] in self.inplace_map:
set_out_func = "SetInplaceVectorKernelOutput"
if self.inplace_map[self.outputs['names']
[i]] in self.optional_vars:
set_out_func = "SetInplaceOptionalVectorKernelOutput"
get_out_code = f"std::get<{i}>(api_output)"
output_create = output_create + f"""
{code_indent} auto kernel_out_{i} = {set_out_func}({self.outputs['out_size_expr'][i]}, kernel_backend, {get_out_code});"""
......
......@@ -1780,6 +1780,17 @@
func : mean_all
backward : mean_all_grad
# Fused Momentum update applied over lists of parameters in one call.
# Trailing underscore marks the api as inplace-only: all three outputs alias
# their inputs (see `inplace` below). master_param is optional — presumably
# only present for multi-precision (FP16 master-weight) training; confirm
# against the Momentum optimizer call sites.
- api : merged_momentum_
args : (Tensor[] param, Tensor[] grad, Tensor[] velocity, Tensor[] learning_rate, Tensor[] master_param, float mu, bool use_nesterov = false, str[] regularization_method = {}, float[] regularization_coeff = {}, bool multi_precision = false, float rescale_grad = 1.0f)
output : Tensor[](param_out){param.size()}, Tensor[](velocity_out){param.size()}, Tensor[](master_param_out){param.size()}
infer_meta :
func : MergedMomentumInferMeta
optional: master_param
kernel :
func : merged_momentum
data_type : param
# Kernel writes back into the original tensors; outputs are views, not copies.
inplace : (param -> param_out), (velocity -> velocity_out), (master_param -> master_param_out)
- api : meshgrid
args : (Tensor[] inputs)
output : Tensor[]{inputs.size()}
......
......@@ -473,17 +473,28 @@ class Momentum(Optimizer):
find_master = self._multi_precision and key == 'FP16_LODTensor'
if framework._non_static_mode():
if in_dygraph_mode():
_, _, _ = _C_ops.final_state_merged_momentum_(
self._param_dict[key], grad_dict[key],
self._velocity_dict[key], lr_dict[key],
self._master_weight_dict[key], self._momentum,
self._use_nesterov,
self._regularization_method_dict[key],
self._regularization_coeff_dict[key], find_master,
self._rescale_grad)
else:
_, _, _ = _C_ops.merged_momentum(
self._param_dict[key], grad_dict[key],
self._velocity_dict[key], lr_dict[key],
self._master_weight_dict[key], self._param_dict[key],
self._velocity_dict[key], self._master_weight_dict[key],
'mu', self._momentum, 'use_nesterov',
self._use_nesterov, 'regularization_method',
self._master_weight_dict[key],
self._param_dict[key], self._velocity_dict[key],
self._master_weight_dict[key], 'mu', self._momentum,
'use_nesterov', self._use_nesterov,
'regularization_method',
self._regularization_method_dict[key],
'regularization_coeff',
self._regularization_coeff_dict[key], 'multi_precision',
find_master)
self._regularization_coeff_dict[key],
'multi_precision', find_master)
else:
inputs = {
"Param": self._param_dict[key],
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册