未验证 提交 9a459efb 编写于 作者: P pangyoki 提交者: GitHub

Inplace ops without backward now support multiple inplace input vars (#43787)

* inplace ops without backward now support multiple inplace input vars

* support inplace without backward in eager mode and fix python_c in eager_fluid

* fix output in yaml

* pass the inplace index map by const&

* solve conflict v2
上级 a97a8dd1
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -77,9 +77,7 @@ RECORD_EVENT_TEMPLATE = \ ...@@ -77,9 +77,7 @@ RECORD_EVENT_TEMPLATE = \
RETURN_INPLACE_PYOBJECT_TEMPLATE = \ RETURN_INPLACE_PYOBJECT_TEMPLATE = \
""" """
ssize_t arg_id = GetIdxFromCoreOpsInfoMap(core_ops_final_state_args_info, \"final_state_{}\", \"{}\"); inplace_var_idx_map[{}] = {};
ssize_t return_id = GetIdxFromCoreOpsInfoMap(core_ops_final_state_returns_info, \"final_state_{}\", \"{}\");
return ToPyObject(out, return_id, args, arg_id);
""" """
...@@ -246,6 +244,7 @@ NAMESPACE_WRAPPER_TEMPLATE = \ ...@@ -246,6 +244,7 @@ NAMESPACE_WRAPPER_TEMPLATE = \
## Generator Classes ## ## Generator Classes ##
####################### #######################
class PythonCSingleFunctionGenerator(FunctionGeneratorBase): class PythonCSingleFunctionGenerator(FunctionGeneratorBase):
def __init__(self, forward_api_contents, namespace): def __init__(self, forward_api_contents, namespace):
# Members from Parent: # Members from Parent:
#self.namespace #self.namespace
...@@ -258,7 +257,7 @@ class PythonCSingleFunctionGenerator(FunctionGeneratorBase): ...@@ -258,7 +257,7 @@ class PythonCSingleFunctionGenerator(FunctionGeneratorBase):
#self.forward_outputs_position_map #self.forward_outputs_position_map
#self.optional_inputs #self.optional_inputs
#self.no_need_buffers #self.no_need_buffers
#self.intermediate_outputs #self.intermediate_outputs
#self.forward_inplace_map #self.forward_inplace_map
FunctionGeneratorBase.__init__(self, forward_api_contents, namespace) FunctionGeneratorBase.__init__(self, forward_api_contents, namespace)
...@@ -283,9 +282,13 @@ class PythonCSingleFunctionGenerator(FunctionGeneratorBase): ...@@ -283,9 +282,13 @@ class PythonCSingleFunctionGenerator(FunctionGeneratorBase):
optional_inputs = self.optional_inputs optional_inputs = self.optional_inputs
is_forward_only = self.is_forward_only is_forward_only = self.is_forward_only
inplace_args_pos_map = {}
inplace_returns_pos_map = {}
# Generate Python-C Tensors Parsing Logic # Generate Python-C Tensors Parsing Logic
get_eager_tensor_str = "" get_eager_tensor_str = ""
for name, (ttype, pos) in forward_inputs_position_map.items(): for name, (ttype, pos) in forward_inputs_position_map.items():
if forward_inplace_map and name in forward_inplace_map.keys():
inplace_args_pos_map[name] = pos
is_optional = (name in optional_inputs) is_optional = (name in optional_inputs)
if IsVectorTensorType(ttype): if IsVectorTensorType(ttype):
get_eager_tensor_str += PARSE_PYTHON_C_TENSORS_TEMPLATE.format( get_eager_tensor_str += PARSE_PYTHON_C_TENSORS_TEMPLATE.format(
...@@ -301,6 +304,11 @@ class PythonCSingleFunctionGenerator(FunctionGeneratorBase): ...@@ -301,6 +304,11 @@ class PythonCSingleFunctionGenerator(FunctionGeneratorBase):
name, "GetTensorFromArgs", forward_api_name, name, pos, name, "GetTensorFromArgs", forward_api_name, name, pos,
"false") "false")
if forward_inplace_map:
for name, (ttype, pos) in forward_outputs_position_map.items():
if name in forward_inplace_map.values():
inplace_returns_pos_map[name] = pos
parse_attributes_str = "" parse_attributes_str = ""
expected_place_str = " auto place = egr::Controller::Instance().GetExpectedPlace();\n" expected_place_str = " auto place = egr::Controller::Instance().GetExpectedPlace();\n"
...@@ -320,8 +328,8 @@ class PythonCSingleFunctionGenerator(FunctionGeneratorBase): ...@@ -320,8 +328,8 @@ class PythonCSingleFunctionGenerator(FunctionGeneratorBase):
set_device_str = FUNCTION_SET_DEVICE_TEMPLATE.format(expected_place_str) set_device_str = FUNCTION_SET_DEVICE_TEMPLATE.format(expected_place_str)
# Generate Dygraph Function Call Logic # Generate Dygraph Function Call Logic
num_args = len(forward_inputs_position_map.keys()) + len( num_args = len(
orig_forward_attrs_list) forward_inputs_position_map.keys()) + len(orig_forward_attrs_list)
dygraph_function_call_list = ["" for i in range(num_args)] dygraph_function_call_list = ["" for i in range(num_args)]
for name, (_, pos) in forward_inputs_position_map.items(): for name, (_, pos) in forward_inputs_position_map.items():
dygraph_function_call_list[pos] = f"{name}" dygraph_function_call_list[pos] = f"{name}"
...@@ -371,14 +379,12 @@ class PythonCSingleFunctionGenerator(FunctionGeneratorBase): ...@@ -371,14 +379,12 @@ class PythonCSingleFunctionGenerator(FunctionGeneratorBase):
"::", namespace, "::", namespace,
GetForwardFunctionName(inplaced_forward_api_name)) GetForwardFunctionName(inplaced_forward_api_name))
assert len( return_str = " std::map<ssize_t, ssize_t> inplace_var_idx_map;"
forward_inplace_map
) == 1, f"size of inplace_map must be 1, but inplace_map of \"{forward_api_name}\" op got {len(forward_inplace_map)}"
for inplace_input, inplace_output in forward_inplace_map.items(): for inplace_input, inplace_output in forward_inplace_map.items():
return_str = RETURN_INPLACE_PYOBJECT_TEMPLATE.format( return_str += RETURN_INPLACE_PYOBJECT_TEMPLATE.format(
inplaced_forward_api_name, inplace_input, inplace_returns_pos_map[inplace_output],
inplaced_forward_api_name, inplace_output) inplace_args_pos_map[inplace_input])
break return_str += " return ToPyObject(out, args, inplace_var_idx_map);"
# Generate Python-C Function Definition # Generate Python-C Function Definition
python_c_inplace_func_str = PYTHON_C_FUNCTION_TEMPLATE.format( python_c_inplace_func_str = PYTHON_C_FUNCTION_TEMPLATE.format(
...@@ -429,8 +435,9 @@ class PythonCSingleFunctionGenerator(FunctionGeneratorBase): ...@@ -429,8 +435,9 @@ class PythonCSingleFunctionGenerator(FunctionGeneratorBase):
class PythonCGenerator(GeneratorBase): class PythonCGenerator(GeneratorBase):
def __init__(self, path): def __init__(self, path):
# Parent members: # Parent members:
# self.namespace # self.namespace
# self.api_yaml_path # self.api_yaml_path
# self.forward_api_list # self.forward_api_list
...@@ -445,8 +452,8 @@ class PythonCGenerator(GeneratorBase): ...@@ -445,8 +452,8 @@ class PythonCGenerator(GeneratorBase):
forward_api_list = self.forward_api_list forward_api_list = self.forward_api_list
for forward_api_content in forward_api_list: for forward_api_content in forward_api_list:
f_generator = PythonCSingleFunctionGenerator(forward_api_content, f_generator = PythonCSingleFunctionGenerator(
namespace) forward_api_content, namespace)
status = f_generator.run() status = f_generator.run()
if status == True: if status == True:
......
...@@ -316,29 +316,24 @@ std::string GenerateOpFunctionsBody( ...@@ -316,29 +316,24 @@ std::string GenerateOpFunctionsBody(
} }
if (!inplace_map.empty()) { if (!inplace_map.empty()) {
// For inplace op, use the input PyObject directly. // For inplace op, use the input PyObject directly.
return_str = "std::map<ssize_t, ssize_t> inplace_var_idx_map;\n";
for (auto& inplace_pair : inplace_map) { for (auto& inplace_pair : inplace_map) {
// Find index of inplace tensor, and directly use input PyObject. // Find index of inplace tensor, and directly use input PyObject.
std::string inplace_arg_name = inplace_pair.second; std::string inplace_arg_name = inplace_pair.second;
std::string inplace_return_name = inplace_pair.first; std::string inplace_return_name = inplace_pair.first;
const char* RETURN_INPLACE_TENSOR_TEMPLATE = const char* RETURN_INPLACE_TENSOR_TEMPLATE =
"ssize_t arg_id = GetIdxFromCoreOpsInfoMap(core_ops_args_info, " " ssize_t arg_id = GetIdxFromCoreOpsInfoMap(core_ops_args_info, "
"\"%s\", \"%s\");\n" "\"%s\", \"%s\");\n"
" ssize_t return_id = " " ssize_t return_id = "
"GetIdxFromCoreOpsInfoMap(core_ops_returns_info, \"%s\", \"%s\");\n" "GetIdxFromCoreOpsInfoMap(core_ops_returns_info, \"%s\", \"%s\");\n"
" return ToPyObject(out, return_id, args, arg_id);"; " inplace_var_idx_map[return_id] = arg_id;";
return_str = paddle::string::Sprintf(RETURN_INPLACE_TENSOR_TEMPLATE, return_str += paddle::string::Sprintf(RETURN_INPLACE_TENSOR_TEMPLATE,
op_type, op_type,
inplace_arg_name, inplace_arg_name,
op_type, op_type,
inplace_return_name); inplace_return_name);
// only support one inplace_var in temporary.
PADDLE_ENFORCE_EQ(
inplace_map.size(),
1,
paddle::platform::errors::InvalidArgument(
"size of inplace_map must be 1, but got %d", inplace_map.size()));
break;
} }
return_str += " return ToPyObject(out, args, inplace_var_idx_map);";
} else { } else {
return_str = "return ToPyObject(out);"; return_str = "return ToPyObject(out);";
} }
......
...@@ -590,13 +590,18 @@ PyObject* ToPyObject(const paddle::experimental::Tensor& value, ...@@ -590,13 +590,18 @@ PyObject* ToPyObject(const paddle::experimental::Tensor& value,
} }
PyObject* ToPyObject(const paddle::experimental::Tensor& value, PyObject* ToPyObject(const paddle::experimental::Tensor& value,
ssize_t value_idx,
PyObject* args, PyObject* args,
ssize_t arg_idx) { const std::map<ssize_t, ssize_t>& inplace_var_idx_map) {
if (!inplace_var_idx_map.empty() && inplace_var_idx_map.count(0)) {
return ToPyObject(args, inplace_var_idx_map.at(0));
} else {
return ToPyObject(value);
}
}
PyObject* ToPyObject(PyObject* args, ssize_t arg_idx) {
// For inplace op, directly return the input PyObject of the inplace tensor. // For inplace op, directly return the input PyObject of the inplace tensor.
// [Parameter] // [Parameter]
// value: Useless parameter.
// value_idx: Useless parameter.
// args: Input PyObject. // args: Input PyObject.
// arg_idx: Index of inplace PyObject in input args. Used to find the input // arg_idx: Index of inplace PyObject in input args. Used to find the input
// inplace PyObject. // inplace PyObject.
......
...@@ -81,9 +81,9 @@ PyObject* ToPyObject(const std::string& value); ...@@ -81,9 +81,9 @@ PyObject* ToPyObject(const std::string& value);
PyObject* ToPyObject(const paddle::experimental::Tensor& value, PyObject* ToPyObject(const paddle::experimental::Tensor& value,
bool return_py_none_if_not_initialize = false); bool return_py_none_if_not_initialize = false);
PyObject* ToPyObject(const paddle::experimental::Tensor& value, PyObject* ToPyObject(const paddle::experimental::Tensor& value,
ssize_t value_idx,
PyObject* args, PyObject* args,
ssize_t arg_idx); const std::map<ssize_t, ssize_t>& inplace_var_idx_map);
PyObject* ToPyObject(PyObject* args, ssize_t arg_idx);
PyObject* ToPyObject(const std::vector<bool>& value); PyObject* ToPyObject(const std::vector<bool>& value);
PyObject* ToPyObject(const std::vector<int>& value); PyObject* ToPyObject(const std::vector<int>& value);
PyObject* ToPyObject(const std::vector<int64_t>& value); PyObject* ToPyObject(const std::vector<int64_t>& value);
...@@ -112,15 +112,13 @@ struct TupleTensorResult { ...@@ -112,15 +112,13 @@ struct TupleTensorResult {
static void Run(const Tuple& out, static void Run(const Tuple& out,
PyObject* result, PyObject* result,
ssize_t value_idx,
PyObject* args, PyObject* args,
ssize_t arg_idx) { const std::map<ssize_t, ssize_t>& inplace_var_idx_map) {
TupleTensorResult<Tuple, N - 1>::Run(out, result, value_idx, args, arg_idx); TupleTensorResult<Tuple, N - 1>::Run(
if (N - 1 == value_idx) { out, result, args, inplace_var_idx_map);
if (!inplace_var_idx_map.empty() && inplace_var_idx_map.count(N - 1)) {
PyTuple_SET_ITEM( PyTuple_SET_ITEM(
result, result, N - 1, ToPyObject(args, inplace_var_idx_map.at(N - 1)));
N - 1,
ToPyObject(std::get<N - 1>(out), value_idx, args, arg_idx));
} else { } else {
PyTuple_SET_ITEM(result, N - 1, ToPyObject(std::get<N - 1>(out))); PyTuple_SET_ITEM(result, N - 1, ToPyObject(std::get<N - 1>(out)));
} }
...@@ -135,12 +133,10 @@ struct TupleTensorResult<Tuple, 1> { ...@@ -135,12 +133,10 @@ struct TupleTensorResult<Tuple, 1> {
static void Run(const Tuple& out, static void Run(const Tuple& out,
PyObject* result, PyObject* result,
ssize_t value_idx,
PyObject* args, PyObject* args,
ssize_t arg_idx) { const std::map<ssize_t, ssize_t>& inplace_var_idx_map) {
if (value_idx == 0) { if (!inplace_var_idx_map.empty() && inplace_var_idx_map.count(0)) {
PyTuple_SET_ITEM( PyTuple_SET_ITEM(result, 0, ToPyObject(args, inplace_var_idx_map.at(0)));
result, 0, ToPyObject(std::get<0>(out), value_idx, args, arg_idx));
} else { } else {
PyTuple_SET_ITEM(result, 0, ToPyObject(std::get<0>(out))); PyTuple_SET_ITEM(result, 0, ToPyObject(std::get<0>(out)));
} }
...@@ -159,22 +155,23 @@ PyObject* ToPyObject(const std::tuple<Args...>& out) { ...@@ -159,22 +155,23 @@ PyObject* ToPyObject(const std::tuple<Args...>& out) {
template <typename... Args> template <typename... Args>
PyObject* ToPyObject(const std::tuple<Args...>& out, PyObject* ToPyObject(const std::tuple<Args...>& out,
ssize_t value_idx,
PyObject* args, PyObject* args,
ssize_t arg_idx) { const std::map<ssize_t, ssize_t>& inplace_var_idx_map) {
// For inplace op, directly return the input PyObject of the inplace tensor. // For inplace op, directly return the input PyObject of the inplace tensor.
// [Parameter] // [Parameter]
// out: Outputs tuple after executing op. // out: Outputs tuple after executing op.
// value_idx: Index of inplace tensor in outputs tuple. Used to find the
// output inplace tensor.
// args: Input PyObject. // args: Input PyObject.
// arg_idx: Index of inplace PyObject in input args. Used to find the input // inplace_var_idx_map: Index of Tensors in inplace_map, e.g. {{value_idx,
// arg_idx}}.
// - value_idx: Index of inplace tensor in outputs tuple. Used to find the
// output inplace tensor.
// - arg_idx: Index of inplace PyObject in input args. Used to find the input
// inplace PyObject. // inplace PyObject.
auto len = sizeof...(Args); auto len = sizeof...(Args);
PyObject* result = PyTuple_New(len); PyObject* result = PyTuple_New(len);
TupleTensorResult<decltype(out), sizeof...(Args)>::Run( TupleTensorResult<decltype(out), sizeof...(Args)>::Run(
out, result, value_idx, args, arg_idx); out, result, args, inplace_var_idx_map);
return result; return result;
} }
......
...@@ -1787,7 +1787,7 @@ ...@@ -1787,7 +1787,7 @@
- api : scale - api : scale
args : (Tensor x, Scalar scale, float bias, bool bias_after_scale) args : (Tensor x, Scalar scale, float bias, bool bias_after_scale)
output : Tensor output : Tensor(out)
infer_meta : infer_meta :
func : UnchangedInferMeta func : UnchangedInferMeta
param : [x] param : [x]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册