From 7cf7084bf0754f16f8cb354c197515b916cff974 Mon Sep 17 00:00:00 2001
From: Charles-hit <56987902+Charles-hit@users.noreply.github.com>
Date: Mon, 29 Aug 2022 17:20:35 +0800
Subject: [PATCH] support backward reuse forward dygraph (#45250)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* support reuse forward dygraph

* modify backward api exponential__grad yaml

* remove print code

* when a backward api reuses a forward api, first check whether a higher-order
  backward node is needed; if not, call the C++ API directly, otherwise call
  the forward dygraph function so that the backward node is created

* fix some backward bugs

* modify the generated dygraph function name
---
 .../generator/eager_gen.py                    | 72 ++++++++++++++++---
 .../api/yaml/generator/backward_api_gen.py    | 19 +++--
 paddle/phi/api/yaml/legacy_backward.yaml      | 28 +-------
 paddle/phi/api/yaml/sparse_bw_api.yaml        |  2 +-
 4 files changed, 81 insertions(+), 40 deletions(-)

diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
index 0688cd5179..505dd9377c 100644
--- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
+++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
@@ -292,6 +292,7 @@ NODE_CC_FILE_TEMPLATE = \
 #include "paddle/fluid/eager/utils.h"
 #include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/eager/api/generated/eager_generated/backwards/nodes.h"
+#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
 #include "paddle/fluid/eager/to_static/run_program_op_node.h"
 #include "paddle/fluid/eager/nan_inf_utils.h"
 
@@ -436,6 +437,20 @@ inplace_optional_out_type_map = {
 }
 
 
+def ExtractForwardApiNameFormInvoke(invoke_config):
+    api_name = invoke_config.split('(')[0]
+    if api_name[-1] == '_':
+        api_name = api_name[:-1]
+    return re.search(
+        r"(?P<api_name>[a-zA-Z0-9_]+)(?P<intermediate>_intermediate)?",
+        api_name).group('api_name')
+
+
+def IsInvokeForwardApi(api_contents, forward_api_name_list):
+    return 'invoke' in api_contents and ExtractForwardApiNameFormInvoke(
+        api_contents['invoke']) in forward_api_name_list
+
+
 #######################
 ## Generator Helpers ##
 #######################
@@ -478,7 +493,8 @@ def GenerateCoreOpInfoDefinition():
 #####################
 class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
 
-    def __init__(self, forward_api_contents, grad_api_contents, namespace):
+    def __init__(self, forward_api_contents, grad_api_contents,
+                 forward_apis_dict, namespace):
         self.forward_api_contents = forward_api_contents
         # Members from Parent:
         #self.namespace
@@ -495,6 +511,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
         #self.forward_inplace_map
         FunctionGeneratorBase.__init__(self, forward_api_contents, namespace)
 
+        self.forward_apis_dict = forward_apis_dict
         self.grad_api_contents = grad_api_contents
 
         # Raw Contents
@@ -935,9 +952,11 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
 
 
 class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
 
-    def __init__(self, forward_api_contents, grad_api_contents, namespace):
+    def __init__(self, forward_api_contents, grad_api_contents,
+                 forward_apis_dict, namespace):
         DygraphFunctionGeneratorBase.__init__(self, forward_api_contents,
-                                              grad_api_contents, namespace)
+                                              grad_api_contents,
+                                              forward_apis_dict, namespace)
         # Generated Results
         self.forward_definition_str = ""
@@ -1299,10 +1318,12 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
     def __init__(self,
                  forward_api_contents,
                  grad_api_contents,
+                 forward_apis_dict,
                  namespace,
                  next_grad_api_contents=None):
         DygraphFunctionGeneratorBase.__init__(self, forward_api_contents,
-                                              grad_api_contents, namespace)
+                                              grad_api_contents,
+                                              forward_apis_dict, namespace)
 
         # Record name mapping from forward_var_name to grad_var_names
         self.to_next_grad_name_mapping = {}  # {name : name}
@@ -1346,6 +1367,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
     def GenerateHigherOrderNodeCreationCode(self):
         namespace = self.namespace
         grad_api_contents = self.grad_api_contents
+        forward_apis_dict = self.forward_apis_dict
        next_grad_api_contents = self.next_grad_api_contents
 
         next_grad_node_creation_str = ""
@@ -1358,7 +1380,8 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
             backward_api_contents = next_grad_api_contents
 
             next_node_generator = DygraphFunctionGeneratorBase(
-                forward_api_contents, backward_api_contents, namespace)
+                forward_api_contents, backward_api_contents, forward_apis_dict,
+                namespace)
             next_node_generator.run()
             next_node_generator.GenerateNodeCreationCodes(True)
 
@@ -1443,6 +1466,8 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
         backward_inplace_map = self.backward_inplace_map
         indent = GetIndent(1)
 
+        is_invoke_forward_api = IsInvokeForwardApi(self.grad_api_contents,
+                                                   self.forward_apis_dict)
         # Construct grad_api function args
         # Order: TensorWrappers, GradTensors, Attributes
         grad_api_args_len = len(backward_forward_inputs_map.keys()) + len(
@@ -1575,11 +1600,18 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
             optional_inplace_str = ""
         # Grad Outputs
         out_index = -1
+        out_assign_str = ""
         for name, (ttype, fwd_position,
                    grad_api_position) in backward_grad_outputs_map.items():
             transformed_tensor_name = self.TransformToNextGradName(name)
             out_index = out_index + 1
-            grad_api_args.append(f"api_output_{out_index}")
+            if is_invoke_forward_api:
+                if len(backward_grad_outputs_map) == 1:
+                    out_assign_str += f"{indent}*api_output_{out_index} = api_output;\n"
+                else:
+                    out_assign_str += f"{indent}*api_output_{out_index} = std::get<{out_index}>(api_output);\n"
+            else:
+                grad_api_args.append(f"api_output_{out_index}")
             if inplace_grad_input_str in optional_inplace_var_name:
                 optional_inplace_str = "VLOG(6) << \"No Inplace should happend for wrappered input: {inplace_grad_input_str}\";"
             else:
@@ -1621,7 +1653,24 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
 
         grad_api_args_str = ", ".join(grad_api_args)
 
-        grad_function_call_str = f"""
+        if is_invoke_forward_api:
+            autograd_api_out = "auto"
+            if len(self.backward_inplace_map) > 0 and len(
+                    backward_grad_outputs_map) == 1:
+                autograd_api_out = "auto&"
+            forward_api_name = self.grad_api_contents['invoke'].split(
+                '(')[0].strip()
+            autograd_api = self.grad_api_contents['invoke'].replace(
+                forward_api_name, forward_api_name + '_dygraph_function', 1)
+            grad_function_call_str = f"""
+  if (trace_backward) {{
+  {indent}{autograd_api_out} api_output = {autograd_api};
+  {out_assign_str}}} else {{
+  {indent}{autograd_api_out} api_output = paddle::experimental::{self.namespace}{self.grad_api_contents['invoke']};
+  {out_assign_str}{indent}}}
+  """
+        else:
+            grad_function_call_str = f"""
 {indent}{grad_api_namespace}{backward_api_name}({grad_api_args_str});"""
 
         # Check Nan and Inf
@@ -1631,7 +1680,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
 
         # Prepare for Node Creation if Necessary
         outputs_autograd_meta_str = ""
         compute_require_next_grad_str = ""
-        if len(next_grad_node_creation_str) > 0:
+        if len(next_grad_node_creation_str) > 0 or is_invoke_forward_api:
             compute_require_next_grad_str = f"{indent}bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph;\n"
 
         # 3. Get Output AutoGradMeta
@@ -1754,6 +1803,9 @@ class DygraphForwardAndNodesGenerator(GeneratorBase):
     def GenerateCode(self):
         forward_api_list = self.forward_api_list
         grad_api_dict = self.grad_api_dict
+        forward_apis_dict = {}
+        for api_item in forward_api_list:
+            forward_apis_dict[api_item['api']] = api_item
         namespace = self.namespace
 
         for forward_api_contents in forward_api_list:
@@ -1769,7 +1821,8 @@ class DygraphForwardAndNodesGenerator(GeneratorBase):
 
             # Generate Dygraph Forward Function
             function_generator = DygraphForwardFunctionGenerator(
-                forward_api_contents, backward_api_contents, namespace)
+                forward_api_contents, backward_api_contents, forward_apis_dict,
+                namespace)
             function_generator.run()
 
             self.forward_definition_str += function_generator.forward_definition_str + "\n"
@@ -1784,6 +1837,7 @@ class DygraphForwardAndNodesGenerator(GeneratorBase):
 
             node_generator = DygraphNodeGenerator(forward_api_contents,
                                                   backward_api_contents,
+                                                  forward_apis_dict,
                                                   namespace,
                                                   next_grad_api_contents)
             node_generator.run()
diff --git a/paddle/phi/api/yaml/generator/backward_api_gen.py b/paddle/phi/api/yaml/generator/backward_api_gen.py
index 187f8e8e4f..436adcf5af 100644
--- a/paddle/phi/api/yaml/generator/backward_api_gen.py
+++ b/paddle/phi/api/yaml/generator/backward_api_gen.py
@@ -97,6 +97,18 @@ class BackwardAPI(BaseAPI):
     def gene_return_code(self):
         return ""
 
+    def gene_api_declaration(self):
+        if not self.is_base_api:
+            invoke_func_name = self.invoke.split('(')[0]
+            if (not invoke_func_name.endswith("_grad")) and (
+                    not invoke_func_name.endswith('_impl')):
+                return ""
+        api_func_name = self.get_api_func_name()
+        api_declaration = f"""
+PADDLE_API void {api_func_name}({self.get_declare_args()});
+"""
+        return api_declaration
+
     def gene_kernel_backend_select(self):
         all_no_need_buffer = True
         for in_name in self.inputs['names']:
@@ -178,17 +190,14 @@ class BackwardAPI(BaseAPI):
 
     def gene_invoke_code(self, invoke_code, params_code):
         invoke_func_name = invoke_code.split('(')[0].strip()
         if invoke_func_name.endswith('_grad') or invoke_func_name.endswith(
-                '_grad_impl'):
+                '_impl'):
             return f"""
 PADDLE_API {self.get_return_type()} {self.api}({params_code}) {{
   {invoke_code};
 }}"""
         else:
-            return f"""
-PADDLE_API {self.get_return_type()} {self.api}({params_code}) {{
-  *{self.outputs['names'][0].split('@')[0]} = {invoke_code};
-}}"""
+            return ""
 
 
 def header_include():
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index 26884f260f..307e7c453a 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -834,7 +834,7 @@
   output : Tensor(x_grad)
   infer_meta :
     func : UnchangedInferMeta
-  invoke : zeros_like(out_grad, DataType::UNDEFINED, {})
+  invoke : zeros_like(out_grad)
 
 - backward_api : fill_diagonal_grad
   forward : fill_diagonal (Tensor x, float value, int offset, bool wrap) -> Tensor(out)
@@ -1510,7 +1510,7 @@
 
 - backward_api : mean_double_grad
   forward: mean_grad (Tensor x, Tensor grad_out, int64_t[] dims={}, bool keep_dim=false, bool reduce_all = false) -> Tensor(grad_x)
-  args : (Tensor grad_x_grad, int64_t[] dims={}, bool keep_dim=false, bool reduce_all=false)
+  args : (Tensor grad_x_grad, int64_t[] dims={}, bool keep_dim=false)
   output : Tensor(grad_out_grad)
   invoke : mean(grad_x_grad, dims, keep_dim)
 
@@ -2050,26 +2050,11 @@
   backward : rsqrt_double_grad
   inplace : (out_grad -> x_grad)
 
-- backward_api : scale_double_grad
-  forward : scale_grad (Tensor grad_out, Scalar scale, float bias, bool bias_after_scale) -> Tensor(grad_x)
-  args : (Tensor grad_x_grad, Scalar scale=1.0, float bias=0.0, bool bias_after_scale=true)
-  output : Tensor(grad_out_grad)
-  invoke : scale(grad_x_grad, scale, 0.0, bias_after_scale)
-  backward : scale_triple_grad
-
 - backward_api : scale_grad
   forward : scale (Tensor x, Scalar scale, float bias, bool bias_after_scale) -> Tensor(out)
-  args : (Tensor out_grad, Scalar scale=1.0, float bias=0.0, bool bias_after_scale=true)
+  args : (Tensor out_grad, Scalar scale=1.0, bool bias_after_scale=true)
   output : Tensor(x_grad)
   invoke : scale(out_grad, scale, 0.0, bias_after_scale)
-  backward : scale_double_grad
-  inplace : (out_grad -> x_grad)
-
-- backward_api : scale_triple_grad
-  forward : scale_double_grad (Tensor grad_grad_x, Scalar scale, float bias, bool bias_after_scale) -> Tensor(grad_grad_out)
-  args : (Tensor grad_grad_out_grad, Scalar scale=1.0, float bias=0.0, bool bias_after_scale=true)
-  output : Tensor(grad_grad_x_grad)
-  invoke : scale(grad_grad_out_grad, scale, 0.0, bias_after_scale)
 
 - backward_api : scatter_grad
   forward : scatter (Tensor x, Tensor index, Tensor updates, bool overwrite) -> Tensor(out)
@@ -2416,7 +2401,6 @@
   args : (Tensor grad_x_grad, int64_t[] dims={}, bool keep_dim=false)
   output : Tensor(grad_out_grad)
   invoke : sum(grad_x_grad, dims, grad_x_grad.dtype(), keep_dim)
-  backward : sum_triple_grad
 
 - backward_api : sum_grad
   forward : sum (Tensor x, int64_t[] dims={}, DataType out_dtype=DataType::UNDEFINED, bool keep_dim=false) -> Tensor(out)
@@ -2430,12 +2414,6 @@
   no_need_buffer : x
   backward : sum_double_grad
 
-- backward_api : sum_triple_grad
-  forward : sum_double_grad (Tensor grad_grad_x, int64_t[] dims={}, bool keep_dim=false) -> Tensor(grad_grad_out)
-  args : (Tensor grad_grad_x, Tensor grad_grad_out_grad, int64_t[] dims={}, bool keep_dim=false, bool reduce_all=false)
-  output : Tensor(grad_grad_x_grad)
-  invoke : sum_grad(grad_grad_x, grad_grad_out_grad, dims, keep_dim, reduce_all, grad_grad_x_grad)
-
 - backward_api : svd_grad
   forward : svd (Tensor x, bool full) -> Tensor(u), Tensor(s), Tensor(vh)
   args : (Tensor x, Tensor u, Tensor vh, Tensor s, Tensor u_grad, Tensor vh_grad, Tensor s_grad, bool full)
diff --git a/paddle/phi/api/yaml/sparse_bw_api.yaml b/paddle/phi/api/yaml/sparse_bw_api.yaml
index cf8de8ceea..b30687f3af 100644
--- a/paddle/phi/api/yaml/sparse_bw_api.yaml
+++ b/paddle/phi/api/yaml/sparse_bw_api.yaml
@@ -106,7 +106,7 @@
   forward : dense_to_coo(Tensor x, int64_t sparse_dim) -> Tensor(out)
   args : (Tensor out_grad)
   output : Tensor(x_grad)
-  invoke : to_dense_impl(out_grad)
+  invoke : coo_to_dense(out_grad)
 
 - backward_api : divide_grad
   forward : divide(Tensor x, Tensor y) -> Tensor(out)
-- 
GitLab
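
Note: below is a minimal, self-contained sketch of the two helpers this patch adds to eager_gen.py (ExtractForwardApiNameFormInvoke / IsInvokeForwardApi), showing how a backward config whose `invoke` targets a forward API is detected. The sample backward_conf dict and forward_names list are hypothetical illustration inputs, not part of the patch.

import re


def ExtractForwardApiNameFormInvoke(invoke_config):
    # Drop the argument list, then any trailing inplace "_" suffix.
    api_name = invoke_config.split('(')[0]
    if api_name[-1] == '_':
        api_name = api_name[:-1]
    # Pull out the api name via the named 'api_name' group.
    return re.search(
        r"(?P<api_name>[a-zA-Z0-9_]+)(?P<intermediate>_intermediate)?",
        api_name).group('api_name')


def IsInvokeForwardApi(api_contents, forward_api_name_list):
    # A backward api "reuses" a forward api when its invoke target is a known forward api.
    return 'invoke' in api_contents and ExtractForwardApiNameFormInvoke(
        api_contents['invoke']) in forward_api_name_list


# Hypothetical inputs for illustration only.
backward_conf = {'invoke': 'zeros_like(out_grad)'}
forward_names = ['zeros_like', 'scale', 'mean']
print(IsInvokeForwardApi(backward_conf, forward_names))  # True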
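
Similarly, a small sketch of the string rewrite the node generator performs when is_invoke_forward_api is true: the forward api named in `invoke` is redirected to its generated *_dygraph_function, which is the call the generated grad node uses when trace_backward holds so that a higher-order backward node gets created. The sample invoke string mirrors the scale_grad entry in legacy_backward.yaml; the variable names here are illustrative only.

# Invoke entry as the generator reads it from the yaml.
invoke = 'scale(out_grad, scale, 0.0, bias_after_scale)'
forward_api_name = invoke.split('(')[0].strip()
autograd_api = invoke.replace(forward_api_name,
                              forward_api_name + '_dygraph_function', 1)
print(autograd_api)  # scale_dygraph_function(out_grad, scale, 0.0, bias_after_scale)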