From 35a5e8ee9c7bb06728c64b0ac6971e56b11b59fc Mon Sep 17 00:00:00 2001 From: Zhanlue Yang Date: Fri, 18 Mar 2022 11:03:05 +0800 Subject: [PATCH] Refactored Final State Python-C Code Generation Scripts (#40650) * Refactored Final State Python-C Code Generation Scripts. * Bug fix --- .../final_state_generator/CMakeLists.txt | 1 + .../final_state_generator/python_c_gen.py | 495 ++++++++++++------ 2 files changed, 329 insertions(+), 167 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt b/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt index 53af6c1048d..771351dd4af 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt @@ -27,6 +27,7 @@ add_custom_target(eager_final_state_codegen set(tmp_python_c_output_path "${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/tmp_eager_final_state_op_function_impl.h") set(python_c_output_path "${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/eager_final_state_op_function_impl.h") + add_custom_target(eager_final_state_python_c_codegen COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py" "--api_yaml_path=${api_yaml_path}" diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py index c0ed77ecdc4..753c8ca3aaf 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py @@ -14,9 +14,18 @@ import os import argparse +import logging from eager_gen import namespace, yaml_types_mapping, ReadFwdFile, ParseDispensable, IsVectorTensorType, GetForwardFunctionName, ParseYamlForward, DetermineForwardPositionMap -skipped_fwd_api_names = set(["scale"]) +########################### +## Global Configurations ## +########################### +skipped_forward_api_names = set(["scale"]) + + +def SkipAPIGeneration(forward_api_name): + return (forward_api_name in skipped_forward_api_names) + atype_to_parsing_function = { "bool": "CastPyArg2Boolean", @@ -39,64 +48,31 @@ atype_to_parsing_function = { } -def ParseArguments(): - parser = argparse.ArgumentParser( - description='Eager Code Generator Args Parser') - parser.add_argument('--api_yaml_path', type=str) - parser.add_argument('--output_path', type=str) - - args = parser.parse_args() - return args - - def FindParsingFunctionFromAttributeType(atype): if atype not in atype_to_parsing_function.keys(): - print(f"Unable to find {atype} in atype_to_parsing_function.") - assert False + assert False, f"Unable to find {atype} in atype_to_parsing_function." return atype_to_parsing_function[atype] -def GeneratePythonCFunction(fwd_api_name, forward_inputs_position_map, - forward_attrs_list, forward_outputs_position_map, - optional_inputs, is_forward_only): - # forward_inputs_position_map = { "name" : [type, fwd_position] } - # forward_outputs_position_map = { "name" : [type, fwd_position] } - # forward_attrs_list = [ [attr_name, attr_type, default_value, orig_position], ...] - # optional_inputs = [name0, ...] - - # Get EagerTensor from args - # Get dygraph function call args - num_args = len(forward_inputs_position_map.keys()) + len(forward_attrs_list) - num_input_tensors = len(forward_inputs_position_map.keys()) - dygraph_function_call_list = ["" for i in range(num_args)] - get_eager_tensor_str = "" - for name, (ttype, pos) in forward_inputs_position_map.items(): - is_optional = (name in optional_inputs) - if IsVectorTensorType(ttype): - get_eager_tensor_str += f" auto {name} = GetTensorListFromArgs(\"{fwd_api_name}\", \"{name}\", args, {pos}, false);\n" - else: - if is_optional: - get_eager_tensor_str += f" auto {name} = GetOptionalTensorFromArgs(\"{fwd_api_name}\", \"{name}\", args, {pos}, false);\n" - else: - get_eager_tensor_str += f" auto {name} = GetTensorFromArgs(\"{fwd_api_name}\", \"{name}\", args, {pos}, false);\n" - dygraph_function_call_list[pos] = f"{name}" +########################## +## Refactored Functions ## +########################## +PARSE_PYTHON_C_TENSORS_TEMPLATE = \ +" auto {} = {}(\"{}\", \"{}\", args, {}, false);\n" + - parse_attributes_str = "" - # Get Attributes - for name, atype, _, pos in forward_attrs_list: - parsing_function = FindParsingFunctionFromAttributeType(atype) - key = f"{name}" +PARSE_PYTHON_C_ARGS_TEMPLATE = \ +""" PyObject* {}_obj = PyTuple_GET_ITEM(args, {});\n + {} {} = {}({}_obj, \"{}\", {});\n""" - parse_attributes_str += f" PyObject* {name}_obj = PyTuple_GET_ITEM(args, {pos});\n" - parse_attributes_str += f" {atype} {name} = {parsing_function}({name}_obj, \"{fwd_api_name}\", {pos});\n" - dygraph_function_call_list[pos] = f"{name}" - dygraph_function_call_str = ",".join(dygraph_function_call_list) +RECORD_EVENT_TEMPLATE = \ +" paddle::platform::RecordEvent {}(\"{} {}\", paddle::platform::TracerEventType::Operator, 1);" - pythonc_event_str = f"paddle::platform::RecordEvent pythonc_record_event(\"{fwd_api_name} pybind_imperative_func\", paddle::platform::TracerEventType::Operator, 1);" - PYTHON_C_FUNCTION_TEMPLATE = """ +PYTHON_C_FUNCTION_TEMPLATE = \ +""" static PyObject * eager_final_state_api_{}(PyObject *self, PyObject *args, PyObject *kwargs) {{ {} @@ -130,26 +106,50 @@ static PyObject * eager_final_state_api_{}(PyObject *self, PyObject *args, PyObj }} """ - namespace_str = "" - if len(namespace) > 0: - namespace_str = f"{namespace}::" - if is_forward_only: - fwd_function_name = "paddle::experimental::" + namespace_str + fwd_api_name - else: - fwd_function_name = namespace_str + GetForwardFunctionName(fwd_api_name) - python_c_function_str = PYTHON_C_FUNCTION_TEMPLATE.format( - fwd_api_name, pythonc_event_str, fwd_api_name, get_eager_tensor_str, - parse_attributes_str, fwd_function_name, dygraph_function_call_str) +FUNCTION_NAME_TEMPLATE = \ +"{}{}{}" - python_c_function_reg_str = f"{{\"final_state_{fwd_api_name}\", (PyCFunction)(void(*)(void)) {namespace_str}eager_final_state_api_{fwd_api_name}, METH_VARARGS | METH_KEYWORDS, \"C++ interface function for {fwd_api_name} in dygraph.\"}}\n" - return python_c_function_str, python_c_function_reg_str +PYTHON_C_FUNCTION_REG_TEMPLATE = \ +"{{\"final_state_{}\", (PyCFunction)(void(*)(void)) {}eager_final_state_api_{}, METH_VARARGS | METH_KEYWORDS, \"C++ interface function for {} in dygraph.\"}}" -def GenerateCoreOpsInfoMap(): - result = """ +PYTHON_C_WRAPPER_TEMPLATE = \ +""" +#pragma once + +#include "pybind11/detail/common.h" +#include "paddle/phi/api/all.h" +#include "paddle/phi/api/lib/dygraph_api.h" +#include "paddle/phi/common/backend.h" +#include "paddle/phi/common/data_type.h" +#include "paddle/phi/common/scalar.h" +#include "paddle/phi/common/scalar_array.h" +#include "paddle/phi/api/include/sparse_api.h" +#include "paddle/fluid/pybind/op_function_common.h" +#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" +#include "paddle/fluid/pybind/exception.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" +#include + +namespace paddle {{ +namespace pybind {{ + +{} + +static PyMethodDef EagerFinalStateMethods[] = {{ + {} +}}; + +}} // namespace pybind +}} // namespace paddle +""" + + +CORE_OPS_INFO = \ +""" static PyObject * eager_get_final_state_core_ops_args_info(PyObject *self) { PyThreadState *tstate = nullptr; try @@ -194,9 +194,11 @@ static PyObject * eager_get_final_state_core_ops_returns_info(PyObject *self) { return nullptr; } } - """ +""" + - core_ops_infos_registry = """ +CORE_OPS_INFO_REGISTRY = \ +""" {\"get_final_state_core_ops_args_info\", (PyCFunction)(void(*)(void))eager_get_final_state_core_ops_args_info, METH_NOARGS, \"C++ interface function for eager_get_final_state_core_ops_args_info.\"}, @@ -209,7 +211,259 @@ static PyObject * eager_get_final_state_core_ops_returns_info(PyObject *self) { METH_NOARGS, \"C++ interface function for eager_get_final_state_core_ops_returns_info.\"}, """ - return result, core_ops_infos_registry +NAMESPACE_WRAPPER_TEMPLATE = \ +"""namespace {} {{ + {} +}} +""" + + +####################### +## Generator Classes ## +####################### +class PythonCSingleFunctionGenerator: + def __init__(self, fwd_api_contents, namespace): + self.fwd_api_contents = fwd_api_contents + self.namespace = namespace + + # Raw Contents + self.forward_api_name = "" + self.forward_args_str = "" + self.forward_returns_str = "" + + # Raw Data + self.forward_attrs_list = None #[ [attr_name, attr_type, default_value, orig_position], ...] + self.forward_inputs_list = None #[ [arg_name, arg_type, orig_position], ...] + self.forward_returns_list = None #[ [ret_name, ret_type, orig_position], ...] + + # Processed Data + self.forward_inputs_position_map = None #{ "name" : [type, fwd_position] } + self.forward_outputs_position_map = None #{ "name" : [type, fwd_position] } + + # Special Op Attributes + self.optional_inputs = [] #[name, ...] + self.is_forward_only = True + + # Generated Results + self.python_c_function_str = "" + self.python_c_function_reg_str = "" + + def CollectRawContents(self): + fwd_api_contents = self.fwd_api_contents + + assert 'api' in fwd_api_contents.keys( + ), "Unable to find \"api\" in fwd_api_contents keys" + assert 'args' in fwd_api_contents.keys( + ), "Unable to find \"args\" in fwd_api_contents keys" + assert 'output' in fwd_api_contents.keys( + ), "Unable to find \"output\" in fwd_api_contents keys" + + self.forward_api_name = fwd_api_contents['api'] + self.forward_args_str = fwd_api_contents['args'] + self.forward_returns_str = fwd_api_contents['output'] + + def CollectIsForwardOnly(self): + fwd_api_contents = self.fwd_api_contents + self.is_forward_only = False if 'backward' in fwd_api_contents.keys( + ) else True + + def CollectOptionalInputs(self): + fwd_api_contents = self.fwd_api_contents + if 'optional' in fwd_api_contents.keys(): + self.optional_inputs = ParseDispensable(fwd_api_contents[ + 'optional']) + + def CollectForwardInOutAttr(self): + forward_args_str = self.forward_args_str + forward_returns_str = self.forward_returns_str + + self.forward_inputs_list, self.forward_attrs_list, self.forward_returns_list = ParseYamlForward( + forward_args_str, forward_returns_str) + + def CollectForwardPositionMap(self): + forward_inputs_list = self.forward_inputs_list + forward_returns_list = self.forward_returns_list + + self.forward_inputs_position_map, self.forward_outputs_position_map = DetermineForwardPositionMap( + forward_inputs_list, forward_returns_list) + + def GeneratePythonCFunction(self): + namespace = self.namespace + forward_api_name = self.forward_api_name + forward_attrs_list = self.forward_attrs_list + forward_inputs_position_map = self.forward_inputs_position_map + forward_outputs_position_map = self.forward_outputs_position_map + optional_inputs = self.optional_inputs + is_forward_only = self.is_forward_only + + # Generate Python-C Tensors Parsing Logic + get_eager_tensor_str = "" + for name, (ttype, pos) in forward_inputs_position_map.items(): + is_optional = (name in optional_inputs) + if IsVectorTensorType(ttype): + get_eager_tensor_str += PARSE_PYTHON_C_TENSORS_TEMPLATE.format( + name, "GetTensorListFromArgs", forward_api_name, name, pos) + else: + if is_optional: + get_eager_tensor_str += PARSE_PYTHON_C_TENSORS_TEMPLATE.format( + name, "GetOptionalTensorFromArgs", forward_api_name, + name, pos) + else: + get_eager_tensor_str += PARSE_PYTHON_C_TENSORS_TEMPLATE.format( + name, "GetTensorFromArgs", forward_api_name, name, pos) + + parse_attributes_str = "" + + # Generate Python-C Attributes Parsing Logic + for name, atype, _, pos in forward_attrs_list: + parsing_function_name = FindParsingFunctionFromAttributeType(atype) + parse_attributes_str += PARSE_PYTHON_C_ARGS_TEMPLATE.format( + name, pos, atype, name, parsing_function_name, name, + forward_api_name, pos) + + # Generate Dygraph Function Call Logic + num_args = len(forward_inputs_position_map.keys()) + len( + forward_attrs_list) + dygraph_function_call_list = ["" for i in range(num_args)] + for name, (_, pos) in forward_inputs_position_map.items(): + dygraph_function_call_list[pos] = f"{name}" + for name, _, _, pos in forward_attrs_list: + dygraph_function_call_list[pos] = f"{name}" + dygraph_function_call_str = ",".join(dygraph_function_call_list) + + # Generate Python-C Function Definitions + if is_forward_only: + fwd_function_name = FUNCTION_NAME_TEMPLATE.format( + "paddle::experimental::", namespace, forward_api_name) + else: + fwd_function_name = FUNCTION_NAME_TEMPLATE.format( + "", namespace, GetForwardFunctionName(forward_api_name)) + + # Generate Record Event for performance profiling + pythonc_record_event_str = RECORD_EVENT_TEMPLATE.format( + "pythonc_record_event", forward_api_name, "pybind_imperative_func") + self.python_c_function_str = PYTHON_C_FUNCTION_TEMPLATE.format( + forward_api_name, pythonc_record_event_str, forward_api_name, + get_eager_tensor_str, parse_attributes_str, fwd_function_name, + dygraph_function_call_str) + + # Generate Python-C Function Registration + self.python_c_function_reg_str = PYTHON_C_FUNCTION_REG_TEMPLATE.format( + forward_api_name, namespace, forward_api_name, forward_api_name) + + def run(self): + # Initialized is_forward_only + self.CollectIsForwardOnly() + + # Initialized forward_api_name, forward_args_str, forward_returns_str + self.CollectRawContents() + if SkipAPIGeneration(self.forward_api_name): return False + + # Initialized optional_inputs + self.CollectOptionalInputs() + + # Initialized forward_inputs_list, forward_returns_list, forward_attrs_list + self.CollectForwardInOutAttr() + logging.info( + f"Parsed Original Forward Inputs List: \n{self.forward_inputs_list}") + logging.info( + f"Prased Original Forward Attrs List: \n{self.forward_attrs_list}") + logging.info( + f"Parsed Original Forward Returns List: \n{self.forward_returns_list}" + ) + + # Initialized forward_inputs_position_map, forward_outputs_position_map + self.CollectForwardPositionMap() + logging.info( + f"Generated Forward Input Position Map: {self.forward_inputs_position_map}" + ) + logging.info( + f"Generated Forward Output Position Map: {self.forward_outputs_position_map}" + ) + + # Code Generation + self.GeneratePythonCFunction() + logging.info( + f"Generated Python-C Function: {self.python_c_function_str}") + logging.info( + f"Generated Python-C Function Declaration: {self.python_c_function_reg_str}" + ) + + return True + + +class PythonCYamlGenerator: + def __init__(self, path): + self.yaml_path = path + + self.namespace = "" + self.forward_api_list = [] + + # Generated Result + self.python_c_functions_reg_str = "" + self.python_c_functions_str = "" + + def ParseYamlContents(self): + yaml_path = self.yaml_path + self.forward_api_list = ReadFwdFile(yaml_path) + + def GeneratePythonCFunctions(self): + namespace = self.namespace + forward_api_list = self.forward_api_list + + for forward_api_content in forward_api_list: + f_generator = PythonCSingleFunctionGenerator(forward_api_content, + namespace) + status = f_generator.run() + + if status == True: + self.python_c_functions_reg_str += f_generator.python_c_function_reg_str + ",\n" + self.python_c_functions_str += f_generator.python_c_function_str + "\n" + + def InferNameSpace(self): + yaml_path = self.yaml_path + if "sparse" in yaml_path: + self.namespace = "sparse::" + + def AttachNamespace(self): + namespace = self.namespace + python_c_functions_str = self.python_c_functions_str + + if namespace != "": + if namespace.endswith("::"): + namespace = namespace[:-2] + self.python_c_functions_str = NAMESPACE_WRAPPER_TEMPLATE.format( + namespace, python_c_functions_str) + + def run(self): + # Infer namespace from yaml_path + self.InferNameSpace() + + # Read Yaml file + self.ParseYamlContents() + + # Code Generation + self.GeneratePythonCFunctions() + + # Wrap with namespace + self.AttachNamespace() + + +############################ +## Code Generation Helper ## +############################ +def ParseArguments(): + parser = argparse.ArgumentParser( + description='Eager Code Generator Args Parser') + parser.add_argument('--api_yaml_path', type=str) + parser.add_argument('--output_path', type=str) + + args = parser.parse_args() + return args + + +def GenerateCoreOpsInfoMap(): + return CORE_OPS_INFO, CORE_OPS_INFO_REGISTRY def GeneratePythonCWrappers(python_c_function_str, python_c_function_reg_str): @@ -221,36 +475,6 @@ def GeneratePythonCWrappers(python_c_function_str, python_c_function_reg_str): python_c_function_reg_str += core_ops_infos_registry python_c_function_reg_str += "\n {nullptr,nullptr,0,nullptr}" - PYTHON_C_WRAPPER_TEMPLATE = """ -#pragma once - -#include "pybind11/detail/common.h" -#include "paddle/phi/api/all.h" -#include "paddle/phi/api/lib/dygraph_api.h" -#include "paddle/phi/common/backend.h" -#include "paddle/phi/common/data_type.h" -#include "paddle/phi/common/scalar.h" -#include "paddle/phi/common/scalar_array.h" -#include "paddle/phi/api/include/sparse_api.h" -#include "paddle/fluid/pybind/op_function_common.h" -#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" -#include "paddle/fluid/pybind/exception.h" -#include "paddle/fluid/platform/profiler/event_tracing.h" -#include - -namespace paddle {{ -namespace pybind {{ - -{} - -static PyMethodDef EagerFinalStateMethods[] = {{ - {} -}}; - -}} // namespace pybind -}} // namespace paddle - -""" python_c_str = PYTHON_C_WRAPPER_TEMPLATE.format(python_c_function_str, python_c_function_reg_str) @@ -264,86 +488,23 @@ def GeneratePythonCFile(filepath, python_c_str): if __name__ == "__main__": args = ParseArguments() - api_yaml_paths = args.api_yaml_path.split(",") - python_c_functions_reg_str = "" - python_c_functions_str = "" - + generated_python_c_functions = "" + generated_python_c_registration = "" for i in range(len(api_yaml_paths)): api_yaml_path = api_yaml_paths[i] - if "sparse" in api_yaml_path: - namespace = "sparse" - else: - namespace = "" - - fwd_api_list = ReadFwdFile(api_yaml_path) - - python_c_function_list = [] - python_c_function_reg_list = [] - for fwd_api in fwd_api_list: - - # We only generate Ops with grad - is_forward_only = False - if 'backward' not in fwd_api.keys(): - is_forward_only = True - - assert 'api' in fwd_api.keys() - assert 'args' in fwd_api.keys() - assert 'output' in fwd_api.keys() - - fwd_api_name = fwd_api['api'] - fwd_args_str = fwd_api['args'] - fwd_returns_str = fwd_api['output'] - - if fwd_api_name in skipped_fwd_api_names: - continue - - # Parse Dispensable Inputs - optional_inputs = [] - if 'optional' in fwd_api.keys(): - optional_inputs = ParseDispensable(fwd_api['optional']) - - # Collect Original Forward Inputs/Outputs and then perform validation checks - forward_inputs_list, forward_attrs_list, forward_returns_list = ParseYamlForward( - fwd_args_str, fwd_returns_str) - print("Parsed Original Forward Inputs List: ", forward_inputs_list) - print("Prased Original Forward Attrs List: ", forward_attrs_list) - print("Parsed Original Forward Returns List: ", - forward_returns_list) - - forward_inputs_position_map, forward_outputs_position_map = DetermineForwardPositionMap( - forward_inputs_list, forward_returns_list) - print("Generated Forward Input Position Map: ", - forward_inputs_position_map) - print("Generated Forward Output Position Map: ", - forward_outputs_position_map) - - python_c_function_str, python_c_function_reg_str = GeneratePythonCFunction( - fwd_api_name, forward_inputs_position_map, forward_attrs_list, - forward_outputs_position_map, optional_inputs, is_forward_only) - python_c_function_list.append(python_c_function_str) - python_c_function_reg_list.append(python_c_function_reg_str) - print("Generated Python-C Function: ", python_c_function_str) - - # Append Namespace - python_c_functions_reg_str += ",\n".join( - python_c_function_reg_list) + "," - python_c_functions = "\n".join(python_c_function_list) - if len(namespace) > 0: - python_c_functions_str += f"""namespace {namespace} {{ - {python_c_functions} -}} -""" + y_generator = PythonCYamlGenerator(api_yaml_path) + y_generator.run() - else: - python_c_functions_str += python_c_functions + generated_python_c_functions += y_generator.python_c_functions_str + "\n" + generated_python_c_registration += y_generator.python_c_functions_reg_str + "\n" - python_c_str = GeneratePythonCWrappers(python_c_functions_str, - python_c_functions_reg_str) + python_c_str = GeneratePythonCWrappers(generated_python_c_functions, + generated_python_c_registration) - print("Generated Python-C Codes: ", python_c_str) + logging.info(f"Generated Python-C Codes: \n{python_c_str}") output_path = args.output_path for path in [output_path]: -- GitLab