未验证 提交 10145cb6 编写于 作者: HappyHeavyRain's avatar HappyHeavyRain 提交者: GitHub

Add fuse_ops.yaml and fused_backward.yaml (#52010)

* add fused_yaml fused_backward

* fix eager_function bug

* add some comment of fused yaml file

* add 'support_dygraph_mode' configuration in fused yaml

* delete some 'fused_api.h' in include file

* add fused flag in api_gen
上级 7d416161
......@@ -7,16 +7,20 @@ paddle/fluid/op_use_default_grad_maker_DEV.spec
paddle/fluid/op_use_default_grad_maker_PR.spec
paddle/fluid/operators/ops_extra_info.cc
paddle/phi/api/backward/backward_api.h
paddle/phi/api/backward/fused_backward_api.h
paddle/phi/api/backward/sparse_bw_api.h
paddle/phi/api/include/api.h
paddle/phi/api/include/fused_api.h
paddle/phi/api/include/operants_base.h
paddle/phi/api/include/operants_manager.h
paddle/phi/api/include/sparse_api.h
paddle/phi/api/include/strings_api.h
paddle/phi/api/include/tensor_operants.h
paddle/phi/api/lib/api.cc
paddle/phi/api/lib/fused_api.cc
paddle/phi/api/lib/dygraph_api.*
paddle/phi/api/lib/backward_api.cc
paddle/phi/api/lib/fused_backward_api.cc
paddle/phi/api/lib/operants_manager.cc
paddle/phi/api/lib/sparse_api.cc
paddle/phi/api/lib/strings_api.cc
......@@ -85,6 +89,7 @@ tools/nvcc_lazy
paddle/fluid/operators/generated_op*.cc
paddle/fluid/operators/generated_sparse_op.cc
paddle/fluid/operators/generated_static_op.cc
paddle/fluid/operators/generated_fused_op.cc
paddle/phi/ops/compat/generated_*.cc
paddle/phi/api/yaml/parsed_apis/
paddle/fluid/operators/generator/parsed_ops/
......
......@@ -26,6 +26,7 @@
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/include/fused_api.h"
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/kernels/funcs/math_function.h"
......
set(api_yaml_path
"${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/ops.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/legacy_ops.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/sparse_ops.yaml"
"${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/ops.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/legacy_ops.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/sparse_ops.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/fused_ops.yaml"
)
set(backward_yaml_path
"${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/backward.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/legacy_backward.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/sparse_backward.yaml"
"${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/backward.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/legacy_backward.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/sparse_backward.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/fused_backward.yaml"
)
set(tmp_forwards_cc_path
"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/forwards/tmp_dygraph_functions.cc"
......
......@@ -119,12 +119,31 @@ def ReadFwdFile(filepath):
# empty file loaded by yaml is None
contents = yaml.load(f, Loader=yaml.FullLoader)
f.close()
# not all fused ops support dygraph
if filepath.endswith("fused_ops.yaml") is True:
new_apis = [
api
for api in contents
if "support_dygraph_mode" in api
and api["support_dygraph_mode"] is True
]
contents = new_apis
return contents if contents is not None else []
def ReadBwdFile(filepath):
f = open(filepath, 'r')
contents = yaml.load(f, Loader=yaml.FullLoader)
# not all fused ops support dygraph
if filepath.endswith("fused_backward.yaml") is True:
new_apis = [
api
for api in contents
if "support_dygraph_mode" in api
and api["support_dygraph_mode"] is True
]
contents = new_apis
ret = {}
if contents is not None:
for content in contents:
......
......@@ -336,6 +336,7 @@ NODE_CC_FILE_TEMPLATE = """
#include "glog/logging.h"
#include "paddle/phi/api/all.h"
#include "paddle/phi/api/backward/backward_api.h"
#include "paddle/phi/api/backward/fused_backward_api.h"
#include "paddle/phi/api/backward/sparse_bw_api.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/framework/op_registry.h"
......
......@@ -6,6 +6,7 @@ set(op_yaml_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/ops.yaml)
set(legacy_op_yaml_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/legacy_ops.yaml)
set(bw_op_yaml_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/backward.yaml)
set(static_op_yaml_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/static_ops.yaml)
set(fused_op_yaml_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/fused_ops.yaml)
set(legacy_bw_op_yaml_file
${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/legacy_backward.yaml)
set(sparse_op_yaml_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/sparse_ops.yaml)
......@@ -13,6 +14,8 @@ set(sparse_bw_op_yaml_file
${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/sparse_backward.yaml)
set(static_bw_op_yaml_file
${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/static_backward.yaml)
set(fused_bw_op_yaml_file
${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/fused_backward.yaml)
if(NOT PYTHONINTERP_FOUND)
find_package(PythonInterp REQUIRED)
......@@ -40,10 +43,14 @@ set(generated_op_path_4
${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generated_op4.cc)
set(generated_static_op_path
${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generated_static_op.cc)
set(generated_fused_op_path
${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generated_fused_op.cc)
set(generated_sparse_ops_path
${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generated_sparse_op.cc)
set(generated_argument_mapping_path
${CMAKE_SOURCE_DIR}/paddle/phi/ops/compat/generated_sig.cc)
set(generated_fused_argument_mapping_path
${CMAKE_SOURCE_DIR}/paddle/phi/ops/compat/generated_fused_sig.cc)
set(generated_static_argument_mapping_path
${CMAKE_SOURCE_DIR}/paddle/phi/ops/compat/generated_static_sig.cc)
set(generated_sparse_argument_mapping_path
......@@ -54,7 +61,9 @@ message(
- ${op_yaml_file}
- ${legacy_op_yaml_file}
- ${bw_op_yaml_file}
- ${legacy_bw_op_yaml_file}")
- ${legacy_bw_op_yaml_file}
- ${fused_op_yaml_file}
- ${static_op_yaml_file}")
execute_process(
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generator
COMMAND ${CMAKE_COMMAND} -E make_directory ${parsed_op_dir}
......@@ -69,6 +78,8 @@ execute_process(
--output_path ./parsed_ops/legacy_backward_ops.parsed.yaml --backward
COMMAND ${PYTHON_EXECUTABLE} parse_op.py --op_yaml_path ${static_op_yaml_file}
--output_path ./parsed_ops/static_ops.parsed.yaml
COMMAND ${PYTHON_EXECUTABLE} parse_op.py --op_yaml_path ${fused_op_yaml_file}
--output_path ./parsed_ops/fused_ops.parsed.yaml
COMMAND ${PYTHON_EXECUTABLE} parse_op.py --op_yaml_path ${sparse_op_yaml_file}
--output_path ./parsed_ops/sparse_ops.parsed.yaml
COMMAND
......@@ -77,6 +88,9 @@ execute_process(
COMMAND
${PYTHON_EXECUTABLE} parse_op.py --op_yaml_path ${static_bw_op_yaml_file}
--output_path ./parsed_ops/static_backward.parsed.yaml --backward
COMMAND
${PYTHON_EXECUTABLE} parse_op.py --op_yaml_path ${fused_bw_op_yaml_file}
--output_path ./parsed_ops/fused_backward.parsed.yaml --backward
RESULTS_VARIABLE _results)
foreach(_result in ${_results})
if(${_result})
......@@ -111,6 +125,17 @@ if(${_result})
message(FATAL_ERROR "static ops validation failed, exiting.")
endif()
# Cross-validate the parsed fused forward ops against the parsed fused
# backward ops. Runs at configure time from the generator directory, so the
# relative ./parsed_ops/ paths resolve against WORKING_DIRECTORY.
execute_process(
  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generator
  COMMAND
    ${PYTHON_EXECUTABLE} cross_validate.py --forward_yaml_paths
    ./parsed_ops/fused_ops.parsed.yaml --backward_yaml_paths
    ./parsed_ops/fused_backward.parsed.yaml
  RESULT_VARIABLE _result)
# RESULT_VARIABLE holds either the integer exit code or a string describing
# why the process could not be run. `if(${_result})` would treat such a string
# as an (undefined) variable name and evaluate to false, silently ignoring the
# failure; comparing against 0 catches both cases.
if(NOT _result EQUAL 0)
  message(FATAL_ERROR "fused ops validation failed, exiting.")
endif()
execute_process(
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generator
COMMAND
......@@ -158,6 +183,21 @@ if(${_result})
message(FATAL_ERROR "operator codegen failed, exiting.")
endif()
# Generate the fused operator definitions and their argument-mapping file from
# the parsed fused forward/backward YAML. Both outputs are written with a
# ".tmp" suffix appended to the final paths.
execute_process(
  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generator
  COMMAND
    ${PYTHON_EXECUTABLE} generate_op.py --ops_yaml_path
    ./parsed_ops/fused_ops.parsed.yaml --backward_yaml_path
    ./parsed_ops/fused_backward.parsed.yaml --op_version_yaml_path
    ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/op_version.yaml
    --op_compat_yaml_path ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/op_compat.yaml
    --output_op_path "${generated_fused_op_path}.tmp" --output_arg_map_path
    "${generated_fused_argument_mapping_path}.tmp"
  RESULT_VARIABLE _result)
# RESULT_VARIABLE is an exit code or an error string when the process could
# not be launched; `if(${_result})` evaluates such a string as false and lets
# the failure pass unnoticed, so compare against 0 explicitly.
if(NOT _result EQUAL 0)
  # Name the fused step so this failure is distinguishable from the other
  # operator codegen steps in this file.
  message(FATAL_ERROR "fused operator codegen failed, exiting.")
endif()
execute_process(
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generator
COMMAND
......@@ -177,10 +217,12 @@ set(generated_static_files
"${generated_op_path_3}"
"${generated_op_path_4}"
"${generated_static_op_path}"
"${generated_fused_op_path}"
"${generated_sparse_ops_path}"
"${generated_argument_mapping_path}"
"${generated_static_argument_mapping_path}"
"${generated_sparse_argument_mapping_path}")
"${generated_sparse_argument_mapping_path}"
"${generated_fused_argument_mapping_path}")
foreach(generated_static_file ${generated_static_files})
if(EXISTS "${generated_static_file}.tmp" AND EXISTS
......
......@@ -340,6 +340,7 @@ def check_op_config(op_entry, op_name):
'no_need_buffer',
'data_transform',
'composite',
'support_dygraph_mode',
)
infer_meta_key_set = ('func', 'param')
kernel_key_set = (
......
......@@ -27,6 +27,7 @@ limitations under the License. */
// new phi apis
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/include/context_pool.h"
#include "paddle/phi/api/include/fused_api.h"
#include "paddle/phi/api/include/sparse_api.h"
#include "paddle/phi/api/include/tensor.h"
......
......@@ -67,6 +67,26 @@ set(dygraph_api_source_file
set(dygraph_api_header_file_tmp ${dygraph_api_header_file}.tmp)
set(dygraph_api_source_file_tmp ${dygraph_api_source_file}.tmp)
# Fused forward API: the YAML spec plus the generated header and source.
# The *_tmp variables are the same paths with a ".tmp" suffix.
set(fused_api_yaml_file
    "${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/fused_ops.yaml")
set(fused_api_header_file
    "${CMAKE_SOURCE_DIR}/paddle/phi/api/include/fused_api.h")
set(fused_api_source_file
    "${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/fused_api.cc")
set(fused_api_header_file_tmp "${fused_api_header_file}.tmp")
set(fused_api_source_file_tmp "${fused_api_source_file}.tmp")

# Fused backward API: generator script, YAML spec, generated header and
# source, and the ".tmp" counterparts of the two generated files.
set(fused_bw_api_gen_file
    "${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator/backward_api_gen.py")
set(fused_bw_api_yaml_file
    "${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/fused_backward.yaml")
set(fused_bw_api_header_file
    "${CMAKE_SOURCE_DIR}/paddle/phi/api/backward/fused_backward_api.h")
set(fused_bw_api_source_file
    "${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/fused_backward_api.cc")
set(fused_bw_api_header_file_tmp "${fused_bw_api_header_file}.tmp")
set(fused_bw_api_source_file_tmp "${fused_bw_api_source_file}.tmp")
# sparse api file
set(sparse_api_gen_file
${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator/sparse_api_gen.py)
......@@ -171,6 +191,40 @@ add_custom_command(
${legacy_bw_api_yaml_file}
VERBATIM)
# Build rule for the fused forward API header and source.
# The generator writes to the .tmp paths; copy_if_different then updates the
# real outputs only when their content changed.
add_custom_command(
  OUTPUT
    ${fused_api_header_file}
    ${fused_api_source_file}
  # NOTE(review): installs pyyaml through pip each time this rule runs;
  # presumably required by the generator script -- confirm it is still needed.
  COMMAND
    ${PYTHON_EXECUTABLE} -m pip install pyyaml
  # --is_fused_ops_yaml switches the generator into fused-ops mode.
  COMMAND
    ${PYTHON_EXECUTABLE} ${api_gen_file}
    --api_yaml_path ${fused_api_yaml_file}
    --is_fused_ops_yaml
    --api_header_path ${fused_api_header_file_tmp}
    --api_source_path ${fused_api_source_file_tmp}
  COMMAND
    ${CMAKE_COMMAND} -E copy_if_different
    ${fused_api_header_file_tmp} ${fused_api_header_file}
  COMMAND
    ${CMAKE_COMMAND} -E copy_if_different
    ${fused_api_source_file_tmp} ${fused_api_source_file}
  COMMENT "copy_if_different ${fused_api_header_file} ${fused_api_source_file}"
  DEPENDS
    ${fused_api_yaml_file}
    ${api_gen_file}
    ${api_gen_base}
  VERBATIM)
# Build rule for the fused backward API header and source.
# The generator writes to the .tmp paths (also declared as outputs here);
# copy_if_different then updates the real files only when content changed.
add_custom_command(
  OUTPUT ${fused_bw_api_header_file} ${fused_bw_api_source_file}
         ${fused_bw_api_header_file_tmp} ${fused_bw_api_source_file_tmp}
  # --is_fused_backward_yaml switches the generator into fused-backward mode.
  COMMAND
    ${PYTHON_EXECUTABLE} ${fused_bw_api_gen_file} --backward_yaml_path
    ${fused_bw_api_yaml_file} --is_fused_backward_yaml --backward_header_path
    ${fused_bw_api_header_file_tmp} --backward_source_path
    ${fused_bw_api_source_file_tmp}
  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_bw_api_header_file_tmp}
          ${fused_bw_api_header_file}
  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_bw_api_source_file_tmp}
          ${fused_bw_api_source_file}
  COMMENT
    "copy_if_different ${fused_bw_api_header_file} ${fused_bw_api_source_file}"
  # Depend on the script that COMMAND actually runs (fused_bw_api_gen_file).
  # bw_api_gen_file is kept as well; it is defined outside this section.
  DEPENDS ${fused_bw_api_yaml_file} ${fused_bw_api_gen_file}
          ${bw_api_gen_file} ${api_gen_base}
  VERBATIM)
# generate sparse api
add_custom_command(
OUTPUT ${sparse_api_header_file} ${sparse_api_source_file}
......@@ -333,7 +387,7 @@ cc_library(
phi_profiler)
cc_library(
phi_function_api
SRCS ${api_source_file}
SRCS ${api_source_file} ${fused_api_source_file}
DEPS phi_tensor_raw
phi
kernel_dispatch
......@@ -344,7 +398,7 @@ cc_library(
phi_profiler)
cc_library(
phi_bw_function_api
SRCS ${bw_api_source_file}
SRCS ${bw_api_source_file} ${fused_bw_api_source_file}
DEPS phi_tensor_raw
phi
kernel_dispatch
......
......@@ -605,16 +605,6 @@
kernel :
func : frame_grad
- backward_op : fused_dropout_add_grad
forward : fused_dropout_add (Tensor x, Tensor y, Scalar p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(seed_offset)
args : (Tensor seed_offset, Tensor out_grad, Scalar p, bool is_test, str mode, bool fix_seed)
output : Tensor(x_grad), Tensor(y_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out_grad, out_grad]
kernel :
func : fused_dropout_add_grad
- backward_op : gather_nd_grad
forward : gather_nd (Tensor x, Tensor index) -> Tensor(out)
args : (Tensor x, Tensor index, Tensor out_grad)
......
# This file is designed for fused C++ backward operators; it manages the
# generated code for dynamic mode and static mode.
# Operators in this file accept an extra configuration item, "support_dygraph_mode".
# If an operator has "support_dygraph_mode : true", it supports dygraph mode.
- backward_op : fused_dropout_add_grad
forward : fused_dropout_add (Tensor x, Tensor y, Scalar p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(seed_offset)
args : (Tensor seed_offset, Tensor out_grad, Scalar p, bool is_test, str mode, bool fix_seed)
output : Tensor(x_grad), Tensor(y_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out_grad, out_grad]
kernel :
func : fused_dropout_add_grad
support_dygraph_mode : true
# This file is designed for fused C++ forward operators; it manages the
# generated code for dynamic mode and static mode.
# Operators in this file accept an extra configuration item, "support_dygraph_mode".
# If an operator has "support_dygraph_mode : true", it supports dygraph mode.
- op : embedding_with_eltwise_add_xpu
args : (Tensor[] ids, Tensor[] tables, int64_t padding_idx)
output: Tensor
infer_meta :
func: EmbeddingWithEltwiseAddXPUInferMeta
kernel:
func: embedding_with_eltwise_add_xpu
data_type: tables
- op : fc_xpu
args : (Tensor x, Tensor x_max, Tensor w, Tensor w_max, Tensor bias, int in_num_col_dims, bool transpose_x, float alpha, float beta, int act_type, float act_alpha)
output : Tensor(out), Tensor(out_max)
infer_meta :
func : FcXPUInferMeta
kernel :
func : fc_xpu
data_type : x
optional : bias, x_max
- op : fused_dropout_add
args : (Tensor x, Tensor y, Scalar p, bool is_test, str mode, int seed, bool fix_seed)
output : Tensor(out), Tensor(seed_offset)
infer_meta :
func : FusedDropoutAddInferMeta
kernel :
func : fused_dropout_add
data_type : x
backward : fused_dropout_add_grad
support_dygraph_mode : true
- op : fused_linear_param_grad_add
args : (Tensor x, Tensor dout, Tensor dweight, Tensor dbias, bool multi_precision = true)
output : Tensor(dweight_out), Tensor(dbias_out)
infer_meta:
func : FusedLinearParamGradAddInferMeta
optional : dweight, dbias
kernel:
func : fused_linear_param_grad_add
data_type : dout
support_dygraph_mode : true
- op : fused_multi_transformer_xpu
args : (Tensor x, Tensor[] ln_scale, Tensor[] ln_bias, Tensor[] qkvw, Tensor[] qkvw_max, Tensor[] qkv_bias, Tensor[] out_linear_w, Tensor[] out_linear_wmax, Tensor[] out_linear_bias, Tensor[] ffn_ln_scale, Tensor[] ffn_ln_bias, Tensor[] ffn1_weight, Tensor[] ffn1_weight_max, Tensor[] ffn1_bias, Tensor[] ffn2_weight, Tensor[] ffn2_weight_max, Tensor[] ffn2_bias, Tensor[] cache_kv, Tensor[] pre_caches, Tensor rotary_pos_emb, Tensor time_step, Tensor seq_lengths, Tensor src_mask, bool pre_layer_norm, int rotary_emb_dims, float epsilon, float dropout_rate, bool is_test, str dropout_implementation, str act_method, bool trans_qkvw, int ring_id)
output : Tensor(out), Tensor[](cache_kv_out){out_linear_w.size()}
infer_meta :
func : FusedMultiTransformerXpuInferMeta
kernel :
func : fused_multi_transformer_xpu
data_type : x
optional : cache_kv, pre_caches, rotary_pos_emb, time_step, seq_lengths, src_mask
- op : generate_sequence_xpu
args : (Tensor x, DataType dtype)
output : Tensor
infer_meta :
func : GenerateSequenceXPUInferMeta
kernel :
func : generate_sequence_xpu
data_type : dtype
- op : multi_encoder_xpu
args : (Tensor x, Tensor[] fc_weight, Tensor[] fc_weight_max, Tensor[] fc_bias, Tensor[] ln_scale, Tensor[] ln_bias, Tensor mask, int layer_num, bool norm_before, int hidden_dim, int head_num, int size_per_head, int ffn_hidden_dim_scale, int act_type, int relative_type, int slice_idx)
output : Tensor(out), Tensor(x_fp16), Tensor(out_fp16)
infer_meta :
func : MultiEncoderXPUInferMeta
kernel :
func : multi_encoder_xpu
data_type : x
optional : mask, x_fp16, out_fp16
......@@ -406,7 +406,9 @@ PD_DECLARE_API(from_blob);
"""
def generate_api(api_yaml_path, header_file_path, source_file_path):
def generate_api(
api_yaml_path, is_fused_ops_yaml, header_file_path, source_file_path
):
apis = []
for each_api_yaml in api_yaml_path:
......@@ -424,7 +426,21 @@ def generate_api(api_yaml_path, header_file_path, source_file_path):
header_file.write(header_include())
header_file.write(namespace[0])
include_header_file = "paddle/phi/api/include/api.h"
include_header_file = (
"paddle/phi/api/include/fused_api.h"
if is_fused_ops_yaml is True
else "paddle/phi/api/include/api.h"
)
# not all fused ops support dygraph
if is_fused_ops_yaml is True:
new_apis = [
api
for api in apis
if "support_dygraph_mode" in api
and api["support_dygraph_mode"] is True
]
apis = new_apis
source_file.write(source_include(include_header_file))
source_file.write(namespace[0])
......@@ -456,6 +472,12 @@ def main():
default=['paddle/phi/api/yaml/ops.yaml'],
)
parser.add_argument(
'--is_fused_ops_yaml',
help='flag of fused ops yaml',
action='store_true',
)
parser.add_argument(
'--api_header_path',
help='output of generated api header code file',
......@@ -471,10 +493,13 @@ def main():
options = parser.parse_args()
api_yaml_path = options.api_yaml_path
is_fused_ops_yaml = options.is_fused_ops_yaml
header_file_path = options.api_header_path
source_file_path = options.api_source_path
generate_api(api_yaml_path, header_file_path, source_file_path)
generate_api(
api_yaml_path, is_fused_ops_yaml, header_file_path, source_file_path
)
if __name__ == '__main__':
......
......@@ -269,7 +269,7 @@ def header_include():
"""
def source_include(header_file_path):
def source_include(header_file_path, fw_header_file_path):
return f"""
#include "{header_file_path}"
#include <memory>
......@@ -282,7 +282,7 @@ def source_include(header_file_path):
#include "paddle/phi/api/lib/kernel_dispatch.h"
#include "paddle/phi/common/type_traits.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/api/include/api.h"
#include "{fw_header_file_path}"
#include "paddle/phi/infermeta/backward.h"
#include "paddle/phi/infermeta/unary.h"
......@@ -310,7 +310,10 @@ namespace experimental {
def generate_backward_api(
backward_yaml_path, header_file_path, source_file_path
backward_yaml_path,
is_fused_backward_yaml,
header_file_path,
source_file_path,
):
bw_apis = []
......@@ -329,9 +332,29 @@ def generate_backward_api(
header_file.write(header_include())
header_file.write(namespace[0])
include_header_file = "paddle/phi/api/backward/backward_api.h"
source_file.write(source_include(include_header_file))
include_header_file = (
"paddle/phi/api/backward/fused_backward_api.h"
if is_fused_backward_yaml
else "paddle/phi/api/backward/backward_api.h"
)
include_fw_header_file = (
"paddle/phi/api/include/fused_api.h"
if is_fused_backward_yaml
else "paddle/phi/api/include/api.h"
)
source_file.write(
source_include(include_header_file, include_fw_header_file)
)
source_file.write(namespace[0])
# not all fused ops support dygraph
if is_fused_backward_yaml is True:
new_bw_apis = [
bw_api
for bw_api in bw_apis
if "support_dygraph_mode" in bw_api
and bw_api["support_dygraph_mode"] is True
]
bw_apis = new_bw_apis
for bw_api in bw_apis:
bw_api = BackwardAPI(bw_api)
......@@ -355,6 +378,13 @@ def main():
nargs='+',
default=['paddle/phi/api/yaml/backward.yaml'],
)
parser.add_argument(
'--is_fused_backward_yaml',
help='flag of fused backward yaml',
action='store_true',
)
parser.add_argument(
'--backward_header_path',
help='output of generated backward header code file',
......@@ -370,11 +400,15 @@ def main():
options = parser.parse_args()
backward_yaml_path = options.backward_yaml_path
is_fused_backward_yaml = options.is_fused_backward_yaml
header_file_path = options.backward_header_path
source_file_path = options.backward_source_path
generate_backward_api(
backward_yaml_path, header_file_path, source_file_path
backward_yaml_path,
is_fused_backward_yaml,
header_file_path,
source_file_path,
)
......
......@@ -726,16 +726,6 @@
optional : skip_update, master_params
inplace : (params -> params_out), (moments1 -> moments1_out), (moments2 -> moments2_out), (beta1_pows -> beta1_pows_out), (beta2_pows -> beta2_pows_out), (master_params -> master_params_out)
- op : fused_linear_param_grad_add
args : (Tensor x, Tensor dout, Tensor dweight, Tensor dbias, bool multi_precision = true)
output : Tensor(dweight_out), Tensor(dbias_out)
infer_meta:
func : FusedLinearParamGradAddInferMeta
optional : dweight, dbias
kernel:
func : fused_linear_param_grad_add
data_type : dout
- op : gather
args : (Tensor x, Tensor index, Scalar(int) axis=0)
output : Tensor(out)
......
......@@ -584,16 +584,6 @@
func : frame
backward : frame_grad
- op : fused_dropout_add
args : (Tensor x, Tensor y, Scalar p, bool is_test, str mode, int seed, bool fix_seed)
output : Tensor(out), Tensor(seed_offset)
infer_meta :
func : FusedDropoutAddInferMeta
kernel :
func : fused_dropout_add
data_type : x
backward : fused_dropout_add_grad
- op : gather_nd
args : (Tensor x, Tensor index)
output : Tensor
......
......@@ -47,15 +47,6 @@
func : broadcast
param: [x, root]
- op : embedding_with_eltwise_add_xpu
args : (Tensor[] ids, Tensor[] tables, int64_t padding_idx)
output: Tensor
infer_meta :
func: EmbeddingWithEltwiseAddXPUInferMeta
kernel:
func: embedding_with_eltwise_add_xpu
data_type: tables
- op : equal
args : (Tensor x, Tensor y, int axis = -1, bool force_cpu=false)
output : Tensor(out)
......@@ -68,16 +59,6 @@
backend : x
force_backend : force_cpu
- op : fc_xpu
args : (Tensor x, Tensor x_max, Tensor w, Tensor w_max, Tensor bias, int in_num_col_dims, bool transpose_x, float alpha, float beta, int act_type, float act_alpha)
output : Tensor(out), Tensor(out_max)
infer_meta :
func : FcXPUInferMeta
kernel :
func : fc_xpu
data_type : x
optional : bias, x_max
- op : frobenius_norm
args : (Tensor x, IntArray axis={0}, bool keepdim=false, bool reduce_all=false, int in_dtype=-1, int out_dtype=-1)
output : Tensor(out)
......@@ -88,25 +69,6 @@
param : [x, axis, keepdim, reduce_all]
backward : frobenius_norm_grad
- op : fused_multi_transformer_xpu
args : (Tensor x, Tensor[] ln_scale, Tensor[] ln_bias, Tensor[] qkvw, Tensor[] qkvw_max, Tensor[] qkv_bias, Tensor[] out_linear_w, Tensor[] out_linear_wmax, Tensor[] out_linear_bias, Tensor[] ffn_ln_scale, Tensor[] ffn_ln_bias, Tensor[] ffn1_weight, Tensor[] ffn1_weight_max, Tensor[] ffn1_bias, Tensor[] ffn2_weight, Tensor[] ffn2_weight_max, Tensor[] ffn2_bias, Tensor[] cache_kv, Tensor[] pre_caches, Tensor rotary_pos_emb, Tensor time_step, Tensor seq_lengths, Tensor src_mask, bool pre_layer_norm, int rotary_emb_dims, float epsilon, float dropout_rate, bool is_test, str dropout_implementation, str act_method, bool trans_qkvw, int ring_id)
output : Tensor(out), Tensor[](cache_kv_out){out_linear_w.size()}
infer_meta :
func : FusedMultiTransformerXpuInferMeta
kernel :
func : fused_multi_transformer_xpu
data_type : x
optional : cache_kv, pre_caches, rotary_pos_emb, time_step, seq_lengths, src_mask
- op : generate_sequence_xpu
args : (Tensor x, DataType dtype)
output : Tensor
infer_meta :
func : GenerateSequenceXPUInferMeta
kernel :
func : generate_sequence_xpu
data_type : dtype
- op : greater_equal
args : (Tensor x, Tensor y, int axis = -1, bool force_cpu=false)
output : Tensor(out)
......@@ -155,16 +117,6 @@
backend : x
force_backend : force_cpu
- op : multi_encoder_xpu
args : (Tensor x, Tensor[] fc_weight, Tensor[] fc_weight_max, Tensor[] fc_bias, Tensor[] ln_scale, Tensor[] ln_bias, Tensor mask, int layer_num, bool norm_before, int hidden_dim, int head_num, int size_per_head, int ffn_hidden_dim_scale, int act_type, int relative_type, int slice_idx)
output : Tensor(out), Tensor(x_fp16), Tensor(out_fp16)
infer_meta :
func : MultiEncoderXPUInferMeta
kernel :
func : multi_encoder_xpu
data_type : x
optional : mask, x_fp16, out_fp16
- op : not_equal
args : (Tensor x, Tensor y, int axis = -1, bool force_cpu=false)
output : Tensor(out)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册