Unverified commit 10145cb6, authored by HappyHeavyRain, committed by GitHub

Add fuse_ops.yaml and fused_backward.yaml (#52010)

* add fused_yaml fused_backward

* fix eager_function bug

* add some comment of fused yaml file

* add 'support_dygraph_mode' configuration in fused yaml

* delete some 'fused_api.h' in include file

* add fused flag in api_gen
Parent 7d416161
@@ -7,16 +7,20 @@ paddle/fluid/op_use_default_grad_maker_DEV.spec
 paddle/fluid/op_use_default_grad_maker_PR.spec
 paddle/fluid/operators/ops_extra_info.cc
 paddle/phi/api/backward/backward_api.h
+paddle/phi/api/backward/fused_backward_api.h
 paddle/phi/api/backward/sparse_bw_api.h
 paddle/phi/api/include/api.h
+paddle/phi/api/include/fused_api.h
 paddle/phi/api/include/operants_base.h
 paddle/phi/api/include/operants_manager.h
 paddle/phi/api/include/sparse_api.h
 paddle/phi/api/include/strings_api.h
 paddle/phi/api/include/tensor_operants.h
 paddle/phi/api/lib/api.cc
+paddle/phi/api/lib/fused_api.cc
 paddle/phi/api/lib/dygraph_api.*
 paddle/phi/api/lib/backward_api.cc
+paddle/phi/api/lib/fused_backward_api.cc
 paddle/phi/api/lib/operants_manager.cc
 paddle/phi/api/lib/sparse_api.cc
 paddle/phi/api/lib/strings_api.cc
@@ -85,6 +89,7 @@ tools/nvcc_lazy
 paddle/fluid/operators/generated_op*.cc
 paddle/fluid/operators/generated_sparse_op.cc
 paddle/fluid/operators/generated_static_op.cc
+paddle/fluid/operators/generated_fused_op.cc
 paddle/phi/ops/compat/generated_*.cc
 paddle/phi/api/yaml/parsed_apis/
 paddle/fluid/operators/generator/parsed_ops/
......
@@ -26,6 +26,7 @@
 #include "paddle/fluid/operators/math/concat_and_split.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/phi/api/include/api.h"
+#include "paddle/phi/api/include/fused_api.h"
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
......
 set(api_yaml_path
-  "${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/ops.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/legacy_ops.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/sparse_ops.yaml"
+  "${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/ops.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/legacy_ops.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/sparse_ops.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/fused_ops.yaml"
 )
 set(backward_yaml_path
-  "${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/backward.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/legacy_backward.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/sparse_backward.yaml"
+  "${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/backward.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/legacy_backward.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/sparse_backward.yaml,${PADDLE_SOURCE_DIR}/paddle/phi/api/yaml/fused_backward.yaml"
 )
 set(tmp_forwards_cc_path
   "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/forwards/tmp_dygraph_functions.cc"
......
@@ -119,12 +119,31 @@ def ReadFwdFile(filepath):
     # empty file loaded by yaml is None
     contents = yaml.load(f, Loader=yaml.FullLoader)
     f.close()
+    # not all fused ops support dygraph
+    if filepath.endswith("fused_ops.yaml") is True:
+        new_apis = [
+            api
+            for api in contents
+            if "support_dygraph_mode" in api
+            and api["support_dygraph_mode"] is True
+        ]
+        contents = new_apis
     return contents if contents is not None else []


 def ReadBwdFile(filepath):
     f = open(filepath, 'r')
     contents = yaml.load(f, Loader=yaml.FullLoader)
+    # not all fused ops support dygraph
+    if filepath.endswith("fused_backward.yaml") is True:
+        new_apis = [
+            api
+            for api in contents
+            if "support_dygraph_mode" in api
+            and api["support_dygraph_mode"] is True
+        ]
+        contents = new_apis
     ret = {}
     if contents is not None:
         for content in contents:
......
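Both readers above apply the same `support_dygraph_mode` filter. A minimal sketch of its effect, using two hypothetical entries (`fused_a` and `fused_b` are illustrative names, not ops from this commit):

import yaml

# Hypothetical YAML in the fused_ops.yaml format, for illustration only.
sample = """
- op : fused_a
  support_dygraph_mode : true
- op : fused_b
"""

contents = yaml.load(sample, Loader=yaml.FullLoader)
# Same predicate as the filter added to ReadFwdFile/ReadBwdFile: keep only
# entries that explicitly opt in to dygraph code generation.
dygraph_apis = [
    api
    for api in contents
    if "support_dygraph_mode" in api and api["support_dygraph_mode"] is True
]
print([api["op"] for api in dygraph_apis])  # ['fused_a']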
@@ -336,6 +336,7 @@ NODE_CC_FILE_TEMPLATE = """
 #include "glog/logging.h"
 #include "paddle/phi/api/all.h"
 #include "paddle/phi/api/backward/backward_api.h"
+#include "paddle/phi/api/backward/fused_backward_api.h"
 #include "paddle/phi/api/backward/sparse_bw_api.h"
 #include "paddle/fluid/imperative/tracer.h"
 #include "paddle/fluid/framework/op_registry.h"
......
@@ -6,6 +6,7 @@ set(op_yaml_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/ops.yaml)
 set(legacy_op_yaml_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/legacy_ops.yaml)
 set(bw_op_yaml_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/backward.yaml)
 set(static_op_yaml_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/static_ops.yaml)
+set(fused_op_yaml_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/fused_ops.yaml)
 set(legacy_bw_op_yaml_file
     ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/legacy_backward.yaml)
 set(sparse_op_yaml_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/sparse_ops.yaml)
@@ -13,6 +14,8 @@ set(sparse_bw_op_yaml_file
     ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/sparse_backward.yaml)
 set(static_bw_op_yaml_file
     ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/static_backward.yaml)
+set(fused_bw_op_yaml_file
+    ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/fused_backward.yaml)

 if(NOT PYTHONINTERP_FOUND)
   find_package(PythonInterp REQUIRED)
@@ -40,10 +43,14 @@ set(generated_op_path_4
     ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generated_op4.cc)
 set(generated_static_op_path
     ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generated_static_op.cc)
+set(generated_fused_op_path
+    ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generated_fused_op.cc)
 set(generated_sparse_ops_path
     ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generated_sparse_op.cc)
 set(generated_argument_mapping_path
     ${CMAKE_SOURCE_DIR}/paddle/phi/ops/compat/generated_sig.cc)
+set(generated_fused_argument_mapping_path
+    ${CMAKE_SOURCE_DIR}/paddle/phi/ops/compat/generated_fused_sig.cc)
 set(generated_static_argument_mapping_path
     ${CMAKE_SOURCE_DIR}/paddle/phi/ops/compat/generated_static_sig.cc)
 set(generated_sparse_argument_mapping_path
@@ -54,7 +61,9 @@ message(
     - ${op_yaml_file}
     - ${legacy_op_yaml_file}
     - ${bw_op_yaml_file}
-    - ${legacy_bw_op_yaml_file}")
+    - ${legacy_bw_op_yaml_file}
+    - ${fused_op_yaml_file}
+    - ${static_op_yaml_file}")
 execute_process(
   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generator
   COMMAND ${CMAKE_COMMAND} -E make_directory ${parsed_op_dir}
@@ -69,6 +78,8 @@ execute_process(
     --output_path ./parsed_ops/legacy_backward_ops.parsed.yaml --backward
   COMMAND ${PYTHON_EXECUTABLE} parse_op.py --op_yaml_path ${static_op_yaml_file}
     --output_path ./parsed_ops/static_ops.parsed.yaml
+  COMMAND ${PYTHON_EXECUTABLE} parse_op.py --op_yaml_path ${fused_op_yaml_file}
+    --output_path ./parsed_ops/fused_ops.parsed.yaml
   COMMAND ${PYTHON_EXECUTABLE} parse_op.py --op_yaml_path ${sparse_op_yaml_file}
     --output_path ./parsed_ops/sparse_ops.parsed.yaml
   COMMAND
@@ -77,6 +88,9 @@ execute_process(
   COMMAND
     ${PYTHON_EXECUTABLE} parse_op.py --op_yaml_path ${static_bw_op_yaml_file}
     --output_path ./parsed_ops/static_backward.parsed.yaml --backward
+  COMMAND
+    ${PYTHON_EXECUTABLE} parse_op.py --op_yaml_path ${fused_bw_op_yaml_file}
+    --output_path ./parsed_ops/fused_backward.parsed.yaml --backward
   RESULTS_VARIABLE _results)
 foreach(_result in ${_results})
   if(${_result})
@@ -111,6 +125,17 @@ if(${_result})
   message(FATAL_ERROR "static ops validation failed, exiting.")
 endif()

+execute_process(
+  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generator
+  COMMAND
+    ${PYTHON_EXECUTABLE} cross_validate.py --forward_yaml_paths
+    ./parsed_ops/fused_ops.parsed.yaml --backward_yaml_paths
+    ./parsed_ops/fused_backward.parsed.yaml
+  RESULT_VARIABLE _result)
+if(${_result})
+  message(FATAL_ERROR "fused ops validation failed, exiting.")
+endif()
+
 execute_process(
   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generator
   COMMAND
@@ -158,6 +183,21 @@ if(${_result})
   message(FATAL_ERROR "operator codegen failed, exiting.")
 endif()

+execute_process(
+  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generator
+  COMMAND
+    ${PYTHON_EXECUTABLE} generate_op.py --ops_yaml_path
+    ./parsed_ops/fused_ops.parsed.yaml --backward_yaml_path
+    ./parsed_ops/fused_backward.parsed.yaml --op_version_yaml_path
+    ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/op_version.yaml
+    --op_compat_yaml_path ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/op_compat.yaml
+    --output_op_path "${generated_fused_op_path}.tmp" --output_arg_map_path
+    "${generated_fused_argument_mapping_path}.tmp"
+  RESULT_VARIABLE _result)
+if(${_result})
+  message(FATAL_ERROR "operator codegen failed, exiting.")
+endif()
+
 execute_process(
   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generator
   COMMAND
@@ -177,10 +217,12 @@ set(generated_static_files
     "${generated_op_path_3}"
     "${generated_op_path_4}"
     "${generated_static_op_path}"
+    "${generated_fused_op_path}"
     "${generated_sparse_ops_path}"
     "${generated_argument_mapping_path}"
     "${generated_static_argument_mapping_path}"
-    "${generated_sparse_argument_mapping_path}")
+    "${generated_sparse_argument_mapping_path}"
+    "${generated_fused_argument_mapping_path}")
 foreach(generated_static_file ${generated_static_files})
   if(EXISTS "${generated_static_file}.tmp" AND EXISTS
......
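For readers tracing the new fused-op branch of the build, the three execute_process steps above boil down to three script invocations. A rough Python replay, assuming a local Paddle checkout at SRC (a sketch, not part of the commit; all flags are copied from the CMake commands above):

import subprocess
import sys

SRC = "/path/to/Paddle"  # assumed checkout root
GEN = f"{SRC}/paddle/fluid/operators/generator"

def run(*args):
    # Abort on any non-zero exit, mirroring the FATAL_ERROR checks above.
    subprocess.run([sys.executable, *args], cwd=GEN, check=True)

# 1. Normalize the fused forward/backward YAML files.
run("parse_op.py", "--op_yaml_path", f"{SRC}/paddle/phi/api/yaml/fused_ops.yaml",
    "--output_path", "./parsed_ops/fused_ops.parsed.yaml")
run("parse_op.py", "--op_yaml_path", f"{SRC}/paddle/phi/api/yaml/fused_backward.yaml",
    "--output_path", "./parsed_ops/fused_backward.parsed.yaml", "--backward")

# 2. Cross-validate forward signatures against their backward ops.
run("cross_validate.py",
    "--forward_yaml_paths", "./parsed_ops/fused_ops.parsed.yaml",
    "--backward_yaml_paths", "./parsed_ops/fused_backward.parsed.yaml")

# 3. Emit generated_fused_op.cc and generated_fused_sig.cc (.tmp first).
run("generate_op.py",
    "--ops_yaml_path", "./parsed_ops/fused_ops.parsed.yaml",
    "--backward_yaml_path", "./parsed_ops/fused_backward.parsed.yaml",
    "--op_version_yaml_path", f"{SRC}/paddle/phi/api/yaml/op_version.yaml",
    "--op_compat_yaml_path", f"{SRC}/paddle/phi/api/yaml/op_compat.yaml",
    "--output_op_path", f"{SRC}/paddle/fluid/operators/generated_fused_op.cc.tmp",
    "--output_arg_map_path", f"{SRC}/paddle/phi/ops/compat/generated_fused_sig.cc.tmp")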
@@ -340,6 +340,7 @@ def check_op_config(op_entry, op_name):
         'no_need_buffer',
         'data_transform',
         'composite',
+        'support_dygraph_mode',
     )
     infer_meta_key_set = ('func', 'param')
     kernel_key_set = (
......
@@ -27,6 +27,7 @@ limitations under the License. */
 // new phi apis
 #include "paddle/phi/api/include/api.h"
 #include "paddle/phi/api/include/context_pool.h"
+#include "paddle/phi/api/include/fused_api.h"
 #include "paddle/phi/api/include/sparse_api.h"
 #include "paddle/phi/api/include/tensor.h"
......
@@ -67,6 +67,26 @@ set(dygraph_api_source_file
 set(dygraph_api_header_file_tmp ${dygraph_api_header_file}.tmp)
 set(dygraph_api_source_file_tmp ${dygraph_api_source_file}.tmp)

+# fused_op forward api file
+set(fused_api_yaml_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/fused_ops.yaml)
+set(fused_api_header_file
+    ${CMAKE_SOURCE_DIR}/paddle/phi/api/include/fused_api.h)
+set(fused_api_source_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/fused_api.cc)
+set(fused_api_header_file_tmp ${fused_api_header_file}.tmp)
+set(fused_api_source_file_tmp ${fused_api_source_file}.tmp)
+
+# fused_op backward api file
+set(fused_bw_api_gen_file
+    ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator/backward_api_gen.py)
+set(fused_bw_api_yaml_file
+    ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/fused_backward.yaml)
+set(fused_bw_api_header_file
+    ${CMAKE_SOURCE_DIR}/paddle/phi/api/backward/fused_backward_api.h)
+set(fused_bw_api_source_file
+    ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/fused_backward_api.cc)
+set(fused_bw_api_header_file_tmp ${fused_bw_api_header_file}.tmp)
+set(fused_bw_api_source_file_tmp ${fused_bw_api_source_file}.tmp)
+
 # sparse api file
 set(sparse_api_gen_file
     ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator/sparse_api_gen.py)
@@ -171,6 +191,40 @@ add_custom_command(
     ${legacy_bw_api_yaml_file}
   VERBATIM)

+# generate fused_op api
+add_custom_command(
+  OUTPUT ${fused_api_header_file} ${fused_api_source_file}
+  COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml
+  COMMAND
+    ${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${fused_api_yaml_file}
+    --is_fused_ops_yaml --api_header_path ${fused_api_header_file_tmp}
+    --api_source_path ${fused_api_source_file_tmp}
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_api_header_file_tmp}
+          ${fused_api_header_file}
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_api_source_file_tmp}
+          ${fused_api_source_file}
+  COMMENT "copy_if_different ${fused_api_header_file} ${fused_api_source_file}"
+  DEPENDS ${fused_api_yaml_file} ${api_gen_file} ${api_gen_base}
+  VERBATIM)
+
+# generate fused_op backward api
+add_custom_command(
+  OUTPUT ${fused_bw_api_header_file} ${fused_bw_api_source_file}
+         ${fused_bw_api_header_file_tmp} ${fused_bw_api_source_file_tmp}
+  COMMAND
+    ${PYTHON_EXECUTABLE} ${fused_bw_api_gen_file} --backward_yaml_path
+    ${fused_bw_api_yaml_file} --is_fused_backward_yaml --backward_header_path
+    ${fused_bw_api_header_file_tmp} --backward_source_path
+    ${fused_bw_api_source_file_tmp}
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_bw_api_header_file_tmp}
+          ${fused_bw_api_header_file}
+  COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_bw_api_source_file_tmp}
+          ${fused_bw_api_source_file}
+  COMMENT
+    "copy_if_different ${fused_bw_api_header_file} ${fused_bw_api_source_file}"
+  DEPENDS ${fused_bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_base}
+  VERBATIM)
+
 # generate sparse api
 add_custom_command(
   OUTPUT ${sparse_api_header_file} ${sparse_api_source_file}
@@ -333,7 +387,7 @@ cc_library(
   phi_profiler)
 cc_library(
   phi_function_api
-  SRCS ${api_source_file}
+  SRCS ${api_source_file} ${fused_api_source_file}
   DEPS phi_tensor_raw
       phi
       kernel_dispatch
@@ -344,7 +398,7 @@ cc_library(
   phi_profiler)
 cc_library(
   phi_bw_function_api
-  SRCS ${bw_api_source_file}
+  SRCS ${bw_api_source_file} ${fused_bw_api_source_file}
   DEPS phi_tensor_raw
       phi
       kernel_dispatch
......
@@ -605,16 +605,6 @@
   kernel :
     func : frame_grad

-- backward_op : fused_dropout_add_grad
-  forward : fused_dropout_add (Tensor x, Tensor y, Scalar p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(seed_offset)
-  args : (Tensor seed_offset, Tensor out_grad, Scalar p, bool is_test, str mode, bool fix_seed)
-  output : Tensor(x_grad), Tensor(y_grad)
-  infer_meta :
-    func : GeneralBinaryGradInferMeta
-    param : [out_grad, out_grad]
-  kernel :
-    func : fused_dropout_add_grad
-
 - backward_op : gather_nd_grad
   forward : gather_nd (Tensor x, Tensor index) -> Tensor(out)
   args : (Tensor x, Tensor index, Tensor out_grad)
......
# This file is designed for fused C++ backward operators, and it manages the
# generated code for both dynamic mode and static mode.
# The operators in this file have an extra configuration item, "support_dygraph_mode".
# If an operator has "support_dygraph_mode : true", it supports dygraph mode.

- backward_op : fused_dropout_add_grad
  forward : fused_dropout_add (Tensor x, Tensor y, Scalar p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(seed_offset)
  args : (Tensor seed_offset, Tensor out_grad, Scalar p, bool is_test, str mode, bool fix_seed)
  output : Tensor(x_grad), Tensor(y_grad)
  infer_meta :
    func : GeneralBinaryGradInferMeta
    param : [out_grad, out_grad]
  kernel :
    func : fused_dropout_add_grad
  support_dygraph_mode : true
# This file is designed for fused C++ forward operators, and it manages the
# generated code for both dynamic mode and static mode.
# The operators in this file have an extra configuration item, "support_dygraph_mode".
# If an operator has "support_dygraph_mode : true", it supports dygraph mode.

- op : embedding_with_eltwise_add_xpu
  args : (Tensor[] ids, Tensor[] tables, int64_t padding_idx)
  output: Tensor
  infer_meta :
    func: EmbeddingWithEltwiseAddXPUInferMeta
  kernel:
    func: embedding_with_eltwise_add_xpu
    data_type: tables

- op : fc_xpu
  args : (Tensor x, Tensor x_max, Tensor w, Tensor w_max, Tensor bias, int in_num_col_dims, bool transpose_x, float alpha, float beta, int act_type, float act_alpha)
  output : Tensor(out), Tensor(out_max)
  infer_meta :
    func : FcXPUInferMeta
  kernel :
    func : fc_xpu
    data_type : x
  optional : bias, x_max

- op : fused_dropout_add
  args : (Tensor x, Tensor y, Scalar p, bool is_test, str mode, int seed, bool fix_seed)
  output : Tensor(out), Tensor(seed_offset)
  infer_meta :
    func : FusedDropoutAddInferMeta
  kernel :
    func : fused_dropout_add
    data_type : x
  backward : fused_dropout_add_grad
  support_dygraph_mode : true

- op : fused_linear_param_grad_add
  args : (Tensor x, Tensor dout, Tensor dweight, Tensor dbias, bool multi_precision = true)
  output : Tensor(dweight_out), Tensor(dbias_out)
  infer_meta:
    func : FusedLinearParamGradAddInferMeta
  optional : dweight, dbias
  kernel:
    func : fused_linear_param_grad_add
    data_type : dout
  support_dygraph_mode : true

- op : fused_multi_transformer_xpu
  args : (Tensor x, Tensor[] ln_scale, Tensor[] ln_bias, Tensor[] qkvw, Tensor[] qkvw_max, Tensor[] qkv_bias, Tensor[] out_linear_w, Tensor[] out_linear_wmax, Tensor[] out_linear_bias, Tensor[] ffn_ln_scale, Tensor[] ffn_ln_bias, Tensor[] ffn1_weight, Tensor[] ffn1_weight_max, Tensor[] ffn1_bias, Tensor[] ffn2_weight, Tensor[] ffn2_weight_max, Tensor[] ffn2_bias, Tensor[] cache_kv, Tensor[] pre_caches, Tensor rotary_pos_emb, Tensor time_step, Tensor seq_lengths, Tensor src_mask, bool pre_layer_norm, int rotary_emb_dims, float epsilon, float dropout_rate, bool is_test, str dropout_implementation, str act_method, bool trans_qkvw, int ring_id)
  output : Tensor(out), Tensor[](cache_kv_out){out_linear_w.size()}
  infer_meta :
    func : FusedMultiTransformerXpuInferMeta
  kernel :
    func : fused_multi_transformer_xpu
    data_type : x
  optional : cache_kv, pre_caches, rotary_pos_emb, time_step, seq_lengths, src_mask

- op : generate_sequence_xpu
  args : (Tensor x, DataType dtype)
  output : Tensor
  infer_meta :
    func : GenerateSequenceXPUInferMeta
  kernel :
    func : generate_sequence_xpu
    data_type : dtype

- op : multi_encoder_xpu
  args : (Tensor x, Tensor[] fc_weight, Tensor[] fc_weight_max, Tensor[] fc_bias, Tensor[] ln_scale, Tensor[] ln_bias, Tensor mask, int layer_num, bool norm_before, int hidden_dim, int head_num, int size_per_head, int ffn_hidden_dim_scale, int act_type, int relative_type, int slice_idx)
  output : Tensor(out), Tensor(x_fp16), Tensor(out_fp16)
  infer_meta :
    func : MultiEncoderXPUInferMeta
  kernel :
    func : multi_encoder_xpu
    data_type : x
  optional : mask, x_fp16, out_fp16
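Of the ops above, only fused_dropout_add and fused_linear_param_grad_add opt in to dygraph codegen; the XPU fusion ops remain static-graph only. A small check, assuming PyYAML and a Paddle checkout where this file exists (sketch only):

import yaml

# Assumed path inside a Paddle checkout.
with open("paddle/phi/api/yaml/fused_ops.yaml") as f:
    ops = yaml.load(f, Loader=yaml.FullLoader)

dygraph = [op["op"] for op in ops if op.get("support_dygraph_mode") is True]
static_only = [op["op"] for op in ops if op.get("support_dygraph_mode") is not True]

# Expected given the file above:
#   dygraph     -> ['fused_dropout_add', 'fused_linear_param_grad_add']
#   static_only -> the XPU fusion ops (embedding_with_eltwise_add_xpu, fc_xpu, ...)
print(dygraph)
print(static_only)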
@@ -406,7 +406,9 @@ PD_DECLARE_API(from_blob);
 """


-def generate_api(api_yaml_path, header_file_path, source_file_path):
+def generate_api(
+    api_yaml_path, is_fused_ops_yaml, header_file_path, source_file_path
+):
     apis = []

     for each_api_yaml in api_yaml_path:
@@ -424,7 +426,21 @@ def generate_api(api_yaml_path, header_file_path, source_file_path):
     header_file.write(header_include())
     header_file.write(namespace[0])

-    include_header_file = "paddle/phi/api/include/api.h"
+    include_header_file = (
+        "paddle/phi/api/include/fused_api.h"
+        if is_fused_ops_yaml is True
+        else "paddle/phi/api/include/api.h"
+    )
+    # not all fused ops support dygraph
+    if is_fused_ops_yaml is True:
+        new_apis = [
+            api
+            for api in apis
+            if "support_dygraph_mode" in api
+            and api["support_dygraph_mode"] is True
+        ]
+        apis = new_apis
     source_file.write(source_include(include_header_file))
     source_file.write(namespace[0])
@@ -456,6 +472,12 @@ def main():
         default=['paddle/phi/api/yaml/ops.yaml'],
     )
+    parser.add_argument(
+        '--is_fused_ops_yaml',
+        help='flag of fused ops yaml',
+        action='store_true',
+    )
     parser.add_argument(
         '--api_header_path',
         help='output of generated api header code file',
@@ -471,10 +493,13 @@ def main():
     options = parser.parse_args()

     api_yaml_path = options.api_yaml_path
+    is_fused_ops_yaml = options.is_fused_ops_yaml
     header_file_path = options.api_header_path
     source_file_path = options.api_source_path

-    generate_api(api_yaml_path, header_file_path, source_file_path)
+    generate_api(
+        api_yaml_path, is_fused_ops_yaml, header_file_path, source_file_path
+    )


 if __name__ == '__main__':
......
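With the new --is_fused_ops_yaml flag, the generator can also be driven by hand, matching the add_custom_command in api/lib/CMakeLists.txt above. A hypothetical invocation (paths relative to an assumed Paddle checkout; .tmp outputs as in the build rule):

import subprocess
import sys

# Sketch of the api_gen.py call wired up in the CMake rule above.
subprocess.run(
    [
        sys.executable,
        "paddle/phi/api/yaml/generator/api_gen.py",
        "--api_yaml_path", "paddle/phi/api/yaml/fused_ops.yaml",
        "--is_fused_ops_yaml",
        "--api_header_path", "paddle/phi/api/include/fused_api.h.tmp",
        "--api_source_path", "paddle/phi/api/lib/fused_api.cc.tmp",
    ],
    check=True,
)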
@@ -269,7 +269,7 @@ def header_include():
 """


-def source_include(header_file_path):
+def source_include(header_file_path, fw_header_file_path):
     return f"""
 #include "{header_file_path}"
 #include <memory>
@@ -282,7 +282,7 @@ def source_include(header_file_path):
 #include "paddle/phi/api/lib/kernel_dispatch.h"
 #include "paddle/phi/common/type_traits.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/api/include/api.h"
+#include "{fw_header_file_path}"
 #include "paddle/phi/infermeta/backward.h"
 #include "paddle/phi/infermeta/unary.h"
@@ -310,7 +310,10 @@ namespace experimental {
 def generate_backward_api(
-    backward_yaml_path, header_file_path, source_file_path
+    backward_yaml_path,
+    is_fused_backward_yaml,
+    header_file_path,
+    source_file_path,
 ):
     bw_apis = []
@@ -329,9 +332,29 @@ def generate_backward_api(
     header_file.write(header_include())
     header_file.write(namespace[0])

-    include_header_file = "paddle/phi/api/backward/backward_api.h"
-    source_file.write(source_include(include_header_file))
+    include_header_file = (
+        "paddle/phi/api/backward/fused_backward_api.h"
+        if is_fused_backward_yaml
+        else "paddle/phi/api/backward/backward_api.h"
+    )
+    include_fw_header_file = (
+        "paddle/phi/api/include/fused_api.h"
+        if is_fused_backward_yaml
+        else "paddle/phi/api/include/api.h"
+    )
+    source_file.write(
+        source_include(include_header_file, include_fw_header_file)
+    )
     source_file.write(namespace[0])
+    # not all fused ops support dygraph
+    if is_fused_backward_yaml is True:
+        new_bw_apis = [
+            bw_api
+            for bw_api in bw_apis
+            if "support_dygraph_mode" in bw_api
+            and bw_api["support_dygraph_mode"] is True
+        ]
+        bw_apis = new_bw_apis

     for bw_api in bw_apis:
         bw_api = BackwardAPI(bw_api)
@@ -355,6 +378,13 @@ def main():
         nargs='+',
         default=['paddle/phi/api/yaml/backward.yaml'],
     )
+    parser.add_argument(
+        '--is_fused_backward_yaml',
+        help='flag of fused backward yaml',
+        action='store_true',
+    )
     parser.add_argument(
         '--backward_header_path',
         help='output of generated backward header code file',
@@ -370,11 +400,15 @@ def main():
     options = parser.parse_args()

     backward_yaml_path = options.backward_yaml_path
+    is_fused_backward_yaml = options.is_fused_backward_yaml
     header_file_path = options.backward_header_path
     source_file_path = options.backward_source_path

     generate_backward_api(
-        backward_yaml_path, header_file_path, source_file_path
+        backward_yaml_path,
+        is_fused_backward_yaml,
+        header_file_path,
+        source_file_path,
     )
......
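The backward generator gains the matching flag; a hypothetical invocation mirroring the fused backward add_custom_command earlier in this commit (the real build then installs the .tmp files with copy_if_different):

import subprocess
import sys

# Sketch of the backward_api_gen.py call from api/lib/CMakeLists.txt above.
subprocess.run(
    [
        sys.executable,
        "paddle/phi/api/yaml/generator/backward_api_gen.py",
        "--backward_yaml_path", "paddle/phi/api/yaml/fused_backward.yaml",
        "--is_fused_backward_yaml",
        "--backward_header_path", "paddle/phi/api/backward/fused_backward_api.h.tmp",
        "--backward_source_path", "paddle/phi/api/lib/fused_backward_api.cc.tmp",
    ],
    check=True,
)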
@@ -726,16 +726,6 @@
   optional : skip_update, master_params
   inplace : (params -> params_out), (moments1 -> moments1_out), (moments2 -> moments2_out), (beta1_pows -> beta1_pows_out), (beta2_pows -> beta2_pows_out), (master_params -> master_params_out)

-- op : fused_linear_param_grad_add
-  args : (Tensor x, Tensor dout, Tensor dweight, Tensor dbias, bool multi_precision = true)
-  output : Tensor(dweight_out), Tensor(dbias_out)
-  infer_meta:
-    func : FusedLinearParamGradAddInferMeta
-  optional : dweight, dbias
-  kernel:
-    func : fused_linear_param_grad_add
-    data_type : dout
-
 - op : gather
   args : (Tensor x, Tensor index, Scalar(int) axis=0)
   output : Tensor(out)
......
@@ -584,16 +584,6 @@
     func : frame
   backward : frame_grad

-- op : fused_dropout_add
-  args : (Tensor x, Tensor y, Scalar p, bool is_test, str mode, int seed, bool fix_seed)
-  output : Tensor(out), Tensor(seed_offset)
-  infer_meta :
-    func : FusedDropoutAddInferMeta
-  kernel :
-    func : fused_dropout_add
-    data_type : x
-  backward : fused_dropout_add_grad
-
 - op : gather_nd
   args : (Tensor x, Tensor index)
   output : Tensor
......
@@ -47,15 +47,6 @@
     func : broadcast
     param: [x, root]

-- op : embedding_with_eltwise_add_xpu
-  args : (Tensor[] ids, Tensor[] tables, int64_t padding_idx)
-  output: Tensor
-  infer_meta :
-    func: EmbeddingWithEltwiseAddXPUInferMeta
-  kernel:
-    func: embedding_with_eltwise_add_xpu
-    data_type: tables
-
 - op : equal
   args : (Tensor x, Tensor y, int axis = -1, bool force_cpu=false)
   output : Tensor(out)
@@ -68,16 +59,6 @@
     backend : x
     force_backend : force_cpu

-- op : fc_xpu
-  args : (Tensor x, Tensor x_max, Tensor w, Tensor w_max, Tensor bias, int in_num_col_dims, bool transpose_x, float alpha, float beta, int act_type, float act_alpha)
-  output : Tensor(out), Tensor(out_max)
-  infer_meta :
-    func : FcXPUInferMeta
-  kernel :
-    func : fc_xpu
-    data_type : x
-  optional : bias, x_max
-
 - op : frobenius_norm
   args : (Tensor x, IntArray axis={0}, bool keepdim=false, bool reduce_all=false, int in_dtype=-1, int out_dtype=-1)
   output : Tensor(out)
@@ -88,25 +69,6 @@
     param : [x, axis, keepdim, reduce_all]
   backward : frobenius_norm_grad

-- op : fused_multi_transformer_xpu
-  args : (Tensor x, Tensor[] ln_scale, Tensor[] ln_bias, Tensor[] qkvw, Tensor[] qkvw_max, Tensor[] qkv_bias, Tensor[] out_linear_w, Tensor[] out_linear_wmax, Tensor[] out_linear_bias, Tensor[] ffn_ln_scale, Tensor[] ffn_ln_bias, Tensor[] ffn1_weight, Tensor[] ffn1_weight_max, Tensor[] ffn1_bias, Tensor[] ffn2_weight, Tensor[] ffn2_weight_max, Tensor[] ffn2_bias, Tensor[] cache_kv, Tensor[] pre_caches, Tensor rotary_pos_emb, Tensor time_step, Tensor seq_lengths, Tensor src_mask, bool pre_layer_norm, int rotary_emb_dims, float epsilon, float dropout_rate, bool is_test, str dropout_implementation, str act_method, bool trans_qkvw, int ring_id)
-  output : Tensor(out), Tensor[](cache_kv_out){out_linear_w.size()}
-  infer_meta :
-    func : FusedMultiTransformerXpuInferMeta
-  kernel :
-    func : fused_multi_transformer_xpu
-    data_type : x
-  optional : cache_kv, pre_caches, rotary_pos_emb, time_step, seq_lengths, src_mask
-
-- op : generate_sequence_xpu
-  args : (Tensor x, DataType dtype)
-  output : Tensor
-  infer_meta :
-    func : GenerateSequenceXPUInferMeta
-  kernel :
-    func : generate_sequence_xpu
-    data_type : dtype
-
 - op : greater_equal
   args : (Tensor x, Tensor y, int axis = -1, bool force_cpu=false)
   output : Tensor(out)
@@ -155,16 +117,6 @@
     backend : x
     force_backend : force_cpu

-- op : multi_encoder_xpu
-  args : (Tensor x, Tensor[] fc_weight, Tensor[] fc_weight_max, Tensor[] fc_bias, Tensor[] ln_scale, Tensor[] ln_bias, Tensor mask, int layer_num, bool norm_before, int hidden_dim, int head_num, int size_per_head, int ffn_hidden_dim_scale, int act_type, int relative_type, int slice_idx)
-  output : Tensor(out), Tensor(x_fp16), Tensor(out_fp16)
-  infer_meta :
-    func : MultiEncoderXPUInferMeta
-  kernel :
-    func : multi_encoder_xpu
-    data_type : x
-  optional : mask, x_fp16, out_fp16
-
 - op : not_equal
   args : (Tensor x, Tensor y, int axis = -1, bool force_cpu=false)
   output : Tensor(out)
......