Unverified · Commit 03ba0fda · Authored by Yiqun Liu · Committed by GitHub

Move the codes of fused operators to operators/fused directory. (#20881)

* Move the codes of fused operators to operators/fused directory.
test=develop

* Correct the op name in cmake.

* Change the use of PADDLE_ENFORCE.
test=develop
Parent: a9bc92c3
@@ -57,8 +57,8 @@ if(WITH_COVERAGE OR NOT WITH_AVX OR WIN32)
     SET(OP_MKL_DEPS ${OP_MKL_DEPS} pyramid_hash_op)
 endif()
-register_operators(EXCLUDES py_func_op warpctc_op dgc_op conv_fusion_op
-    sync_batch_norm_op multihead_matmul_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})
+register_operators(EXCLUDES py_func_op warpctc_op dgc_op
+    sync_batch_norm_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})
 if (WITH_GPU)
     # warpctc_op needs cudnn 7 above
@@ -67,17 +67,10 @@ if (WITH_GPU)
     else()
         op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
     endif()
-    # conv_fusion_op needs cudnn 7 above
-    if (NOT ${CUDNN_VERSION} VERSION_LESS 7100)
-        op_library(conv_fusion_op)
-        file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(conv2d_fusion);\n")
-    endif()
     if (NOT WIN32)
         op_library(sync_batch_norm_op)
         file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(sync_batch_norm);\n")
     endif()
-    op_library(multihead_matmul_op)
-    file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(multihead_matmul);\n")
 else()
     op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
 endif()
...
 include(operators)
-register_operators(EXCLUDES fusion_transpose_flatten_concat_op fusion_conv_inception_op fused_fc_elementwise_layernorm_op)
+register_operators(EXCLUDES
+    conv_fusion_op
+    fusion_transpose_flatten_concat_op
+    fusion_conv_inception_op
+    fused_fc_elementwise_layernorm_op
+    multihead_matmul_op)
 if (WITH_GPU)
-    op_library(fusion_transpose_flatten_concat_op)
-    file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fusion_transpose_flatten_concat);\n")
-    if (NOT ${CUDNN_VERSION} VERSION_LESS 7100)
-        op_library(fusion_conv_inception_op)
-        file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(conv2d_inception_fusion);\n")
-    endif()
-    op_library(fused_fc_elementwise_layernorm_op)
-    file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fused_fc_elementwise_layernorm);\n")
+    # conv_fusion_op needs cudnn 7 above
+    if (NOT ${CUDNN_VERSION} VERSION_LESS 7100)
+        op_library(conv_fusion_op)
+        file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(conv2d_fusion);\n")
+    endif()
+    # fusion_transpose_flatten_concat_op
+    op_library(fusion_transpose_flatten_concat_op)
+    file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fusion_transpose_flatten_concat);\n")
+    # fusion_conv_inception_op needs cudnn 7 above
+    if (NOT ${CUDNN_VERSION} VERSION_LESS 7100)
+        op_library(fusion_conv_inception_op)
+        file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(conv2d_inception_fusion);\n")
+    endif()
+    # fused_fc_elementwise_layernorm_op
+    op_library(fused_fc_elementwise_layernorm_op)
+    file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fused_fc_elementwise_layernorm);\n")
+    # multihead_matmul_op
+    op_library(multihead_matmul_op)
+    file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(multihead_matmul);\n")
 endif()
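Editor's note: the `file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(...);\n")` lines in the CMake changes above exist because operator kernels register themselves through static initializers, and an object file that nothing references can be dropped at link time; the generated pybind source "uses" each op to force the linker to keep its registrar. Below is a minimal, self-contained sketch of that force-linking pattern. The `MOCK_*` macro names and the tiny registry are stand-ins invented for illustration, not Paddle's actual `op_registry.h` definitions.

```cpp
#include <iostream>
#include <map>
#include <string>

// A tiny stand-in for Paddle's global op registry, which is populated by
// static registrar objects created by the REGISTER_OP_* macros.
std::map<std::string, std::string>& Registry() {
  static std::map<std::string, std::string> registry;
  return registry;
}

struct Registrar {
  Registrar(const std::string& name, const std::string& place) {
    Registry()[name] = place;  // runs during static initialization
  }
};

// Simplified stand-ins for the register/use macro pair (hypothetical names).
#define MOCK_REGISTER_CUDA_OP(op)                \
  static Registrar op##_registrar(#op, "CUDA");  \
  int TouchOpRegistrar_##op() { return 0; }

#define MOCK_USE_CUDA_ONLY_OP(op)           \
  extern int TouchOpRegistrar_##op();       \
  static int use_op_itself_##op = TouchOpRegistrar_##op();

// Normally lives in the op's own .cu translation unit:
MOCK_REGISTER_CUDA_OP(conv2d_fusion)

// The line appended to the generated pybind file expands to a stub like
// this; referencing TouchOpRegistrar_* keeps the object file holding the
// registrar from being discarded in a static-library build.
MOCK_USE_CUDA_ONLY_OP(conv2d_fusion)

int main() {
  for (const auto& kv : Registry()) {
    std::cout << kv.first << " registered for " << kv.second << "\n";
  }
  return 0;
}
```

Compiled and run, this prints `conv2d_fusion registered for CUDA`. In a single translation unit the "use" stub is not strictly needed; it matters when, as here, the registrar and the pybind module live in separate libraries.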
@@ -62,10 +62,10 @@ class Conv2DFusionOpMaker : public Conv2DOpMaker {
 class Conv2DFusionOpInferShape : public framework::InferShapeBase {
  public:
   void operator()(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("Input"),
-                   "Input(Input) of ConvOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Filter"),
-                   "Input(Filter) of ConvOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasInput("Input"), true,
+                      "Input(Input) of ConvOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasInput("Filter"), true,
+                      "Input(Filter) of ConvOp should not be null.");
     auto in_dims = ctx->GetInputDim("Input");
     auto filter_dims = ctx->GetInputDim("Filter");
@@ -79,14 +79,14 @@ class Conv2DFusionOpInferShape : public framework::InferShapeBase {
       oshape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
                                       dilations[i], paddings[i], strides[i]));
     }
-    PADDLE_ENFORCE(ctx->HasOutput("Output"),
-                   "Output(Output) of ConvOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasOutput("Output"), true,
+                      "Output(Output) of ConvOp should not be null.");
     ctx->SetOutputDim("Output", framework::make_ddim(oshape));
     std::vector<int> channels =
         ctx->Attrs().Get<std::vector<int>>("split_channels");
     if (channels.size()) {
-      PADDLE_ENFORCE(ctx->HasOutputs("Outputs"),
-                     "Output(Outputs) of ConvOp should not be null.");
+      PADDLE_ENFORCE_EQ(ctx->HasOutputs("Outputs"), true,
+                        "Output(Outputs) of ConvOp should not be null.");
       std::vector<framework::DDim> oshapes;
       oshapes.reserve(channels.size());
       for (size_t i = 0; i < channels.size(); ++i) {
...
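Editor's note on the change from `PADDLE_ENFORCE(cond, msg)` to `PADDLE_ENFORCE_EQ(cond, true, msg)` above: the `_EQ` form makes the comparison explicit and lets the failure report include both operands rather than just a message. Here is a minimal self-contained sketch of the difference, using simplified `MY_ENFORCE*` macros as stand-ins for Paddle's real ones (which additionally capture file, line, and a stack trace):

```cpp
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

// Old-style check: on failure, only the message is reported.
#define MY_ENFORCE(cond, msg)                   \
  do {                                          \
    if (!(cond)) throw std::runtime_error(msg); \
  } while (0)

// New-style check: both operands are evaluated once, and a failure
// report can show both sides of the comparison.
#define MY_ENFORCE_EQ(a, b, msg)                              \
  do {                                                        \
    auto lhs = (a);                                           \
    auto rhs = (b);                                           \
    if (!(lhs == rhs)) {                                      \
      std::ostringstream os;                                  \
      os << msg << " [expected " #a " == " #b ", got " << lhs \
         << " vs " << rhs << "]";                             \
      throw std::runtime_error(os.str());                     \
    }                                                         \
  } while (0)

// Pretend InferShape context: only "Input" is wired up.
bool HasInput(const std::string& name) { return name == "Input"; }

int main() {
  MY_ENFORCE(HasInput("Input"), "Input(Input) of ConvOp should not be null.");
  try {
    MY_ENFORCE_EQ(HasInput("Filter"), true,
                  "Input(Filter) of ConvOp should not be null.");
  } catch (const std::exception& e) {
    std::cerr << e.what() << "\n";  // message plus both operand values
  }
  return 0;
}
```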
@@ -41,7 +41,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
     auto* input = ctx.Input<Tensor>("Input");
     auto* filter = ctx.Input<Tensor>("Filter");
     auto* bias = ctx.Input<Tensor>("Bias");
-    PADDLE_ENFORCE(bias, "The bias should not be null.");
+    PADDLE_ENFORCE_NOT_NULL(bias, "The bias should not be null.");
     auto* residual = ctx.Input<Tensor>("ResidualData");
     auto* output = ctx.Output<Tensor>("Output");
...
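Likewise for the `.cu` change: `PADDLE_ENFORCE_NOT_NULL` states the intent of a pointer check directly instead of relying on an implicit pointer-to-bool conversion. A minimal sketch under the same assumption that `MY_ENFORCE_NOT_NULL` is a simplified stand-in, not Paddle's actual implementation:

```cpp
#include <iostream>
#include <stdexcept>

// Simplified stand-in; the real macro also records file, line, and a
// stack trace.
#define MY_ENFORCE_NOT_NULL(ptr, msg)                     \
  do {                                                    \
    if ((ptr) == nullptr) throw std::runtime_error(msg);  \
  } while (0)

struct Tensor {};

void CheckBias(const Tensor* bias) {
  // Reads as an explicit null check, not a generic truthiness test.
  MY_ENFORCE_NOT_NULL(bias, "The bias should not be null.");
  std::cout << "bias is present\n";
}

int main() {
  Tensor bias;
  CheckBias(&bias);      // passes
  try {
    CheckBias(nullptr);  // throws with the descriptive message
  } catch (const std::exception& e) {
    std::cerr << e.what() << "\n";
  }
  return 0;
}
```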