未验证 提交 d3f4596a 编写于 作者: H hong 提交者: GitHub

New ir support fuse bn add act (#56247)

* support new ir load combine

* update

* polish code

* remove print

* update

* update

* update

* polish code

* fix bug

* polish code

* fix compile bug

* fix bug

* revert code

* remove useless code

* polish code
上级 93cbcfaf
...@@ -158,6 +158,8 @@ Node *FuseBatchNormAddActPass::CreateFusedBatchNormAddActNode( ...@@ -158,6 +158,8 @@ Node *FuseBatchNormAddActPass::CreateFusedBatchNormAddActNode(
desc.SetInput("Z", std::vector<std::string>({elewise_add_in_n})); desc.SetInput("Z", std::vector<std::string>({elewise_add_in_n}));
desc.SetInput("Scale", std::vector<std::string>({bn_scale_n})); desc.SetInput("Scale", std::vector<std::string>({bn_scale_n}));
desc.SetInput("Bias", std::vector<std::string>({bn_bias_n})); desc.SetInput("Bias", std::vector<std::string>({bn_bias_n}));
desc.SetInput("Mean", std::vector<std::string>({bn_mean_out_n}));
desc.SetInput("Variance", std::vector<std::string>({bn_variance_out_n}));
desc.SetOutput("Y", std::vector<std::string>({act_out_n})); desc.SetOutput("Y", std::vector<std::string>({act_out_n}));
desc.SetOutput("MeanOut", std::vector<std::string>({bn_mean_out_n})); desc.SetOutput("MeanOut", std::vector<std::string>({bn_mean_out_n}));
......
...@@ -18,7 +18,12 @@ namespace paddle { ...@@ -18,7 +18,12 @@ namespace paddle {
namespace dialect { namespace dialect {
const std::unordered_set<std::string> LegacyOpList = { const std::unordered_set<std::string> LegacyOpList = {
"pd.load_combine", "pd.c_concat", "pd.c_broadcast_"}; "pd.load_combine",
"pd.c_concat",
"pd.c_broadcast_",
"pd.fused_bn_add_activation_",
"pd.fused_bn_add_activation_grad",
};
enum class AttrType { enum class AttrType {
UNDEFINED = 0, UNDEFINED = 0,
......
...@@ -164,6 +164,12 @@ void FusedBatchNormAddActOpMaker::Make() { ...@@ -164,6 +164,12 @@ void FusedBatchNormAddActOpMaker::Make() {
AddInput("Bias", AddInput("Bias",
"Bias is a 1-dimensional tensor of size C " "Bias is a 1-dimensional tensor of size C "
"that is applied to the output"); "that is applied to the output");
AddInput("Mean",
"Mean is a 1-dimensional tensor of size C "
"that is applied to the output");
AddInput("Variance",
"Variance is a 1-dimensional tensor of size C "
"that is applied to the output");
AddOutput("Y", "result after normalization"); AddOutput("Y", "result after normalization");
AddOutput("MeanOut", AddOutput("MeanOut",
"Share memory with Mean. " "Share memory with Mean. "
......
...@@ -242,6 +242,30 @@ ...@@ -242,6 +242,30 @@
kernel : kernel :
func : frobenius_norm_grad func : frobenius_norm_grad
# Gradient op for fused_batch_norm_act. Consumes the forward output (out),
# the saved per-batch statistics (saved_mean / saved_variance) and the
# incoming gradient (out_grad), and produces gradients for x, scale and bias.
# reserve_space is declared optional below, so callers may omit it.
- backward_op : fused_batch_norm_act_grad
forward : fused_batch_norm_act (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str act_type) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
args : (Tensor x, Tensor scale, Tensor bias, Tensor out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str act_type)
output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
infer_meta :
# Three outputs, shaped like [x, scale, bias] respectively.
func : GeneralTernaryGradInferMeta
param : [x, scale, bias]
kernel :
func : fused_batch_norm_act_grad
# Kernel dtype is selected from the incoming gradient, not from x.
data_type : out_grad
optional : reserve_space
# Gradient op for fused_bn_add_activation (batch-norm + residual add +
# activation). Produces gradients for x, z (the residual input), scale and bias.
# NOTE(review): args does not carry z itself; z_grad's meta is inferred via
# GeneralQuaternaryGradInferMeta with param [x, x, scale, bias] — i.e. x is
# listed twice so z_grad presumably shares x's shape. Confirm z always matches
# x's shape for this fused op.
- backward_op : fused_bn_add_activation_grad
forward : fused_bn_add_activation (Tensor x, Tensor z, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str act_type) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
args : (Tensor x, Tensor scale, Tensor bias, Tensor out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str act_type)
output : Tensor(x_grad), Tensor(z_grad), Tensor(scale_grad), Tensor(bias_grad)
infer_meta :
func : GeneralQuaternaryGradInferMeta
param : [x, x, scale, bias]
kernel :
func : fused_bn_add_activation_grad
# Kernel dtype follows the incoming gradient.
data_type : out_grad
optional : reserve_space
- backward_op : fused_softmax_mask_upper_triangle_grad - backward_op : fused_softmax_mask_upper_triangle_grad
forward : fused_softmax_mask_upper_triangle(Tensor X) -> Tensor(Out) forward : fused_softmax_mask_upper_triangle(Tensor X) -> Tensor(Out)
args: (Tensor Out, Tensor Out_grad) args: (Tensor Out, Tensor Out_grad)
......
...@@ -447,6 +447,30 @@ ...@@ -447,6 +447,30 @@
optional : skip_update, master_params optional : skip_update, master_params
inplace : (params -> params_out), (moments1 -> moments1_out), (moments2 -> moments2_out), (beta1_pows -> beta1_pows_out), (beta2_pows -> beta2_pows_out), (master_params -> master_params_out) inplace : (params -> params_out), (moments1 -> moments1_out), (moments2 -> moments2_out), (beta1_pows -> beta1_pows_out), (beta2_pows -> beta2_pows_out), (master_params -> master_params_out)
# Forward definition of fused batch-norm + activation.
# Shape/dtype inference delegates to FusedBatchNormActInferMeta over
# [x, scale, bias, mean, variance]; the remaining attrs (momentum, epsilon,
# act_type) do not participate in meta inference.
- op : fused_batch_norm_act
args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str act_type)
output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
infer_meta:
func : FusedBatchNormActInferMeta
param : [x, scale, bias, mean, variance]
kernel :
func : fused_batch_norm_act
data_type : x
# Running statistics are updated in place: the outputs alias the inputs.
view : (mean -> mean_out), (variance -> variance_out)
backward : fused_batch_norm_act_grad
# Forward definition of fused batch-norm + residual add (z) + activation.
# Reuses FusedBatchNormActInferMeta: z is not part of meta inference, so the
# output metas are derived from x/scale/bias/mean/variance only.
- op : fused_bn_add_activation
args : (Tensor x, Tensor z, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str act_type)
output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
infer_meta:
func : FusedBatchNormActInferMeta
param : [x, scale, bias, mean, variance]
kernel :
func : fused_bn_add_activation
data_type : x
# Running statistics are updated in place: the outputs alias the inputs.
view : (mean -> mean_out), (variance -> variance_out)
backward : fused_bn_add_activation_grad
- op : fused_softmax_mask_upper_triangle - op : fused_softmax_mask_upper_triangle
args : (Tensor X) args : (Tensor X)
output : Tensor(Out) output : Tensor(Out)
......
...@@ -1173,6 +1173,39 @@ ...@@ -1173,6 +1173,39 @@
data_type : float data_type : float
support_tensor : true support_tensor : true
# op_compat entry: maps the new-IR argument names (lhs) to the legacy fluid
# operator's parameter names (rhs) for fused_batch_norm_act, so the op can be
# translated between the two program representations.
- op : fused_batch_norm_act
backward : fused_batch_norm_act_grad
inputs:
x : X
mean : Mean
variance : Variance
scale : Scale
bias : Bias
outputs :
out : Y
mean_out: MeanOut
variance_out: VarianceOut
saved_mean: SavedMean
saved_variance: SavedVariance
reserve_space: ReserveSpace
# op_compat entry for fused_bn_add_activation. The trailing underscore in the
# new-IR name (fused_bn_add_activation_) denotes the inplace variant; the name
# in parentheses is the legacy fluid op it maps to. lhs = new-IR names,
# rhs = legacy parameter names.
- op : fused_bn_add_activation_ (fused_bn_add_activation)
backward : fused_bn_add_activation_grad
inputs:
x : X
z : Z
mean : Mean
variance : Variance
scale : Scale
bias : Bias
outputs :
out : Y
mean_out: MeanOut
variance_out: VarianceOut
saved_mean: SavedMean
saved_variance: SavedVariance
reserve_space: ReserveSpace
- op : fused_conv2d - op : fused_conv2d
extra : extra :
attrs : [bool use_cudnn = false, float fuse_alpha = 0.0f, float fuse_beta = 0.0f, float Scale_in = 1.0f, attrs : [bool use_cudnn = false, float fuse_alpha = 0.0f, float fuse_beta = 0.0f, float Scale_in = 1.0f,
......
...@@ -1437,6 +1437,36 @@ void EditDistanceInferMeta(const MetaTensor& hyps, ...@@ -1437,6 +1437,36 @@ void EditDistanceInferMeta(const MetaTensor& hyps,
sequencenum->set_dtype(DataType::FLOAT32); sequencenum->set_dtype(DataType::FLOAT32);
} }
// Shape/dtype inference for the fused batch-norm + activation ops
// (fused_batch_norm_act / fused_bn_add_activation). Delegates entirely to
// BatchNormInferMeta, whose parameter order is (x, mean, variance, scale,
// bias, ...) — note this differs from this function's own (x, scale, bias,
// mean, variance) order, so the reordering below is deliberate.
//
// NOTE(review): is_test=false, momentum=0.0, epsilon=0.0 and layout "NHWC"
// are hardcoded; presumably BatchNormInferMeta's shape inference does not
// depend on these scalar attrs — confirm, in particular that the legacy op
// is NHWC-only.
void FusedBatchNormActInferMeta(const MetaTensor& x,
                                const MetaTensor& scale,
                                const MetaTensor& bias,
                                const MetaTensor& mean,
                                const MetaTensor& variance,
                                MetaTensor* y,
                                MetaTensor* mean_out,
                                MetaTensor* variance_out,
                                MetaTensor* saved_mean,
                                MetaTensor* saved_variance,
                                MetaTensor* reserve_space) {
  BatchNormInferMeta(x,
                     mean,
                     variance,
                     scale,
                     bias,
                     false,   // is_test
                     0.0,     // momentum (unused for shape inference)
                     0.0,     // epsilon (unused for shape inference)
                     "NHWC",  // data_layout
                     false,   // use_global_stats
                     false,   // trainable_statistics
                     y,
                     mean_out,
                     variance_out,
                     saved_mean,
                     saved_variance,
                     reserve_space);
}
void FusedBiasActInferMeta(const MetaTensor& x, void FusedBiasActInferMeta(const MetaTensor& x,
const MetaTensor& bias, const MetaTensor& bias,
const MetaTensor& dequant_scales, const MetaTensor& dequant_scales,
......
...@@ -308,6 +308,18 @@ void EditDistanceInferMeta(const MetaTensor& hyps, ...@@ -308,6 +308,18 @@ void EditDistanceInferMeta(const MetaTensor& hyps,
MetaTensor* sequencenum, MetaTensor* sequencenum,
MetaTensor* out); MetaTensor* out);
// Infers output metas (shape/dtype) for the fused batch-norm + activation
// ops: y plus the running-statistic outputs (mean_out, variance_out), the
// saved per-batch statistics and reserve_space. See the definition for the
// delegation to BatchNormInferMeta.
void FusedBatchNormActInferMeta(const MetaTensor& x,
                                const MetaTensor& scale,
                                const MetaTensor& bias,
                                const MetaTensor& mean,
                                const MetaTensor& variance,
                                MetaTensor* y,
                                MetaTensor* mean_out,
                                MetaTensor* variance_out,
                                MetaTensor* saved_mean,
                                MetaTensor* saved_variance,
                                MetaTensor* reserve_space);
void FusedBiasActInferMeta(const MetaTensor& x, void FusedBiasActInferMeta(const MetaTensor& x,
const MetaTensor& bias, const MetaTensor& bias,
const MetaTensor& dequant_scales, const MetaTensor& dequant_scales,
......
...@@ -42,10 +42,10 @@ void FusedBatchNormActGradKernel(const Context &dev_ctx, ...@@ -42,10 +42,10 @@ void FusedBatchNormActGradKernel(const Context &dev_ctx,
const DenseTensor &x, const DenseTensor &x,
const DenseTensor &scale, const DenseTensor &scale,
const DenseTensor &bias, const DenseTensor &bias,
const DenseTensor &y,
const DenseTensor &saved_mean, const DenseTensor &saved_mean,
const DenseTensor &saved_variance, const DenseTensor &saved_variance,
const DenseTensor &reserve_space, const DenseTensor &reserve_space,
const DenseTensor &y,
const DenseTensor &y_grad, const DenseTensor &y_grad,
float momentum, float momentum,
float epsilon, float epsilon,
......
...@@ -35,10 +35,10 @@ KernelSignature BatchNormActGradFuseOpArgumentMapping( ...@@ -35,10 +35,10 @@ KernelSignature BatchNormActGradFuseOpArgumentMapping(
{"X", {"X",
"Scale", "Scale",
"Bias", "Bias",
"Y",
"SavedMean", "SavedMean",
"SavedVariance", "SavedVariance",
"ReserveSpace", "ReserveSpace",
"Y",
"Y@GRAD"}, "Y@GRAD"},
{"momentum", "epsilon", "act_type"}, {"momentum", "epsilon", "act_type"},
{"X@GRAD", "Scale@GRAD", "Bias@GRAD"}); {"X@GRAD", "Scale@GRAD", "Bias@GRAD"});
......
...@@ -1476,6 +1476,8 @@ def fused_bn_add_act( ...@@ -1476,6 +1476,8 @@ def fused_bn_add_act(
"Z": y, "Z": y,
"Scale": scale, "Scale": scale,
"Bias": bias, "Bias": bias,
"Mean": mean,
"Variance": variance,
} }
attrs = {"epsilon": epsilon, 'momentum': momentum} attrs = {"epsilon": epsilon, 'momentum': momentum}
......
...@@ -256,16 +256,21 @@ void ComputeFusedBNAddReluForward(const phi::GPUContext &ctx, ...@@ -256,16 +256,21 @@ void ComputeFusedBNAddReluForward(const phi::GPUContext &ctx,
framework::AttributeMap attrs; framework::AttributeMap attrs;
auto op = framework::OpRegistry::CreateOp( auto op =
"fused_bn_add_activation", framework::OpRegistry::CreateOp("fused_bn_add_activation",
{{"X", {"X"}}, {"Z", {"Z"}}, {"Scale", {"Scale"}}, {"Bias", {"Bias"}}}, {{"X", {"X"}},
{{"Y", {"Y"}}, {"Z", {"Z"}},
{"MeanOut", {"Mean"}}, {"Scale", {"Scale"}},
{"VarianceOut", {"Variance"}}, {"Bias", {"Bias"}},
{"SavedMean", {"SavedMean"}}, {"Mean", {"Mean"}},
{"SavedVariance", {"SavedVariance"}}, {"Variance", {"Variance"}}},
{"ReserveSpace", {"ReserveSpace"}}}, {{"Y", {"Y"}},
attrs); {"MeanOut", {"Mean"}},
{"VarianceOut", {"Variance"}},
{"SavedMean", {"SavedMean"}},
{"SavedVariance", {"SavedVariance"}},
{"ReserveSpace", {"ReserveSpace"}}},
attrs);
op->Run(scope, ctx.GetPlace()); op->Run(scope, ctx.GetPlace());
paddle::framework::TensorCopySync(*y, platform::CPUPlace(), cpu_y); paddle::framework::TensorCopySync(*y, platform::CPUPlace(), cpu_y);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册