Unverified · Commit d3f4596a, authored by H hong, committed by GitHub

New ir support fuse bn add act (#56247)

* support new ir load combine

* update

* polish code

* remove print

* update

* update

* update

* polish code

* fix bug

* polish code

* fix compile bug

* fix bug

* revert code

* remove useless code

* polish code
Parent 93cbcfaf
@@ -158,6 +158,8 @@ Node *FuseBatchNormAddActPass::CreateFusedBatchNormAddActNode(
   desc.SetInput("Z", std::vector<std::string>({elewise_add_in_n}));
   desc.SetInput("Scale", std::vector<std::string>({bn_scale_n}));
   desc.SetInput("Bias", std::vector<std::string>({bn_bias_n}));
+  desc.SetInput("Mean", std::vector<std::string>({bn_mean_out_n}));
+  desc.SetInput("Variance", std::vector<std::string>({bn_variance_out_n}));
   desc.SetOutput("Y", std::vector<std::string>({act_out_n}));
   desc.SetOutput("MeanOut", std::vector<std::string>({bn_mean_out_n}));
......
@@ -18,7 +18,12 @@ namespace paddle {
 namespace dialect {
 const std::unordered_set<std::string> LegacyOpList = {
-    "pd.load_combine", "pd.c_concat", "pd.c_broadcast_"};
+    "pd.load_combine",
+    "pd.c_concat",
+    "pd.c_broadcast_",
+    "pd.fused_bn_add_activation_",
+    "pd.fused_bn_add_activation_grad",
+};
 enum class AttrType {
   UNDEFINED = 0,
......
@@ -164,6 +164,12 @@ void FusedBatchNormAddActOpMaker::Make() {
   AddInput("Bias",
            "Bias is a 1-dimensional tensor of size C "
            "that is applied to the output");
+  AddInput("Mean",
+           "Mean is a 1-dimensional tensor of size C "
+           "that is applied to the output");
+  AddInput("Variance",
+           "Variance is a 1-dimensional tensor of size C "
+           "that is applied to the output");
   AddOutput("Y", "result after normalization");
   AddOutput("MeanOut",
             "Share memory with Mean. "
......
@@ -242,6 +242,30 @@
     kernel :
       func : frobenius_norm_grad
 
+- backward_op : fused_batch_norm_act_grad
+  forward : fused_batch_norm_act (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str act_type) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
+  args : (Tensor x, Tensor scale, Tensor bias, Tensor out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str act_type)
+  output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
+  infer_meta :
+    func : GeneralTernaryGradInferMeta
+    param : [x, scale, bias]
+  kernel :
+    func : fused_batch_norm_act_grad
+    data_type : out_grad
+  optional : reserve_space
+
+- backward_op : fused_bn_add_activation_grad
+  forward : fused_bn_add_activation (Tensor x, Tensor z, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str act_type) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
+  args : (Tensor x, Tensor scale, Tensor bias, Tensor out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str act_type)
+  output : Tensor(x_grad), Tensor(z_grad), Tensor(scale_grad), Tensor(bias_grad)
+  infer_meta :
+    func : GeneralQuaternaryGradInferMeta
+    param : [x, x, scale, bias]
+  kernel :
+    func : fused_bn_add_activation_grad
+    data_type : out_grad
+  optional : reserve_space
+
 - backward_op : fused_softmax_mask_upper_triangle_grad
   forward : fused_softmax_mask_upper_triangle(Tensor X) -> Tensor(Out)
   args: (Tensor Out, Tensor Out_grad)
......
@@ -447,6 +447,30 @@
   optional : skip_update, master_params
   inplace : (params -> params_out), (moments1 -> moments1_out), (moments2 -> moments2_out), (beta1_pows -> beta1_pows_out), (beta2_pows -> beta2_pows_out), (master_params -> master_params_out)
 
+- op : fused_batch_norm_act
+  args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str act_type)
+  output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
+  infer_meta:
+    func : FusedBatchNormActInferMeta
+    param : [x, scale, bias, mean, variance]
+  kernel :
+    func : fused_batch_norm_act
+    data_type : x
+  view : (mean -> mean_out), (variance -> variance_out)
+  backward : fused_batch_norm_act_grad
+
+- op : fused_bn_add_activation
+  args : (Tensor x, Tensor z, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str act_type)
+  output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
+  infer_meta:
+    func : FusedBatchNormActInferMeta
+    param : [x, scale, bias, mean, variance]
+  kernel :
+    func : fused_bn_add_activation
+    data_type : x
+  view : (mean -> mean_out), (variance -> variance_out)
+  backward : fused_bn_add_activation_grad
+
 - op : fused_softmax_mask_upper_triangle
   args : (Tensor X)
   output : Tensor(Out)
......
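Note on semantics: the `fused_bn_add_activation` entry above computes `act(BN(x) + z)` in training mode and updates the running statistics in place, per the `view : (mean -> mean_out), (variance -> variance_out)` declaration. A minimal NumPy sketch of that contract, assuming NHWC layout and ReLU for `act_type` (a reference for the math only, not the cuDNN kernel):

```python
import numpy as np

def fused_bn_add_activation_ref(x, z, scale, bias, mean, variance,
                                momentum=0.9, epsilon=1e-5):
    # Per-channel batch statistics; x and z are NHWC (channel-last).
    saved_mean = x.mean(axis=(0, 1, 2))
    saved_variance = x.var(axis=(0, 1, 2))
    # Normalize, apply the affine transform, add the residual, activate.
    x_hat = (x - saved_mean) / np.sqrt(saved_variance + epsilon)
    out = np.maximum(scale * x_hat + bias + z, 0.0)  # ReLU assumed
    # mean_out/variance_out share memory with mean/variance, so the
    # running statistics are updated in place.
    mean[...] = momentum * mean + (1.0 - momentum) * saved_mean
    variance[...] = momentum * variance + (1.0 - momentum) * saved_variance
    return out, saved_mean, saved_variance
```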
@@ -1173,6 +1173,39 @@
     data_type : float
     support_tensor : true
 
+- op : fused_batch_norm_act
+  backward : fused_batch_norm_act_grad
+  inputs:
+    x : X
+    mean : Mean
+    variance : Variance
+    scale : Scale
+    bias : Bias
+  outputs :
+    out : Y
+    mean_out: MeanOut
+    variance_out: VarianceOut
+    saved_mean: SavedMean
+    saved_variance: SavedVariance
+    reserve_space: ReserveSpace
+
+- op : fused_bn_add_activation_ (fused_bn_add_activation)
+  backward : fused_bn_add_activation_grad
+  inputs:
+    x : X
+    z : Z
+    mean : Mean
+    variance : Variance
+    scale : Scale
+    bias : Bias
+  outputs :
+    out : Y
+    mean_out: MeanOut
+    variance_out: VarianceOut
+    saved_mean: SavedMean
+    saved_variance: SavedVariance
+    reserve_space: ReserveSpace
+
 - op : fused_conv2d
   extra :
     attrs : [bool use_cudnn = false, float fuse_alpha = 0.0f, float fuse_beta = 0.0f, float Scale_in = 1.0f,
......
@@ -1437,6 +1437,36 @@ void EditDistanceInferMeta(const MetaTensor& hyps,
   sequencenum->set_dtype(DataType::FLOAT32);
 }
 
+void FusedBatchNormActInferMeta(const MetaTensor& x,
+                                const MetaTensor& scale,
+                                const MetaTensor& bias,
+                                const MetaTensor& mean,
+                                const MetaTensor& variance,
+                                MetaTensor* y,
+                                MetaTensor* mean_out,
+                                MetaTensor* variance_out,
+                                MetaTensor* saved_mean,
+                                MetaTensor* saved_variance,
+                                MetaTensor* reserve_space) {
+  BatchNormInferMeta(x,
+                     mean,
+                     variance,
+                     scale,
+                     bias,
+                     false,
+                     0.0,
+                     0.0,
+                     "NHWC",
+                     false,
+                     false,
+                     y,
+                     mean_out,
+                     variance_out,
+                     saved_mean,
+                     saved_variance,
+                     reserve_space);
+}
+
 void FusedBiasActInferMeta(const MetaTensor& x,
                            const MetaTensor& bias,
                            const MetaTensor& dequant_scales,
......
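As a quick check on what `FusedBatchNormActInferMeta` implies for shapes (it delegates to `BatchNormInferMeta` with an `"NHWC"` layout), a sketch under the usual batch-norm contract: `y` matches `x`, and every statistics output holds one value per channel; `reserve_space` is backend-sized and omitted here.

```python
def fused_bn_act_out_shapes(x_shape):
    """Shape sketch for FusedBatchNormActInferMeta, NHWC assumed."""
    n, h, w, c = x_shape
    return {
        "y": list(x_shape),       # same shape and layout as x
        "mean_out": [c],          # running statistics, per channel
        "variance_out": [c],
        "saved_mean": [c],        # batch statistics, per channel
        "saved_variance": [c],
    }

print(fused_bn_act_out_shapes([8, 32, 32, 64]))
# {'y': [8, 32, 32, 64], 'mean_out': [64], 'variance_out': [64], ...}
```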
@@ -308,6 +308,18 @@ void EditDistanceInferMeta(const MetaTensor& hyps,
                            MetaTensor* sequencenum,
                            MetaTensor* out);
 
+void FusedBatchNormActInferMeta(const MetaTensor& x,
+                                const MetaTensor& scale,
+                                const MetaTensor& bias,
+                                const MetaTensor& mean,
+                                const MetaTensor& variance,
+                                MetaTensor* y,
+                                MetaTensor* mean_out,
+                                MetaTensor* variance_out,
+                                MetaTensor* saved_mean,
+                                MetaTensor* saved_variance,
+                                MetaTensor* reserve_space);
+
 void FusedBiasActInferMeta(const MetaTensor& x,
                            const MetaTensor& bias,
                            const MetaTensor& dequant_scales,
......
@@ -42,10 +42,10 @@ void FusedBatchNormActGradKernel(const Context &dev_ctx,
                                  const DenseTensor &x,
                                  const DenseTensor &scale,
                                  const DenseTensor &bias,
-                                 const DenseTensor &y,
                                  const DenseTensor &saved_mean,
                                  const DenseTensor &saved_variance,
                                  const DenseTensor &reserve_space,
+                                 const DenseTensor &y,
                                  const DenseTensor &y_grad,
                                  float momentum,
                                  float epsilon,
......
@@ -35,10 +35,10 @@ KernelSignature BatchNormActGradFuseOpArgumentMapping(
       {"X",
        "Scale",
        "Bias",
-       "Y",
        "SavedMean",
        "SavedVariance",
        "ReserveSpace",
+       "Y",
        "Y@GRAD"},
       {"momentum", "epsilon", "act_type"},
       {"X@GRAD", "Scale@GRAD", "Bias@GRAD"});
......
@@ -1476,6 +1476,8 @@ def fused_bn_add_act(
         "Z": y,
         "Scale": scale,
         "Bias": bias,
+        "Mean": mean,
+        "Variance": variance,
     }
     attrs = {"epsilon": epsilon, 'momentum': momentum}
......
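With `Mean` and `Variance` now passed through as op inputs, existing call sites of the Python wrapper are unchanged; the statistics variables the layer already creates are simply wired into the op. A minimal static-graph usage sketch (the import path of `fused_bn_add_act` varies across Paddle versions and is assumed here):

```python
import paddle
from paddle.incubate.layers import fused_bn_add_act  # path assumed

paddle.enable_static()
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.static.program_guard(main, startup):
    x = paddle.static.data("x", [8, 32, 32, 64], "float16")
    z = paddle.static.data("z", [8, 32, 32, 64], "float16")
    # Computes act(BN(x) + z); the layer creates Scale/Bias/Mean/Variance
    # variables internally and now also feeds Mean/Variance in as inputs.
    out = fused_bn_add_act(x, z)
```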
@@ -256,16 +256,21 @@ void ComputeFusedBNAddReluForward(const phi::GPUContext &ctx,
   framework::AttributeMap attrs;
-  auto op = framework::OpRegistry::CreateOp(
-      "fused_bn_add_activation",
-      {{"X", {"X"}}, {"Z", {"Z"}}, {"Scale", {"Scale"}}, {"Bias", {"Bias"}}},
-      {{"Y", {"Y"}},
-       {"MeanOut", {"Mean"}},
-       {"VarianceOut", {"Variance"}},
-       {"SavedMean", {"SavedMean"}},
-       {"SavedVariance", {"SavedVariance"}},
-       {"ReserveSpace", {"ReserveSpace"}}},
-      attrs);
+  auto op =
+      framework::OpRegistry::CreateOp("fused_bn_add_activation",
+                                      {{"X", {"X"}},
+                                       {"Z", {"Z"}},
+                                       {"Scale", {"Scale"}},
+                                       {"Bias", {"Bias"}},
+                                       {"Mean", {"Mean"}},
+                                       {"Variance", {"Variance"}}},
+                                      {{"Y", {"Y"}},
+                                       {"MeanOut", {"Mean"}},
+                                       {"VarianceOut", {"Variance"}},
+                                       {"SavedMean", {"SavedMean"}},
+                                       {"SavedVariance", {"SavedVariance"}},
+                                       {"ReserveSpace", {"ReserveSpace"}}},
+                                      attrs);
 
   op->Run(scope, ctx.GetPlace());
   paddle::framework::TensorCopySync(*y, platform::CPUPlace(), cpu_y);
......