Unverified · Commit fe8c6796 · Authored by YuanRisheng, committed by GitHub

[PHI]Standardise some C++ API (Part3) (#47532)

* Standardise batch norm

* standardize conv3d and depthwise_conv2d

* fix ci bugs
Parent cad2e68d
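For reference, the standardization below moves batch_norm's tensor inputs to the order (x, mean, variance, scale, bias), hoists is_test ahead of momentum, and drops the fuse_with_relu flag from the kernel argument lists (for depthwise conv, fuse_relu is instead read from the device context's DNN attributes, as the GPU kernels in the diff show). A minimal caller-side sketch of the new ordering follows; the tensor variables, dev_ctx, and attribute values are illustrative placeholders, not code from this PR:

    // Sketch of invoking the standardized batch_norm kernel signature.
    // All DenseTensor inputs/outputs are assumed to be prepared elsewhere.
    phi::BatchNormKernel<float>(dev_ctx,
                                x,
                                mean,      // running statistics now precede
                                variance,  // scale and bias
                                scale,
                                bias,
                                /*is_test=*/false,  // moved ahead of momentum
                                /*momentum=*/0.9f,
                                /*epsilon=*/1e-5f,
                                /*data_layout=*/"NCHW",
                                /*use_global_stats=*/false,
                                /*trainable_statistics=*/false,
                                // note: fuse_with_relu is no longer a parameter
                                &y,
                                &mean_out,
                                &variance_out,
                                &saved_mean,
                                &saved_variance,
                                &reserve_space);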
@@ -242,7 +242,6 @@ class InplaceABNKernel : public framework::OpKernel<T> {
auto is_test = ctx.Attr<bool>("is_test");
auto use_global_stats = ctx.Attr<bool>("use_global_stats");
auto trainable_statistics = ctx.Attr<bool>("trainable_statistics");
- auto fuse_with_relu = ctx.Attr<bool>("fuse_with_relu");
auto* mean_out = ctx.Output<phi::DenseTensor>("MeanOut");
auto* variance_out = ctx.Output<phi::DenseTensor>("VarianceOut");
@@ -255,17 +254,16 @@ class InplaceABNKernel : public framework::OpKernel<T> {
static_cast<const typename framework::ConvertToPhiContext<
DeviceContext>::TYPE&>(dev_ctx),
*x,
- *scale,
- *bias,
*mean,
*variance,
+ *scale,
+ *bias,
+ is_test,
momentum,
epsilon,
data_layout,
- is_test,
use_global_stats,
trainable_statistics,
- fuse_with_relu,
y,
mean_out,
variance_out,
@@ -315,7 +313,6 @@ class InplaceABNGradKernel : public framework::OpKernel<T> {
auto is_test = ctx.Attr<bool>("is_test");
auto use_global_stats = ctx.Attr<bool>("use_global_stats");
auto trainable_statistics = ctx.Attr<bool>("trainable_statistics");
- auto fuse_with_relu = ctx.Attr<bool>("fuse_with_relu");
auto* scale_grad =
ctx.Output<phi::DenseTensor>(framework::GradVarName("Scale"));
@@ -361,7 +358,6 @@ class InplaceABNGradKernel : public framework::OpKernel<T> {
is_test,
use_global_stats,
trainable_statistics,
- fuse_with_relu,
true,
d_x,
scale_grad,
...
@@ -48,7 +48,6 @@ class InplaceABNKernel : public framework::OpKernel<T> {
auto is_test = ctx.Attr<bool>("is_test");
auto use_global_stats = ctx.Attr<bool>("use_global_stats");
auto trainable_statistics = ctx.Attr<bool>("trainable_statistics");
- auto fuse_with_relu = ctx.Attr<bool>("fuse_with_relu");
auto* mean_out = ctx.Output<phi::DenseTensor>("MeanOut");
auto* variance_out = ctx.Output<phi::DenseTensor>("VarianceOut");
@@ -62,17 +61,16 @@ class InplaceABNKernel : public framework::OpKernel<T> {
static_cast<const typename framework::ConvertToPhiContext<
DeviceContext>::TYPE&>(dev_ctx),
*x,
- *scale,
- *bias,
*mean,
*variance,
+ *scale,
+ *bias,
+ is_test,
momentum,
epsilon,
data_layout,
- is_test,
use_global_stats,
trainable_statistics,
- fuse_with_relu,
y,
mean_out,
variance_out,
@@ -85,17 +83,16 @@ class InplaceABNKernel : public framework::OpKernel<T> {
static_cast<const typename framework::ConvertToPhiContext<
DeviceContext>::TYPE&>(dev_ctx),
*x,
- *scale,
- *bias,
*mean,
*variance,
+ *scale,
+ *bias,
+ is_test,
momentum,
epsilon,
data_layout,
- is_test,
use_global_stats,
trainable_statistics,
- fuse_with_relu,
y,
mean_out,
variance_out,
@@ -146,7 +143,6 @@ class InplaceABNGradKernel : public framework::OpKernel<T> {
auto is_test = ctx.Attr<bool>("is_test");
auto use_global_stats = ctx.Attr<bool>("use_global_stats");
auto trainable_statistics = ctx.Attr<bool>("trainable_statistics");
- auto fuse_with_relu = ctx.Attr<bool>("fuse_with_relu");
auto* scale_grad =
ctx.Output<phi::DenseTensor>(framework::GradVarName("Scale"));
@@ -210,7 +206,6 @@ class InplaceABNGradKernel : public framework::OpKernel<T> {
is_test,
use_global_stats,
trainable_statistics,
- fuse_with_relu,
true,
d_x,
scale_grad,
...
@@ -129,8 +129,8 @@
inplace : (out_grad -> x_grad)
- backward_op : batch_norm_double_grad
- forward : batch_norm_grad (Tensor x, Tensor scale, Tensor bias, Tensor out_mean, Tensor out_variance, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor grad_out, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(grad_x), Tensor(grad_scale), Tensor(grad_bias)
+ forward : batch_norm_grad (Tensor x, Tensor scale, Tensor bias, Tensor out_mean, Tensor out_variance, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor grad_out, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics) -> Tensor(grad_x), Tensor(grad_scale), Tensor(grad_bias)
- args : (Tensor x, Tensor scale, Tensor out_mean, Tensor out_variance, Tensor saved_mean, Tensor saved_variance, Tensor grad_out, Tensor grad_x_grad, Tensor grad_scale_grad, Tensor grad_bias_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
+ args : (Tensor x, Tensor scale, Tensor out_mean, Tensor out_variance, Tensor saved_mean, Tensor saved_variance, Tensor grad_out, Tensor grad_x_grad, Tensor grad_scale_grad, Tensor grad_bias_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics)
output : Tensor(x_grad), Tensor(scale_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
@@ -142,8 +142,8 @@
inplace : (grad_out -> grad_out_grad)
- backward_op : batch_norm_grad
- forward : batch_norm (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
+ forward : batch_norm (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
- args : (Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
+ args : (Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics)
output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
@@ -345,9 +345,21 @@
use_gpudnn : true
backward : conv2d_transpose_double_grad
+ - backward_op : conv3d_double_grad
+ forward : conv3d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(grad_input), Tensor(grad_filter)
+ args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format)
+ output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
+ infer_meta :
+ func : GeneralTernaryGradInferMeta
+ param: [input, filter, grad_out]
+ kernel :
+ func : conv3d_double_grad
+ use_gpudnn : true
+ optional : grad_input_grad, grad_filter_grad
- backward_op : conv3d_grad
- forward : conv3d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(out)
+ forward : conv3d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out)
- args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
+ args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor(input_grad), Tensor(filter_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
@@ -355,19 +367,7 @@
kernel :
func : conv3d_grad
use_gpudnn : true
- backward : conv3d_grad_grad
+ backward : conv3d_double_grad
- - backward_op : conv3d_grad_grad
- forward : conv3d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(grad_input), Tensor(grad_filter)
- args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
- output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
- infer_meta :
- func : GeneralTernaryGradInferMeta
- param: [input, filter, grad_out]
- kernel :
- func : conv3d_grad_grad
- use_gpudnn : true
- optional : grad_input_grad, grad_filter_grad
- backward_op : conv3d_transpose_grad
forward : conv3d_transpose(Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, int[] output_size, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out)
@@ -427,29 +427,29 @@
data_type : x
optional : mask
+ - backward_op : depthwise_conv2d_double_grad
+ forward : depthwise_conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_gpudnn) -> Tensor(grad_input), Tensor(grad_filter)
+ args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format)
+ output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
+ infer_meta :
+ func : GeneralTernaryGradInferMeta
+ param: [input, filter, grad_out]
+ kernel :
+ func : depthwise_conv2d_double_grad
+ optional : grad_input_grad, grad_filter_grad
- backward_op : depthwise_conv2d_grad
- forward : depthwise_conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn) -> Tensor(out)
+ forward : depthwise_conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_gpudnn) -> Tensor(out)
- args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn)
+ args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_gpudnn)
output : Tensor(input_grad), Tensor(filter_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [input, filter]
kernel :
func : depthwise_conv2d_grad
- param : [input, filter, out_grad, strides, paddings, padding_algorithm, groups, dilations, data_format, use_addto, workspace_size_MB, exhaustive_search, fuse_relu]
+ param : [input, filter, out_grad, strides, paddings, padding_algorithm, groups, dilations, data_format]
use_gpudnn : use_gpudnn
- backward : depthwise_conv2d_grad_grad
+ backward : depthwise_conv2d_double_grad
- - backward_op : depthwise_conv2d_grad_grad
- forward : depthwise_conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn) -> Tensor(grad_input), Tensor(grad_filter)
- args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu)
- output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad)
- infer_meta :
- func : GeneralTernaryGradInferMeta
- param: [input, filter, grad_out]
- kernel :
- func : depthwise_conv2d_grad_grad
- optional : grad_input_grad, grad_filter_grad
- backward_op : depthwise_conv2d_transpose_grad
forward : depthwise_conv2d_transpose(Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out)
@@ -2091,8 +2091,8 @@
inplace : (out_grad -> x_grad)
- backward_op : sync_batch_norm_grad
- forward : sync_batch_norm_ (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
+ forward : sync_batch_norm_ (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
- args : (Tensor x, Tensor scale, Tensor bias, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
+ args : (Tensor x, Tensor scale, Tensor bias, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics)
output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
...
@@ -254,7 +254,7 @@
inplace : (in_sum_1 -> out_sum_1), (in_sum_2 -> out_sum_2), (in_sum_3 -> out_sum_3), (in_num_accumulates -> out_num_accumulates), (in_old_num_accumulates -> out_old_num_accumulates), (in_num_updates -> out_num_updates)
- op : batch_norm
- args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
+ args : (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics)
output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
infer_meta:
func : BatchNormInferMeta
@@ -464,7 +464,7 @@
backward : conv2d_transpose_grad
- op : conv3d
- args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
+ args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format)
output : Tensor
infer_meta :
func : Conv3DInferMeta
@@ -551,14 +551,14 @@
backward : deformable_conv_grad
- op : depthwise_conv2d
- args : (Tensor x, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search, bool fuse_relu, bool use_gpudnn)
+ args : (Tensor x, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_gpudnn)
output : Tensor(out)
infer_meta :
func : DepthwiseConvInferMeta
- param : [x, filter, strides, paddings, padding_algorithm, groups, dilations, data_format, use_addto, workspace_size_MB, exhaustive_search]
+ param : [x, filter, strides, paddings, padding_algorithm, groups, dilations, data_format]
kernel :
func : depthwise_conv2d
- param : [x, filter, strides, paddings, padding_algorithm, groups, dilations, data_format, use_addto, workspace_size_MB, exhaustive_search, fuse_relu]
+ param : [x, filter, strides, paddings, padding_algorithm, groups, dilations, data_format]
use_gpudnn : use_gpudnn
backward : depthwise_conv2d_grad
@@ -2373,7 +2373,7 @@
backward : swish_grad
- op : sync_batch_norm_
- args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
+ args : (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics)
output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
infer_meta :
func : BatchNormInferMeta
...
@@ -101,8 +101,8 @@
atanh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_op : batch_norm_grad
- forward : batch_norm (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
+ forward : batch_norm (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
- args : (Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
+ args : (Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics)
output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
@@ -368,8 +368,8 @@
subtract_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}
- backward_op : sync_batch_norm_grad
- forward : sync_batch_norm_(Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
+ forward : sync_batch_norm_(Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
- args : (Tensor x, Tensor scale, Tensor bias, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
+ args : (Tensor x, Tensor scale, Tensor bias, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics)
output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
...
@@ -88,7 +88,7 @@
backward : atanh_grad
- op : batch_norm
- args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
+ args : (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics)
output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
infer_meta :
func : BatchNormInferMeta
@@ -324,7 +324,7 @@
backward : subtract_grad
- op : sync_batch_norm_
- args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
+ args : (Tensor x, Tensor mean, Tensor variance, Tensor scale, Tensor bias, bool is_test, float momentum, float epsilon, str data_layout, bool use_global_stats, bool trainable_statistics)
output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
infer_meta :
func : BatchNormInferMeta
...
@@ -564,9 +564,6 @@ void Conv3DInferMeta(const MetaTensor& input,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
MetaTensor* out,
MetaConfig config) {
ConvInferMeta(input,
@@ -927,9 +924,6 @@ void DepthwiseConvInferMeta(const MetaTensor& input,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
MetaTensor* out,
MetaConfig config) {
ConvInferMeta(input,
...
@@ -95,9 +95,6 @@ void Conv3DInferMeta(const MetaTensor& input,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
MetaTensor* out,
MetaConfig config = MetaConfig());
@@ -151,9 +148,6 @@ void DepthwiseConvInferMeta(const MetaTensor& input,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
MetaTensor* out,
MetaConfig config = MetaConfig());
...
@@ -534,17 +534,16 @@ void AverageAccumulatesInferMeta(const MetaTensor& param,
}
void BatchNormInferMeta(const MetaTensor& x,
- const MetaTensor& scale,
- const MetaTensor& bias,
const MetaTensor& mean,
const MetaTensor& variance,
+ const MetaTensor& scale,
+ const MetaTensor& bias,
+ bool is_test,
float momentum,
float epsilon,
const std::string& data_layout_str,
- bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
MetaTensor* y,
MetaTensor* mean_out,
MetaTensor* variance_out,
@@ -646,10 +645,10 @@ void BatchNormInferMeta(const MetaTensor& x,
}
void BatchNormInferInferMeta(const MetaTensor& x,
- const MetaTensor& scale,
- const MetaTensor& bias,
const MetaTensor& mean,
const MetaTensor& variance,
+ const MetaTensor& scale,
+ const MetaTensor& bias,
float momentum,
float epsilon,
const std::string& data_layout,
@@ -658,17 +657,16 @@ void BatchNormInferInferMeta(const MetaTensor& x,
MetaTensor* variance_out,
MetaConfig config) {
BatchNormInferMeta(x,
- scale,
- bias,
mean,
variance,
+ scale,
+ bias,
+ /*is_test=*/true,
momentum,
epsilon,
data_layout,
- /*is_test=*/true,
/*use_global_stats=*/false,
/*trainable_statistics=*/false,
- /*fuse_with_relu=*/false,
y,
mean_out,
variance_out,
...
@@ -158,17 +158,16 @@ void AverageAccumulatesInferMeta(const MetaTensor& param,
MetaTensor* out_num_updates);
void BatchNormInferMeta(const MetaTensor& x,
- const MetaTensor& scale,
- const MetaTensor& bias,
const MetaTensor& mean,
const MetaTensor& variance,
+ const MetaTensor& scale,
+ const MetaTensor& bias,
+ bool is_test,
float momentum,
float epsilon,
const std::string& data_layout,
- bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
MetaTensor* y,
MetaTensor* mean_out,
MetaTensor* variance_out,
@@ -178,10 +177,10 @@ void BatchNormInferMeta(const MetaTensor& x,
MetaConfig config = MetaConfig());
void BatchNormInferInferMeta(const MetaTensor& x,
- const MetaTensor& scale,
- const MetaTensor& bias,
const MetaTensor& mean,
const MetaTensor& variance,
+ const MetaTensor& scale,
+ const MetaTensor& bias,
float momentum,
float epsilon,
const std::string& data_layout,
...
@@ -37,7 +37,6 @@ void BatchNormGradRawKernel(const Context& dev_ctx,
bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
bool is_inplace,
DenseTensor* x_grad,
DenseTensor* scale_grad,
@@ -60,7 +59,6 @@ void BatchNormGradKernel(const Context& dev_ctx,
bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
DenseTensor* x_grad,
DenseTensor* scale_grad,
DenseTensor* bias_grad);
@@ -83,7 +81,6 @@ void BatchNormDoubleGradKernel(const Context& dev_ctx,
bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
DenseTensor* x_grad,
DenseTensor* scale_grad,
DenseTensor* y_grad_grad);
...
@@ -22,10 +22,10 @@ namespace phi {
template <typename T, typename Context>
void BatchNormInferKernel(const Context& dev_ctx,
const DenseTensor& x,
- const DenseTensor& scale,
- const DenseTensor& bias,
const DenseTensor& mean,
const DenseTensor& variance,
+ const DenseTensor& scale,
+ const DenseTensor& bias,
float momentum,
float epsilon,
const std::string& data_layout,
@@ -39,17 +39,16 @@ void BatchNormInferKernel(const Context& dev_ctx,
auto saved_variance = phi::EmptyLike<T, Context>(dev_ctx, *variance_out);
BatchNormKernel<T, Context>(dev_ctx,
x,
- scale,
- bias,
mean,
variance,
+ scale,
+ bias,
+ /*is_test=*/true,
momentum,
epsilon,
data_layout,
- /*is_test=*/true,
/*use_global_stats=*/false,
/*trainable_statistics=*/false,
- /*fuse_with_relu=*/false,
y,
mean_out,
variance_out,
...
@@ -23,17 +23,16 @@ namespace phi {
template <typename T, typename Context>
void BatchNormKernel(const Context& dev_ctx,
const DenseTensor& x,
- const DenseTensor& scale,
- const DenseTensor& bias,
const DenseTensor& mean,
const DenseTensor& variance,
+ const DenseTensor& scale,
+ const DenseTensor& bias,
+ bool is_test,
float momentum,
float epsilon,
const std::string& data_layout,
- bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
DenseTensor* y,
DenseTensor* mean_out,
DenseTensor* variance_out,
@@ -44,10 +43,10 @@ void BatchNormKernel(const Context& dev_ctx,
template <typename T, typename Context>
void BatchNormInferKernel(const Context& dev_ctx,
const DenseTensor& x,
- const DenseTensor& scale,
- const DenseTensor& bias,
const DenseTensor& mean,
const DenseTensor& variance,
+ const DenseTensor& scale,
+ const DenseTensor& bias,
float momentum,
float epsilon,
const std::string& data_layout,
...
@@ -43,9 +43,6 @@ void Conv3DGradKernel(const Context& dev_ctx,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
DenseTensor* input_grad,
DenseTensor* filter_grad);
@@ -60,10 +57,6 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
- bool fuse_relu,
DenseTensor* input_grad,
DenseTensor* filter_grad);
@@ -85,23 +78,21 @@ void ConvGradGradKernel(const Context& dev_ctx,
DenseTensor* out_grad_grad);
template <typename T, typename Context>
- void Conv3DGradGradKernel(const Context& dev_ctx,
- const DenseTensor& input,
- const DenseTensor& filter,
- const DenseTensor& out_grad,
- const paddle::optional<DenseTensor>& input_grad_grad,
- const paddle::optional<DenseTensor>& filter_grad_grad,
- const std::vector<int>& strides,
- const std::vector<int>& paddings,
- const std::string& padding_algorithm,
- int groups,
- const std::vector<int>& dilations,
- const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
- DenseTensor* input_grad,
- DenseTensor* filter_grad,
- DenseTensor* out_grad_grad);
+ void Conv3DDoubleGradKernel(
+ const Context& dev_ctx,
+ const DenseTensor& input,
+ const DenseTensor& filter,
+ const DenseTensor& out_grad,
+ const paddle::optional<DenseTensor>& input_grad_grad,
+ const paddle::optional<DenseTensor>& filter_grad_grad,
+ const std::vector<int>& strides,
+ const std::vector<int>& paddings,
+ const std::string& padding_algorithm,
+ int groups,
+ const std::vector<int>& dilations,
+ const std::string& data_format,
+ DenseTensor* input_grad,
+ DenseTensor* filter_grad,
+ DenseTensor* out_grad_grad);
} // namespace phi
@@ -40,9 +40,6 @@ void Conv3DKernel(const Context& dev_ctx,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
DenseTensor* out);
template <typename T, typename Context>
@@ -55,10 +52,6 @@ void DepthwiseConvKernel(const Context& dev_ctx,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
- bool fuse_relu,
DenseTensor* out);
} // namespace phi
@@ -52,7 +52,6 @@ void BatchNormGradRawKernel(const Context& ctx,
bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
bool is_inplace,
DenseTensor* x_grad,
DenseTensor* scale_grad,
@@ -310,7 +309,6 @@ void BatchNormGradKernel(const Context& dev_ctx,
bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
DenseTensor* x_grad,
DenseTensor* scale_grad,
DenseTensor* bias_grad) {
@@ -330,7 +328,6 @@ void BatchNormGradKernel(const Context& dev_ctx,
is_test,
use_global_stats,
trainable_statistics,
- fuse_with_relu,
false,
x_grad,
scale_grad,
@@ -355,7 +352,6 @@ void BatchNormDoubleGradKernel(const Context& ctx,
bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
DenseTensor* x_grad,
DenseTensor* scale_grad,
DenseTensor* y_grad_grad) {
...
@@ -36,17 +36,16 @@ using ConstEigenVectorArrayMap =
template <typename T, typename Context>
void BatchNormKernel(const Context& ctx,
const DenseTensor& x,
- const DenseTensor& scale,
- const DenseTensor& bias,
const DenseTensor& mean,
const DenseTensor& variance,
+ const DenseTensor& scale,
+ const DenseTensor& bias,
+ bool is_test,
float momentum,
float epsilon,
const std::string& data_layout_str,
- bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
DenseTensor* y,
DenseTensor* mean_out,
DenseTensor* variance_out,
...
@@ -31,10 +31,6 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
- bool fuse_relu,
DenseTensor* input_grad,
DenseTensor* filter_grad) {
ConvGradKernel<T>(dev_ctx,
@@ -62,9 +58,6 @@ void Conv3DGradKernel(const Context& dev_ctx,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
DenseTensor* input_grad,
DenseTensor* filter_grad) {
ConvGradKernel<T>(dev_ctx,
@@ -82,24 +75,22 @@ void Conv3DGradKernel(const Context& dev_ctx,
}
template <typename T, typename Context>
- void Conv3DGradGradKernel(const Context& ctx,
- const DenseTensor& input,
- const DenseTensor& filter,
- const DenseTensor& out_grad,
- const paddle::optional<DenseTensor>& input_grad_grad,
- const paddle::optional<DenseTensor>& filter_grad_grad,
- const std::vector<int>& strides,
- const std::vector<int>& paddings_t,
- const std::string& padding_algorithm,
- int groups,
- const std::vector<int>& dilations_t,
- const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search_t,
- DenseTensor* input_grad,
- DenseTensor* filter_grad,
- DenseTensor* out_grad_grad) {
+ void Conv3DDoubleGradKernel(
+ const Context& ctx,
+ const DenseTensor& input,
+ const DenseTensor& filter,
+ const DenseTensor& out_grad,
+ const paddle::optional<DenseTensor>& input_grad_grad,
+ const paddle::optional<DenseTensor>& filter_grad_grad,
+ const std::vector<int>& strides,
+ const std::vector<int>& paddings_t,
+ const std::string& padding_algorithm,
+ int groups,
+ const std::vector<int>& dilations_t,
+ const std::string& data_format,
+ DenseTensor* input_grad,
+ DenseTensor* filter_grad,
+ DenseTensor* out_grad_grad) {
ConvGradGradKernel<T>(ctx,
input,
filter,
@@ -136,9 +127,9 @@ PD_REGISTER_KERNEL(
conv2d_grad_grad, CPU, ALL_LAYOUT, phi::ConvGradGradKernel, float, double) {
}
- PD_REGISTER_KERNEL(conv3d_grad_grad,
+ PD_REGISTER_KERNEL(conv3d_double_grad,
CPU,
ALL_LAYOUT,
- phi::Conv3DGradGradKernel,
+ phi::Conv3DDoubleGradKernel,
float,
double) {}
@@ -53,10 +53,6 @@ void DepthwiseConvKernel(const Context& dev_ctx,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
- bool fuse_relu,
DenseTensor* out) {
ConvKernelImpl<T>(dev_ctx,
input,
@@ -80,9 +76,6 @@ void Conv3DKernel(const Context& dev_ctx,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
DenseTensor* out) {
ConvKernelImpl<T>(dev_ctx,
input,
...
@@ -578,7 +578,6 @@ void BatchNormGradRawKernel(const Context &ctx,
bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
bool is_inplace,
DenseTensor *x_grad,
DenseTensor *scale_grad,
@@ -1262,7 +1261,6 @@ void BatchNormGradKernel(const Context &dev_ctx,
bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
DenseTensor *x_grad,
DenseTensor *scale_grad,
DenseTensor *bias_grad) {
@@ -1282,7 +1280,6 @@ void BatchNormGradKernel(const Context &dev_ctx,
is_test,
use_global_stats,
trainable_statistics,
- fuse_with_relu,
false,
x_grad,
scale_grad,
@@ -1307,7 +1304,6 @@ void BatchNormDoubleGradKernel(const Context &ctx,
bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
DenseTensor *x_grad,
DenseTensor *scale_grad,
DenseTensor *y_grad_grad) {
...
@@ -533,17 +533,16 @@ static __global__ void BNForwardTraining2DWriteRes(
template <typename T, typename Context>
void BatchNormKernel(const Context &ctx,
const DenseTensor &x,
- const DenseTensor &scale,
- const DenseTensor &bias,
const DenseTensor &mean,
const DenseTensor &variance,
+ const DenseTensor &scale,
+ const DenseTensor &bias,
+ bool is_test,
float momentum,
float epsilon_f,
const std::string &data_layout_str,
- bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
DenseTensor *y,
DenseTensor *mean_out,
DenseTensor *variance_out,
...
@@ -31,9 +31,6 @@ void Conv3DGradKernel(const Context& dev_ctx,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
DenseTensor* input_grad,
DenseTensor* filter_grad) {
ConvGradKernel<T>(dev_ctx,
...
@@ -53,9 +53,6 @@ void Conv3DKernel(const Context& dev_ctx,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
DenseTensor* out) {
ConvKernelImpl<T>(dev_ctx,
input,
...
@@ -33,16 +33,19 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
int groups,
const std::vector<int>& dilations_t,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
- bool fuse_relu,
DenseTensor* input_grad,
DenseTensor* filter_grad) {
const DenseTensor* output_grad = &out_grad;
if (!input_grad && !filter_grad) return;
+ bool has_fuse_relu = dev_ctx.HasDnnAttr("fuse_relu_before_depthwise_conv");
+ bool fuse_relu =
+     has_fuse_relu
+         ? PADDLE_GET_CONST(
+               bool, dev_ctx.GetDnnAttr("fuse_relu_before_depthwise_conv"))
+         : false;
std::vector<int> strides = strides_t;
std::vector<int> paddings = paddings_t;
std::vector<int> dilations = dilations_t;
...
@@ -31,10 +31,6 @@ void DepthwiseConvKernel(const Context& dev_ctx,
int groups,
const std::vector<int>& dilations_t,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
- bool fuse_relu,
DenseTensor* out) {
DenseTensor* output = out;
output->mutable_data<T>(dev_ctx.GetPlace());
@@ -44,6 +40,14 @@ void DepthwiseConvKernel(const Context& dev_ctx,
std::vector<int> paddings = paddings_t;
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
+ bool has_fuse_relu = dev_ctx.HasDnnAttr("fuse_relu_before_depthwise_conv");
+ bool fuse_relu =
+     has_fuse_relu
+         ? PADDLE_GET_CONST(
+               bool, dev_ctx.GetDnnAttr("fuse_relu_before_depthwise_conv"))
+         : false;
if (channel_last) {
PADDLE_ENFORCE_EQ(
output->dims()[output->dims().size() - 1] %
...
@@ -34,7 +34,6 @@ void SyncBatchNormGradKernel(const Context& ctx,
bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
DenseTensor* x_grad,
DenseTensor* scale_grad,
DenseTensor* bias_grad) {
...
@@ -22,17 +22,16 @@ namespace phi {
template <typename T, typename Context>
void SyncBatchNormKernel(const Context &ctx,
const DenseTensor &x,
- const DenseTensor &scale,
- const DenseTensor &bias,
const DenseTensor &mean,
const DenseTensor &variance,
+ const DenseTensor &scale,
+ const DenseTensor &bias,
+ bool is_test,
float momentum,
float epsilon_f,
const std::string &data_layout_str,
- bool is_test,
bool use_global_stats,
bool trainable_statistics,
- bool fuse_with_relu,
DenseTensor *y,
DenseTensor *mean_out,
DenseTensor *variance_out,
...
@@ -603,9 +603,6 @@ void Conv3DCudnnGradKernel(const Context& dev_ctx,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search,
DenseTensor* input_grad,
DenseTensor* filter_grad) {
ConvCudnnGradKernel<T>(dev_ctx,
@@ -1295,10 +1292,6 @@ void DepthwiseConvDoubleGradGPUDNNKernel(
int groups,
const std::vector<int>& dilations_t,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search_t,
- bool fuse_relu,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad) {
@@ -1320,7 +1313,7 @@ void DepthwiseConvDoubleGradGPUDNNKernel(
}
template <typename T, typename Context>
- void Conv3DCudnnGradGradKernel(
+ void Conv3DCudnnDoubleGradKernel(
const Context& ctx,
const DenseTensor& input,
const DenseTensor& filter,
@@ -1333,9 +1326,6 @@ void Conv3DCudnnGradGradKernel(
int groups,
const std::vector<int>& dilations_t,
const std::string& data_format,
- bool use_addto,
- int workspace_size_MB,
- bool exhaustive_search_t,
DenseTensor* input_grad,
DenseTensor* filter_grad,
DenseTensor* out_grad_grad) {
@@ -1386,14 +1376,14 @@ PD_REGISTER_KERNEL(conv2d_grad_grad,
float,
phi::dtype::float16) {}
- PD_REGISTER_KERNEL(conv3d_grad_grad,
+ PD_REGISTER_KERNEL(conv3d_double_grad,
GPUDNN,
ALL_LAYOUT,
- phi::Conv3DCudnnGradGradKernel,
+ phi::Conv3DCudnnDoubleGradKernel,
float,
phi::dtype::float16) {}
- PD_REGISTER_KERNEL(depthwise_conv2d_grad_grad,
+ PD_REGISTER_KERNEL(depthwise_conv2d_double_grad,
GPU,
ALL_LAYOUT,
phi::DepthwiseConvDoubleGradGPUDNNKernel,
@@ -1427,16 +1417,16 @@ PD_REGISTER_KERNEL(conv2d_grad_grad,
phi::dtype::float16,
phi::dtype::bfloat16) {}
- PD_REGISTER_KERNEL(conv3d_grad_grad,
+ PD_REGISTER_KERNEL(conv3d_double_grad,
GPUDNN,
ALL_LAYOUT,
- phi::Conv3DCudnnGradGradKernel,
+ phi::Conv3DCudnnDoubleGradKernel,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
- PD_REGISTER_KERNEL(depthwise_conv2d_grad_grad,
+ PD_REGISTER_KERNEL(depthwise_conv2d_double_grad,
GPU,
ALL_LAYOUT,
phi::DepthwiseConvDoubleGradGPUDNNKernel,
@@ -1469,15 +1459,15 @@ PD_REGISTER_KERNEL(conv2d_grad_grad,
double,
phi::dtype::float16) {}
- PD_REGISTER_KERNEL(conv3d_grad_grad,
+ PD_REGISTER_KERNEL(conv3d_double_grad,
GPUDNN,
ALL_LAYOUT,
- phi::Conv3DCudnnGradGradKernel,
+ phi::Conv3DCudnnDoubleGradKernel,
float,
double,
phi::dtype::float16) {}
- PD_REGISTER_KERNEL(depthwise_conv2d_grad_grad,
+ PD_REGISTER_KERNEL(depthwise_conv2d_double_grad,
GPU,
ALL_LAYOUT,
phi::DepthwiseConvDoubleGradGPUDNNKernel,
...
...@@ -397,9 +397,6 @@ void Conv3DCudnnKernel(const Context& dev_ctx, ...@@ -397,9 +397,6 @@ void Conv3DCudnnKernel(const Context& dev_ctx,
int groups, int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::string& data_format, const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
DenseTensor* out) { DenseTensor* out) {
ConvCudnnKernel<T>(dev_ctx, ConvCudnnKernel<T>(dev_ctx,
input, input,
...@@ -423,10 +420,6 @@ void DepthwiseConvCudnnKernel(const Context& dev_ctx, ...@@ -423,10 +420,6 @@ void DepthwiseConvCudnnKernel(const Context& dev_ctx,
int groups, int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::string& data_format, const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
bool fuse_relu,
DenseTensor* out) { DenseTensor* out) {
ConvCudnnKernel<T>(dev_ctx, ConvCudnnKernel<T>(dev_ctx,
input, input,
......
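The convolution kernels get the companion treatment: the runtime-tuning attributes use_addto, workspace_size_MB, and exhaustive_search (plus fuse_relu on the depthwise variant) are dropped from the public argument list, leaving only the attributes that define the computation. A hypothetical sketch of the slimmed signature, again with a stand-in Tensor type:

#include <string>
#include <vector>

struct Tensor {};  // hypothetical stand-in for phi::DenseTensor

// Illustrates the reduced attribute set only; not the actual PHI
// declaration.
void Conv3dLike(const Tensor& input,
                const Tensor& filter,
                const std::vector<int>& strides,
                const std::vector<int>& paddings,
                const std::string& padding_algorithm,
                int groups,
                const std::vector<int>& dilations,
                const std::string& data_format,
                // use_addto / workspace_size_MB / exhaustive_search removed
                Tensor* out) {
  // body intentionally empty
}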
...@@ -38,7 +38,6 @@ void BatchNormCooGradKernel(const Context& dev_ctx, ...@@ -38,7 +38,6 @@ void BatchNormCooGradKernel(const Context& dev_ctx,
bool is_test, bool is_test,
bool use_global_stats, bool use_global_stats,
bool trainable_statistics, bool trainable_statistics,
bool fuse_with_relu,
SparseCooTensor* x_grad, SparseCooTensor* x_grad,
DenseTensor* scale_grad, DenseTensor* scale_grad,
DenseTensor* bias_grad) { DenseTensor* bias_grad) {
...@@ -61,7 +60,6 @@ void BatchNormCooGradKernel(const Context& dev_ctx, ...@@ -61,7 +60,6 @@ void BatchNormCooGradKernel(const Context& dev_ctx,
is_test, is_test,
use_global_stats, use_global_stats,
trainable_statistics, trainable_statistics,
fuse_with_relu,
x_grad->mutable_values(), x_grad->mutable_values(),
scale_grad, scale_grad,
bias_grad); bias_grad);
......
...@@ -39,7 +39,6 @@ void BatchNormCooGradKernel(const Context& dev_ctx, ...@@ -39,7 +39,6 @@ void BatchNormCooGradKernel(const Context& dev_ctx,
bool is_test, bool is_test,
bool use_global_stats, bool use_global_stats,
bool trainable_statistics, bool trainable_statistics,
bool fuse_with_relu,
SparseCooTensor* x_grad, SparseCooTensor* x_grad,
DenseTensor* scale_grad, DenseTensor* scale_grad,
DenseTensor* bias_grad); DenseTensor* bias_grad);
......
...@@ -23,17 +23,16 @@ namespace sparse { ...@@ -23,17 +23,16 @@ namespace sparse {
template <typename T, typename Context> template <typename T, typename Context>
void BatchNormCooKernel(const Context& dev_ctx, void BatchNormCooKernel(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
const DenseTensor& scale,
const DenseTensor& bias,
const DenseTensor& mean, const DenseTensor& mean,
const DenseTensor& variance, const DenseTensor& variance,
const DenseTensor& scale,
const DenseTensor& bias,
bool is_test,
float momentum, float momentum,
float epsilon, float epsilon,
const std::string& data_layout, const std::string& data_layout,
bool is_test,
bool use_global_stats, bool use_global_stats,
bool trainable_statistics, bool trainable_statistics,
bool fuse_with_relu,
SparseCooTensor* y, SparseCooTensor* y,
DenseTensor* mean_out, DenseTensor* mean_out,
DenseTensor* variance_out, DenseTensor* variance_out,
...@@ -43,17 +42,16 @@ void BatchNormCooKernel(const Context& dev_ctx, ...@@ -43,17 +42,16 @@ void BatchNormCooKernel(const Context& dev_ctx,
EmptyLikeCooKernel<T, Context>(dev_ctx, x, y); EmptyLikeCooKernel<T, Context>(dev_ctx, x, y);
phi::BatchNormKernel<T, Context>(dev_ctx, phi::BatchNormKernel<T, Context>(dev_ctx,
x.values(), x.values(),
scale,
bias,
mean, mean,
variance, variance,
scale,
bias,
is_test,
momentum, momentum,
epsilon, epsilon,
data_layout, data_layout,
is_test,
use_global_stats, use_global_stats,
trainable_statistics, trainable_statistics,
fuse_with_relu,
y->mutable_values(), y->mutable_values(),
mean_out, mean_out,
variance_out, variance_out,
......
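The sparse COO kernels are thin wrappers, so the reorder propagates to them automatically: EmptyLikeCooKernel materializes y with x's sparsity structure, and the dense BatchNormKernel then runs on x.values() into y->mutable_values(). A toy illustration of that delegation pattern, using hypothetical stand-in types rather than the real SparseCooTensor API:

#include <cstdint>
#include <vector>

struct Dense { std::vector<float> data; };                   // stand-in
struct Coo { std::vector<int64_t> indices; Dense values; };  // stand-in

// Dense normalization placeholder; the real kernel does the math.
void BatchNormDense(const Dense& x, Dense* y) { *y = x; }

// Sparse batch norm only transforms the stored values; the indices
// (i.e. the sparsity pattern) pass through unchanged.
void BatchNormCoo(const Coo& x, Coo* y) {
  y->indices = x.indices;                // EmptyLikeCoo-style setup
  BatchNormDense(x.values, &y->values);  // dense kernel on values()
}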
...@@ -35,7 +35,6 @@ void BatchNormKernel(const Context& dev_ctx, ...@@ -35,7 +35,6 @@ void BatchNormKernel(const Context& dev_ctx,
bool is_test, bool is_test,
bool use_global_stats, bool use_global_stats,
bool trainable_statistics, bool trainable_statistics,
bool fuse_with_relu,
SparseCooTensor* y, SparseCooTensor* y,
DenseTensor* mean_out, DenseTensor* mean_out,
DenseTensor* variance_out, DenseTensor* variance_out,
......
...@@ -37,7 +37,6 @@ void SyncBatchNormCooGradKernel( ...@@ -37,7 +37,6 @@ void SyncBatchNormCooGradKernel(
bool is_test, bool is_test,
bool use_global_stats, bool use_global_stats,
bool trainable_statistics, bool trainable_statistics,
bool fuse_with_relu,
SparseCooTensor* x_grad, SparseCooTensor* x_grad,
DenseTensor* scale_grad, DenseTensor* scale_grad,
DenseTensor* bias_grad) { DenseTensor* bias_grad) {
...@@ -58,7 +57,6 @@ void SyncBatchNormCooGradKernel( ...@@ -58,7 +57,6 @@ void SyncBatchNormCooGradKernel(
is_test, is_test,
use_global_stats, use_global_stats,
trainable_statistics, trainable_statistics,
fuse_with_relu,
x_grad->mutable_values(), x_grad->mutable_values(),
scale_grad, scale_grad,
bias_grad); bias_grad);
......
...@@ -23,17 +23,16 @@ namespace sparse { ...@@ -23,17 +23,16 @@ namespace sparse {
template <typename T, typename Context> template <typename T, typename Context>
void SyncBatchNormCooKernel(const Context& dev_ctx, void SyncBatchNormCooKernel(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
const DenseTensor& scale,
const DenseTensor& bias,
const DenseTensor& mean, const DenseTensor& mean,
const DenseTensor& variance, const DenseTensor& variance,
const DenseTensor& scale,
const DenseTensor& bias,
bool is_test,
float momentum, float momentum,
float epsilon, float epsilon,
const std::string& data_layout, const std::string& data_layout,
bool is_test,
bool use_global_stats, bool use_global_stats,
bool trainable_statistics, bool trainable_statistics,
bool fuse_with_relu,
SparseCooTensor* y, SparseCooTensor* y,
DenseTensor* mean_out, DenseTensor* mean_out,
DenseTensor* variance_out, DenseTensor* variance_out,
...@@ -43,17 +42,16 @@ void SyncBatchNormCooKernel(const Context& dev_ctx, ...@@ -43,17 +42,16 @@ void SyncBatchNormCooKernel(const Context& dev_ctx,
EmptyLikeCooKernel<T, Context>(dev_ctx, x, y); EmptyLikeCooKernel<T, Context>(dev_ctx, x, y);
phi::SyncBatchNormKernel<T, Context>(dev_ctx, phi::SyncBatchNormKernel<T, Context>(dev_ctx,
x.values(), x.values(),
scale,
bias,
mean, mean,
variance, variance,
scale,
bias,
is_test,
momentum, momentum,
epsilon, epsilon,
data_layout, data_layout,
is_test,
use_global_stats, use_global_stats,
trainable_statistics, trainable_statistics,
fuse_with_relu,
y->mutable_values(), y->mutable_values(),
mean_out, mean_out,
variance_out, variance_out,
......
...@@ -38,7 +38,6 @@ void SyncBatchNormCooGradKernel( ...@@ -38,7 +38,6 @@ void SyncBatchNormCooGradKernel(
bool is_test, bool is_test,
bool use_global_stats, bool use_global_stats,
bool trainable_statistics, bool trainable_statistics,
bool fuse_with_relu,
SparseCooTensor* x_grad, SparseCooTensor* x_grad,
DenseTensor* scale_grad, DenseTensor* scale_grad,
DenseTensor* bias_grad); DenseTensor* bias_grad);
......
...@@ -25,17 +25,16 @@ namespace sparse { ...@@ -25,17 +25,16 @@ namespace sparse {
template <typename T, typename Context> template <typename T, typename Context>
void SyncBatchNormCooKernel(const Context& dev_ctx, void SyncBatchNormCooKernel(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
const DenseTensor& scale,
const DenseTensor& bias,
const DenseTensor& mean, const DenseTensor& mean,
const DenseTensor& variance, const DenseTensor& variance,
const DenseTensor& scale,
const DenseTensor& bias,
bool is_test,
float momentum, float momentum,
float epsilon, float epsilon,
const std::string& data_layout, const std::string& data_layout,
bool is_test,
bool use_global_stats, bool use_global_stats,
bool trainable_statistics, bool trainable_statistics,
bool fuse_with_relu,
SparseCooTensor* y, SparseCooTensor* y,
DenseTensor* mean_out, DenseTensor* mean_out,
DenseTensor* variance_out, DenseTensor* variance_out,
......
...@@ -35,7 +35,6 @@ void SyncBatchNormGradKernel(const Context& dev_ctx, ...@@ -35,7 +35,6 @@ void SyncBatchNormGradKernel(const Context& dev_ctx,
bool is_test, bool is_test,
bool use_global_stats, bool use_global_stats,
bool trainable_statistics, bool trainable_statistics,
bool fuse_with_relu,
DenseTensor* x_grad, DenseTensor* x_grad,
DenseTensor* scale_grad, DenseTensor* scale_grad,
DenseTensor* bias_grad); DenseTensor* bias_grad);
......
...@@ -37,17 +37,16 @@ ccl::CCLComm GetCCLComm(const Place& place, int global_gid = 0); ...@@ -37,17 +37,16 @@ ccl::CCLComm GetCCLComm(const Place& place, int global_gid = 0);
template <typename T, typename Context> template <typename T, typename Context>
void SyncBatchNormKernel(const Context& dev_ctx, void SyncBatchNormKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& scale,
const DenseTensor& bias,
const DenseTensor& mean, const DenseTensor& mean,
const DenseTensor& variance, const DenseTensor& variance,
const DenseTensor& scale,
const DenseTensor& bias,
bool is_test,
float momentum, float momentum,
float epsilon, float epsilon,
const std::string& data_layout, const std::string& data_layout,
bool is_test,
bool use_global_stats, bool use_global_stats,
bool trainable_statistics, bool trainable_statistics,
bool fuse_with_relu,
DenseTensor* y, DenseTensor* y,
DenseTensor* mean_out, DenseTensor* mean_out,
DenseTensor* variance_out, DenseTensor* variance_out,
......
...@@ -86,7 +86,6 @@ void BatchNormGradKernel(const Context &dev_ctx, ...@@ -86,7 +86,6 @@ void BatchNormGradKernel(const Context &dev_ctx,
bool is_test, bool is_test,
bool use_global_stats, bool use_global_stats,
bool trainable_statistics, bool trainable_statistics,
bool fuse_with_relu,
DenseTensor *x_grad, DenseTensor *x_grad,
DenseTensor *scale_grad, DenseTensor *scale_grad,
DenseTensor *bias_grad) { DenseTensor *bias_grad) {
......
...@@ -23,17 +23,16 @@ namespace phi { ...@@ -23,17 +23,16 @@ namespace phi {
template <typename T, typename Context> template <typename T, typename Context>
void BatchNormKernel(const Context& dev_ctx, void BatchNormKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& scale,
const DenseTensor& bias,
const DenseTensor& mean, const DenseTensor& mean,
const DenseTensor& variance, const DenseTensor& variance,
const DenseTensor& scale,
const DenseTensor& bias,
bool is_test,
float momentum, float momentum,
float epsilon, float epsilon,
const std::string& data_layout_str, const std::string& data_layout,
bool is_test,
bool use_global_stats, bool use_global_stats,
bool trainable_statistics, bool trainable_statistics,
bool fuse_with_relu,
DenseTensor* y, DenseTensor* y,
DenseTensor* mean_out, DenseTensor* mean_out,
DenseTensor* variance_out, DenseTensor* variance_out,
......
...@@ -152,10 +152,6 @@ void DepthwiseConvGradKernel(const Context& dev_ctx, ...@@ -152,10 +152,6 @@ void DepthwiseConvGradKernel(const Context& dev_ctx,
int groups, int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::string& data_format, const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
bool fuse_relu,
DenseTensor* input_grad, DenseTensor* input_grad,
DenseTensor* filter_grad) { DenseTensor* filter_grad) {
ConvGradKernel<T, Context>(dev_ctx, ConvGradKernel<T, Context>(dev_ctx,
......
...@@ -118,10 +118,6 @@ void DepthwiseConvKernel(const Context& dev_ctx, ...@@ -118,10 +118,6 @@ void DepthwiseConvKernel(const Context& dev_ctx,
int groups, int groups,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::string& data_format, const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
bool fuse_relu,
DenseTensor* out) { DenseTensor* out) {
ConvKernel<T, Context>(dev_ctx, ConvKernel<T, Context>(dev_ctx,
input, input,
......
...@@ -33,19 +33,18 @@ KernelSignature BatchNormOpArgumentMapping(const ArgumentMappingContext& ctx) { ...@@ -33,19 +33,18 @@ KernelSignature BatchNormOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (is_test && !use_global_stats && !trainable_statistics && if (is_test && !use_global_stats && !trainable_statistics &&
!fuse_with_relu) { !fuse_with_relu) {
return KernelSignature("batch_norm_infer", return KernelSignature("batch_norm_infer",
{"X", "Scale", "Bias", "Mean", "Variance"}, {"X", "Mean", "Variance", "Scale", "Bias"},
{"momentum", "epsilon", "data_layout"}, {"momentum", "epsilon", "data_layout"},
{"Y", "MeanOut", "VarianceOut"}); {"Y", "MeanOut", "VarianceOut"});
} else { } else {
return KernelSignature("batch_norm", return KernelSignature("batch_norm",
{"X", "Scale", "Bias", "Mean", "Variance"}, {"X", "Mean", "Variance", "Scale", "Bias"},
{"momentum", {"is_test",
"momentum",
"epsilon", "epsilon",
"data_layout", "data_layout",
"is_test",
"use_global_stats", "use_global_stats",
"trainable_statistics", "trainable_statistics"},
"fuse_with_relu"},
{"Y", {"Y",
"MeanOut", "MeanOut",
"VarianceOut", "VarianceOut",
...@@ -74,8 +73,7 @@ KernelSignature BatchNormGradOpArgumentMapping( ...@@ -74,8 +73,7 @@ KernelSignature BatchNormGradOpArgumentMapping(
"data_layout", "data_layout",
"is_test", "is_test",
"use_global_stats", "use_global_stats",
"trainable_statistics", "trainable_statistics"},
"fuse_with_relu"},
{"X@GRAD", "Scale@GRAD", "Bias@GRAD"}); {"X@GRAD", "Scale@GRAD", "Bias@GRAD"});
} }
...@@ -97,8 +95,7 @@ KernelSignature BatchNormGradGradOpArgumentMapping( ...@@ -97,8 +95,7 @@ KernelSignature BatchNormGradGradOpArgumentMapping(
"data_layout", "data_layout",
"is_test", "is_test",
"use_global_stats", "use_global_stats",
"trainable_statistics", "trainable_statistics"},
"fuse_with_relu"},
{"DX", "DScale", "DDY"}); {"DX", "DScale", "DDY"});
} }
......
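Note that the argument mapping still reads the legacy fuse_with_relu attribute when choosing between the inference-only and training signatures; it simply no longer forwards it to the kernel. A self-contained restatement of that selection logic, as a hypothetical helper with the same predicate as the hunk above:

#include <iostream>
#include <string>

// Pure inference with nothing global, trainable, or fused routes to the
// slimmer batch_norm_infer signature; everything else uses batch_norm.
std::string ChooseBatchNormKernel(bool is_test, bool use_global_stats,
                                  bool trainable_statistics,
                                  bool fuse_with_relu) {
  if (is_test && !use_global_stats && !trainable_statistics &&
      !fuse_with_relu) {
    return "batch_norm_infer";
  }
  return "batch_norm";
}

int main() {
  std::cout << ChooseBatchNormKernel(true, false, false, false) << "\n";   // batch_norm_infer
  std::cout << ChooseBatchNormKernel(false, false, false, false) << "\n";  // batch_norm
}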
...@@ -19,15 +19,14 @@ namespace phi { ...@@ -19,15 +19,14 @@ namespace phi {
KernelSignature Conv3dOpArgumentMapping(const ArgumentMappingContext& ctx) { KernelSignature Conv3dOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("conv3d", return KernelSignature("conv3d",
{"Input", "Filter"}, {"Input", "Filter"},
{"strides", {
"paddings", "strides",
"padding_algorithm", "paddings",
"groups", "padding_algorithm",
"dilations", "groups",
"data_format", "dilations",
"use_addto", "data_format",
"workspace_size_MB", },
"exhaustive_search"},
{"Output"}); {"Output"});
} }
...@@ -39,31 +38,27 @@ KernelSignature Conv3dGradOpArgumentMapping(const ArgumentMappingContext& ctx) { ...@@ -39,31 +38,27 @@ KernelSignature Conv3dGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
"padding_algorithm", "padding_algorithm",
"groups", "groups",
"dilations", "dilations",
"data_format", "data_format"},
"use_addto",
"workspace_size_MB",
"exhaustive_search"},
{"Input@GRAD", "Filter@GRAD"}); {"Input@GRAD", "Filter@GRAD"});
} }
KernelSignature Conv3dDoubleGradOpArgumentMapping( KernelSignature Conv3dDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) { const ArgumentMappingContext& ctx) {
return KernelSignature("conv3d_grad_grad", return KernelSignature("conv3d_double_grad",
{"Input", "Filter", "DOutput", "DDInput", "DDFilter"}, {"Input", "Filter", "DOutput", "DDInput", "DDFilter"},
{"strides", {"strides",
"paddings", "paddings",
"padding_algorithm", "padding_algorithm",
"groups", "groups",
"dilations", "dilations",
"data_format", "data_format"},
"use_addto",
"workspace_size_MB",
"exhaustive_search"},
{"DInput", "DFilter", "DDOutput"}); {"DInput", "DFilter", "DDOutput"});
} }
} // namespace phi } // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(conv3d_grad_grad, conv3d_double_grad);
PD_REGISTER_ARG_MAPPING_FN(conv3d, phi::Conv3dOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(conv3d, phi::Conv3dOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(conv3d_grad, phi::Conv3dGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(conv3d_grad, phi::Conv3dGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(conv3d_grad_grad, PD_REGISTER_ARG_MAPPING_FN(conv3d_grad_grad,
......
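The newly added PD_REGISTER_BASE_KERNEL_NAME line (and its twin in the depthwise_conv2d mapping below) appears to exist for backward compatibility: it lets the legacy conv3d_grad_grad name resolve to the renamed conv3d_double_grad kernel. Conceptually it amounts to a name-to-name lookup, sketched here as a plain map; this is illustrative only, not Paddle's actual mechanism.

#include <map>
#include <string>

// Hypothetical equivalent of the two base-name registrations in this change.
const std::map<std::string, std::string> kBaseKernelNameMap = {
    {"conv3d_grad_grad", "conv3d_double_grad"},
    {"depthwise_conv2d_grad_grad", "depthwise_conv2d_double_grad"},
};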
...@@ -25,11 +25,7 @@ KernelSignature DepthwiseConv2dOpArgumentMapping( ...@@ -25,11 +25,7 @@ KernelSignature DepthwiseConv2dOpArgumentMapping(
"padding_algorithm", "padding_algorithm",
"groups", "groups",
"dilations", "dilations",
"data_format", "data_format"},
"use_addto",
"workspace_size_MB",
"exhaustive_search",
"fuse_relu_before_depthwise_conv"},
{"Output"}); {"Output"});
} }
...@@ -42,33 +38,28 @@ KernelSignature DepthwiseConv2dGradOpArgumentMapping( ...@@ -42,33 +38,28 @@ KernelSignature DepthwiseConv2dGradOpArgumentMapping(
"padding_algorithm", "padding_algorithm",
"groups", "groups",
"dilations", "dilations",
"data_format", "data_format"},
"use_addto",
"workspace_size_MB",
"exhaustive_search",
"fuse_relu_before_depthwise_conv"},
{"Input@GRAD", "Filter@GRAD"}); {"Input@GRAD", "Filter@GRAD"});
} }
KernelSignature DepthwiseConv2dDoubleGradOpArgumentMapping( KernelSignature DepthwiseConv2dDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) { const ArgumentMappingContext& ctx) {
return KernelSignature("depthwise_conv2d_grad_grad", return KernelSignature("depthwise_conv2d_double_grad",
{"Input", "Filter", "DOutput", "DDInput", "DDFilter"}, {"Input", "Filter", "DOutput", "DDInput", "DDFilter"},
{"strides", {"strides",
"paddings", "paddings",
"padding_algorithm", "padding_algorithm",
"groups", "groups",
"dilations", "dilations",
"data_format", "data_format"},
"use_addto",
"workspace_size_MB",
"exhaustive_search",
"fuse_relu_before_depthwise_conv"},
{"DInput", "DFilter", "DDOutput"}); {"DInput", "DFilter", "DDOutput"});
} }
} // namespace phi } // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(depthwise_conv2d_grad_grad,
depthwise_conv2d_double_grad);
PD_REGISTER_ARG_MAPPING_FN(depthwise_conv2d, PD_REGISTER_ARG_MAPPING_FN(depthwise_conv2d,
phi::DepthwiseConv2dOpArgumentMapping); phi::DepthwiseConv2dOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(depthwise_conv2d_grad, PD_REGISTER_ARG_MAPPING_FN(depthwise_conv2d_grad,
......
...@@ -19,14 +19,13 @@ namespace phi { ...@@ -19,14 +19,13 @@ namespace phi {
KernelSignature SyncBatchNormOpArgumentMapping( KernelSignature SyncBatchNormOpArgumentMapping(
const ArgumentMappingContext& ctx) { const ArgumentMappingContext& ctx) {
return KernelSignature("sync_batch_norm", return KernelSignature("sync_batch_norm",
{"X", "Scale", "Bias", "Mean", "Variance"}, {"X", "Mean", "Variance", "Scale", "Bias"},
{"momentum", {"is_test",
"momentum",
"epsilon", "epsilon",
"data_layout", "data_layout",
"is_test",
"use_global_stats", "use_global_stats",
"trainable_statistics", "trainable_statistics"},
"fuse_with_relu"},
{"Y", {"Y",
"MeanOut", "MeanOut",
"VarianceOut", "VarianceOut",
...@@ -52,8 +51,7 @@ KernelSignature SyncBatchNormGradOpArgumentMapping( ...@@ -52,8 +51,7 @@ KernelSignature SyncBatchNormGradOpArgumentMapping(
"data_layout", "data_layout",
"is_test", "is_test",
"use_global_stats", "use_global_stats",
"trainable_statistics", "trainable_statistics"},
"fuse_with_relu"},
{"X@GRAD", "Scale@GRAD", "Bias@GRAD"}); {"X@GRAD", "Scale@GRAD", "Bias@GRAD"});
} }
......
...@@ -1533,17 +1533,16 @@ class BatchNorm(layers.Layer): ...@@ -1533,17 +1533,16 @@ class BatchNorm(layers.Layer):
if in_dygraph_mode(): if in_dygraph_mode():
batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm( batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm(
input, input,
self.weight,
self.bias,
self._mean, self._mean,
self._variance, self._variance,
self.weight,
self.bias,
not self.training,
self._momentum, self._momentum,
self._epsilon, self._epsilon,
self._data_layout, self._data_layout,
not self.training,
self._use_global_stats, self._use_global_stats,
self._trainable_statistics, self._trainable_statistics,
False,
) )
return dygraph_utils._append_activation_in_dygraph( return dygraph_utils._append_activation_in_dygraph(
batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn
......
...@@ -172,10 +172,6 @@ def _conv_nd( ...@@ -172,10 +172,6 @@ def _conv_nd(
groups, groups,
dilation, dilation,
data_format, data_format,
False,
-1,
False,
False,
use_cudnn, use_cudnn,
) )
if bias is not None: if bias is not None:
...@@ -202,9 +198,6 @@ def _conv_nd( ...@@ -202,9 +198,6 @@ def _conv_nd(
groups, groups,
dilation, dilation,
data_format, data_format,
False,
-1,
False,
) )
if bias is not None: if bias is not None:
channel_dim = ( channel_dim = (
......
...@@ -202,17 +202,16 @@ def batch_norm( ...@@ -202,17 +202,16 @@ def batch_norm(
if in_dygraph_mode(): if in_dygraph_mode():
batch_norm_out, _, _, _, _, _ = _C_ops.batch_norm( batch_norm_out, _, _, _, _, _ = _C_ops.batch_norm(
x, x,
weight,
bias,
running_mean, running_mean,
running_var, running_var,
weight,
bias,
not training,
momentum, momentum,
epsilon, epsilon,
data_format, data_format,
not training,
use_global_stats, use_global_stats,
trainable_statistics, trainable_statistics,
False,
) )
return dygraph_utils._append_activation_in_dygraph( return dygraph_utils._append_activation_in_dygraph(
......
...@@ -1180,15 +1180,14 @@ class SyncBatchNorm(_BatchNormBase): ...@@ -1180,15 +1180,14 @@ class SyncBatchNorm(_BatchNormBase):
if in_dygraph_mode(): if in_dygraph_mode():
sync_batch_norm_out, _, _, _, _, _ = _C_ops.sync_batch_norm_( sync_batch_norm_out, _, _, _, _, _ = _C_ops.sync_batch_norm_(
x, x,
self.weight,
self.bias,
self._mean, self._mean,
self._variance, self._variance,
self.weight,
self.bias,
not self.training,
self._momentum, self._momentum,
self._epsilon, self._epsilon,
self._data_format, self._data_format,
not self.training,
False,
False, False,
False, False,
) )
......
...@@ -140,17 +140,16 @@ class BatchNorm(paddle.nn.BatchNorm1D): ...@@ -140,17 +140,16 @@ class BatchNorm(paddle.nn.BatchNorm1D):
if in_dynamic_mode(): if in_dynamic_mode():
batch_norm_out, _, _, _, _, _ = _C_ops.sparse_batch_norm( batch_norm_out, _, _, _, _, _ = _C_ops.sparse_batch_norm(
input, input,
self.weight,
self.bias,
self._mean, self._mean,
self._variance, self._variance,
self.weight,
self.bias,
not self.training,
self._momentum, self._momentum,
self._epsilon, self._epsilon,
data_format, data_format,
not self.training,
self._use_global_stats, self._use_global_stats,
trainable_statistics, trainable_statistics,
False,
) )
return batch_norm_out return batch_norm_out
else: else:
...@@ -324,15 +323,14 @@ class SyncBatchNorm(paddle.nn.SyncBatchNorm): ...@@ -324,15 +323,14 @@ class SyncBatchNorm(paddle.nn.SyncBatchNorm):
self._check_data_format() self._check_data_format()
sync_batch_norm_out, _, _, _, _, _ = _C_ops.sparse_sync_batch_norm_( sync_batch_norm_out, _, _, _, _, _ = _C_ops.sparse_sync_batch_norm_(
x, x,
self.weight,
self.bias,
self._mean, self._mean,
self._variance, self._variance,
self.weight,
self.bias,
not self.training,
self._momentum, self._momentum,
self._epsilon, self._epsilon,
self._data_format, self._data_format,
not self.training,
False,
False, False,
False, False,
) )
......