From 8e4e19ab3003c8d8a29a319ada9cd3422b64a999 Mon Sep 17 00:00:00 2001 From: hong <43953930+phlrain@users.noreply.github.com> Date: Sat, 19 Mar 2022 12:40:58 +0800 Subject: [PATCH] Add infer meta (#40544) * add infer meta; test=develop * add histogram infer meta; test=develop * fix unitest bug; test=develop * format; test=develop * format; test=develop * bn not use new infer meta; test=develop * add infer meta; test=develop * fixbug; test=develop * fix bug; * recover unitest; test=develop --- paddle/fluid/operators/batch_norm_op.cc | 763 +----------------- paddle/fluid/operators/batch_norm_op.h | 18 - paddle/fluid/operators/conv_op.cc | 9 + .../fluid/operators/detection/yolo_box_op.cc | 7 +- paddle/fluid/operators/dropout_op.cc | 19 +- .../fluid/operators/fused/conv_fusion_op.cc | 136 ++++ paddle/fluid/operators/histogram_op.cc | 30 +- paddle/fluid/operators/inplace_abn_op.cc | 1 + paddle/fluid/operators/masked_select_op.cc | 19 +- paddle/fluid/operators/norm_op.cc | 22 +- paddle/fluid/operators/sync_batch_norm_op.cc | 1 + paddle/phi/infermeta/binary.cc | 277 +++++++ paddle/phi/infermeta/binary.h | 32 + paddle/phi/infermeta/multiary.cc | 112 +++ paddle/phi/infermeta/multiary.h | 20 + paddle/phi/infermeta/unary.cc | 49 ++ paddle/phi/infermeta/unary.h | 10 + .../test_mkldnn_conv_gelu_fuse_pass.py | 2 + 18 files changed, 692 insertions(+), 835 deletions(-) diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 174207deb0..5194c8772e 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -21,6 +21,9 @@ limitations under the License. */ #include "paddle/fluid/platform/mkldnn_helper.h" #endif +#include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/phi/infermeta/multiary.h" + namespace paddle { namespace operators { @@ -297,184 +300,6 @@ The required data format for this layer is one of the following: )DOC"); } -template -class BatchNormKernel - : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - const float epsilon = ctx.Attr("epsilon"); - float momentum = ctx.Attr("momentum"); - const bool is_test = ctx.Attr("is_test"); - const bool use_global_stats = ctx.Attr("use_global_stats"); - const bool trainable_stats = ctx.Attr("trainable_statistics"); - bool test_mode = is_test && (!trainable_stats); - - bool global_stats = test_mode || use_global_stats; - - const std::string data_layout_str = ctx.Attr("data_layout"); - DataLayout data_layout = framework::StringToDataLayout(data_layout_str); - - const auto *x = ctx.Input("X"); - const auto &x_dims = x->dims(); - PADDLE_ENFORCE_GE( - x_dims.size(), 2, - platform::errors::InvalidArgument( - "The size of input X's dimensions should be larger than 1." - "But received: the size of input X's dimensions is [%d]", - x_dims.size())); - PADDLE_ENFORCE_LE( - x_dims.size(), 5, - platform::errors::InvalidArgument( - "The size of input X's dimensions should be less than 6." - "But received: the size of input X's dimensionss is [%d]", - x_dims.size())); - const int N = x_dims[0]; - const int C = - (data_layout == DataLayout::kNCHW ? x_dims[1] - : x_dims[x_dims.size() - 1]); - const int sample_size = x->numel() / N / C; - - auto *y = ctx.Output("Y"); - - auto *mean_out = ctx.Output("MeanOut"); - auto *variance_out = ctx.Output("VarianceOut"); - auto *saved_mean = ctx.Output("SavedMean"); - auto *saved_variance = ctx.Output("SavedVariance"); - - // alloc memory - y->mutable_data(ctx.GetPlace()); - mean_out->mutable_data(ctx.GetPlace()); - variance_out->mutable_data(ctx.GetPlace()); - saved_mean->mutable_data(ctx.GetPlace()); - saved_variance->mutable_data(ctx.GetPlace()); - - // input dimension is 2 and the format is NCHW. The input can be regarded - // as NHWC format - if (x_dims.size() == 2 && data_layout == DataLayout::kNCHW) { - data_layout = DataLayout::kNHWC; - } - - if (!global_stats) { - // saved_xx is use just in this batch of data - EigenVectorArrayMap saved_mean_e( - saved_mean->mutable_data(ctx.GetPlace()), C); - EigenVectorArrayMap saved_variance_e( - saved_variance->mutable_data(ctx.GetPlace()), C); - saved_mean_e.setZero(); - saved_variance_e.setZero(); - - EigenVectorArrayMap running_mean_arr( - mean_out->mutable_data(ctx.GetPlace()), C); - EigenVectorArrayMap running_var_arr( - variance_out->mutable_data(ctx.GetPlace()), C); - - if ((N * sample_size) == 1) { - // Only 1 element in normalization dimension, - // we skip the batch norm calculation, let y = x. - framework::TensorCopy(*x, ctx.GetPlace(), y); - return; - } - - switch (data_layout) { - case DataLayout::kNCHW: { - ConstEigenArrayMap x_arr(x->data(), sample_size, N * C); - for (int nc = 0; nc < N * C; ++nc) { - saved_mean_e(nc % C) += x_arr.col(nc).sum(); - } - saved_mean_e /= N * sample_size; - for (int nc = 0; nc < N * C; ++nc) { - saved_variance_e(nc % C) += - (x_arr.col(nc) - saved_mean_e(nc % C)).matrix().squaredNorm(); - } - saved_variance_e /= N * sample_size; - break; - } - case DataLayout::kNHWC: { - ConstEigenArrayMap x_arr(x->data(), C, N * sample_size); - for (int i = 0; i < N * sample_size; ++i) { - saved_mean_e += x_arr.col(i); - } - saved_mean_e /= N * sample_size; - for (int i = 0; i < N * sample_size; ++i) { - saved_variance_e += - (x_arr.col(i) - saved_mean_e) * (x_arr.col(i) - saved_mean_e); - } - saved_variance_e /= N * sample_size; - break; - } - default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Unknown storage order: %s", data_layout_str)); - } - - // if MomentumTensor is set, use MomentumTensor value, momentum - // is only used in this training branch - if (ctx.HasInput("MomentumTensor")) { - const auto *mom_tensor = ctx.Input("MomentumTensor"); - momentum = mom_tensor->data()[0]; - } - - running_mean_arr = - running_mean_arr * momentum + saved_mean_e * (1. - momentum); - running_var_arr = - running_var_arr * momentum + saved_variance_e * (1. - momentum); - } - - // use SavedMean and SavedVariance to do normalize - Eigen::Array inv_std(C); - if (global_stats) { - ConstEigenVectorArrayMap var_arr( - ctx.Input("Variance")->data(), C); - inv_std = (var_arr + epsilon).sqrt().inverse(); - } else { - EigenVectorArrayMap saved_inv_std( - ctx.Output("SavedVariance")->data(), C); - // inverse SavedVariance first, gradient will use it too. - saved_inv_std = (saved_inv_std + epsilon).inverse().sqrt(); - inv_std = saved_inv_std; - } - ConstEigenVectorArrayMap mean_arr( - global_stats ? ctx.Input("Mean")->data() - : ctx.Output("SavedMean")->data(), - C); - - // ((x - est_mean) * (inv_var) * scale + bias - // formula transform ====> - // (x * inv_var * scale) + (bias - est_mean * inv_var * scale) - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); - ConstEigenVectorArrayMap scale_arr(scale->data(), C); - ConstEigenVectorArrayMap bias_arr(bias->data(), C); - Eigen::Array new_scale = inv_std * scale_arr; - Eigen::Array new_bias = - bias_arr - mean_arr * inv_std * scale_arr; - - switch (data_layout) { - case DataLayout::kNCHW: { - EigenArrayMap y_arr(y->mutable_data(ctx.GetPlace()), sample_size, - N * C); - ConstEigenArrayMap x_arr(x->data(), sample_size, N * C); - for (int nc = 0; nc < N * C; ++nc) { - y_arr.col(nc) = x_arr.col(nc) * new_scale(nc % C) + new_bias(nc % C); - } - break; - } - case DataLayout::kNHWC: { - EigenArrayMap(y->mutable_data(ctx.GetPlace()), C, - N * sample_size) = - (ConstEigenArrayMap(x->data(), C, N * sample_size).colwise() * - new_scale) - .colwise() + - new_bias; - break; - } - default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Unknown storage order: %d", data_layout)); - } - } -}; - void BatchNormGradOp::InferShape(framework::InferShapeContext *ctx) const { // check input OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", "BatchNormGrad"); @@ -585,261 +410,6 @@ framework::OpKernelType BatchNormGradOp::GetKernelTypeForVar( tensor.place(), tensor.layout()); } -template -class BatchNormGradKernel - : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - const auto *d_y = ctx.Input(framework::GradVarName("Y")); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); - const auto *saved_mean = ctx.Input("SavedMean"); - // SavedVariance have been reverted in forward operator - const auto *saved_inv_variance = ctx.Input("SavedVariance"); - const std::string data_layout_str = ctx.Attr("data_layout"); - bool use_global_stats = ctx.Attr("use_global_stats"); - const bool is_test = ctx.Attr("is_test"); - const float epsilon = ctx.Attr("epsilon"); - DataLayout data_layout = framework::StringToDataLayout(data_layout_str); - - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_scale = ctx.Output(framework::GradVarName("Scale")); - auto *d_bias = ctx.Output(framework::GradVarName("Bias")); - - use_global_stats = is_test || use_global_stats; - - // batch_norm with inplace as false will take X as grad input, which - // is same as cuDNN batch_norm backward calculation, batch_norm - // with inplace as true only take Y as input and X should be calculate - // by inverse operation of batch_norm on Y - const Tensor *x; - bool is_inplace; - if (ctx.HasInput("Y")) { - x = ctx.Input("Y"); - is_inplace = true; - // if the input of batch norm is stop_gradient, d_x is null. - if (d_x) { - PADDLE_ENFORCE_EQ(d_x, d_y, - platform::errors::InvalidArgument( - "X@GRAD and Y@GRAD not inplace in inplace mode")); - } - } else { - x = ctx.Input("X"); - is_inplace = false; - if (d_x) { - PADDLE_ENFORCE_NE( - d_x, d_y, platform::errors::InvalidArgument( - "X@GRAD and Y@GRAD inplaced in non-inplace mode")); - } - } - - // Get the size for each dimension. - // NCHW [batch_size, in_channels, in_height, in_width] - const auto &x_dims = x->dims(); - PADDLE_ENFORCE_GE( - x_dims.size(), 2, - platform::errors::InvalidArgument( - "The size of input X's dimensions should be larger than 1." - "But received: the size of input X's dimensions is [%d]", - x_dims.size())); - PADDLE_ENFORCE_LE( - x_dims.size(), 5, - platform::errors::InvalidArgument( - "The size of input X's dimensions should be less than 6." - "But received: the size of input X's dimensions is [%d]", - x_dims.size())); - const int N = x_dims[0]; - const int C = - (data_layout == DataLayout::kNCHW ? x_dims[1] - : x_dims[x_dims.size() - 1]); - const int sample_size = x->numel() / N / C; - - // input dimension is 2 and the format is NCHW. The input can be regarded as - // NHWC format - if (x_dims.size() == 2 && data_layout == DataLayout::kNCHW) { - data_layout = DataLayout::kNHWC; - } - - // init output - if (d_x) { - d_x->mutable_data(ctx.GetPlace()); - } - - const T *mean_data = saved_mean->data(); - const T *inv_var_data = saved_inv_variance->data(); - Tensor inv_var_tensor; - if (use_global_stats) { - const auto *running_mean = ctx.Input("Mean"); - const auto *running_variance = ctx.Input("Variance"); - mean_data = running_mean->data(); - inv_var_tensor.Resize({C}); - T *running_inv_var_data = inv_var_tensor.mutable_data(ctx.GetPlace()); - EigenVectorArrayMap inv_var_tmp(running_inv_var_data, C); - ConstEigenVectorArrayMap var_arr(running_variance->data(), C); - - inv_var_tmp = (var_arr + epsilon).sqrt().inverse(); - inv_var_data = running_inv_var_data; - } - - ConstEigenVectorArrayMap scale_arr(scale->data(), C); - ConstEigenVectorArrayMap bias_arr(bias->data(), C); - ConstEigenVectorArrayMap mean_arr(mean_data, C); - ConstEigenVectorArrayMap inv_var_arr(inv_var_data, C); - - T *d_bias_data = nullptr; - T *d_scale_data = nullptr; - if (d_scale && d_bias) { - d_scale->mutable_data(ctx.GetPlace()); - d_bias->mutable_data(ctx.GetPlace()); - d_bias_data = d_bias->mutable_data(ctx.GetPlace()); - d_scale_data = d_scale->mutable_data(ctx.GetPlace()); - } - - // d_bias = np.sum(d_y, axis=0) - // d_scale = np.sum((X - mean) / inv_std * dy, axis=0) - // d_x = (1. / N) * scale * inv_var * (N * d_y - np.sum(d_y, axis=0) - // - (X - mean) * inv_var * inv_var * np.sum(d_y * (X - mean), axis=0)) - EigenVectorArrayMap d_bias_arr(d_bias_data, C); - EigenVectorArrayMap d_scale_arr(d_scale_data, C); - - if (d_scale && d_bias) { - d_bias_arr.setZero(); - d_scale_arr.setZero(); - } - - if (d_x && (N * sample_size) == 1 && !use_global_stats) { - framework::TensorCopy(*d_y, ctx.GetPlace(), d_x); - return; - } - - int scale_coefff = use_global_stats ? 1 : N * sample_size; - const auto scale_inv_var_nhw = scale_arr * inv_var_arr / scale_coefff; - - Tensor dy_sum; - dy_sum.Resize({C}); - dy_sum.mutable_data(ctx.GetPlace()); - EigenVectorArrayMap dy_sum_arr(dy_sum.mutable_data(ctx.GetPlace()), - C); - - Tensor dy_mul_x_sub_mean_mul_invstd_sum; - dy_mul_x_sub_mean_mul_invstd_sum.Resize({C}); - dy_mul_x_sub_mean_mul_invstd_sum.mutable_data(ctx.GetPlace()); - EigenVectorArrayMap dy_mul_x_sub_mean_mul_invstd_sum_arr( - dy_mul_x_sub_mean_mul_invstd_sum.mutable_data(ctx.GetPlace()), C); - - dy_sum_arr.setZero(); - dy_mul_x_sub_mean_mul_invstd_sum_arr.setZero(); - - // inplace calculation - // Y: ((x - est_mean) * (inv_var) * scale + bias - // formula transform ====> - // (x * inv_var * scale) + (bias - est_mean * inv_var * scale) - // X: (y - bias) / scale / (inv_var) + est_mean - // formula transform ====> - // (y - bias) / (scale * inv_var) + est_mean - switch (data_layout) { - case DataLayout::kNCHW: { - if (is_inplace) { - auto px = *x; - EigenArrayMap x_data(px.mutable_data(ctx.GetPlace()), - sample_size, N * C); - ConstEigenArrayMap y_data(x->data(), sample_size, N * C); - for (int nc = 0; nc < N * C; ++nc) { - x_data.col(nc) = (y_data.col(nc) - bias_arr(nc % C)) / - scale_inv_var_nhw(nc % C) / scale_coefff + - mean_arr(nc % C); - } - } - ConstEigenArrayMap x_arr(x->data(), sample_size, N * C); - ConstEigenArrayMap d_y_arr(d_y->data(), sample_size, N * C); - - for (int nc = 0; nc < N * C; ++nc) { - int c = nc % C; - dy_sum_arr(c) += d_y_arr.col(nc).sum(); - dy_mul_x_sub_mean_mul_invstd_sum_arr(c) += - ((x_arr.col(nc) - mean_arr(c)) * inv_var_arr(c) * d_y_arr.col(nc)) - .sum(); - } - - if (d_scale && d_bias) { - d_bias_arr = dy_sum_arr; - d_scale_arr = dy_mul_x_sub_mean_mul_invstd_sum_arr; - } - - if (d_x) { - EigenArrayMap d_x_arr(d_x->mutable_data(ctx.GetPlace()), - sample_size, N * C); - if (!use_global_stats) { - for (int nc = 0; nc < N * C; ++nc) { - int c = nc % C; - d_x_arr.col(nc) = - scale_inv_var_nhw(c) * - (d_y_arr.col(nc) * N * sample_size - dy_sum_arr(c) - - (x_arr.col(nc) - mean_arr[c]) * - dy_mul_x_sub_mean_mul_invstd_sum_arr(c) * - inv_var_arr(c)); - } - } else { - for (int nc = 0; nc < N * C; ++nc) { - int c = nc % C; - d_x_arr.col(nc) = scale_inv_var_nhw(c) * d_y_arr.col(nc); - } - } - } - break; - } - case DataLayout::kNHWC: { - if (is_inplace) { - auto px = *x; - EigenArrayMap x_data(px.mutable_data(ctx.GetPlace()), C, - N * sample_size); - ConstEigenArrayMap y_data(x->data(), C, N * sample_size); - for (int nhw = 0; nhw < N * sample_size; nhw++) { - x_data.col(nhw) = (y_data.col(nhw) - bias_arr) / scale_inv_var_nhw / - scale_coefff + - mean_arr; - } - } - ConstEigenArrayMap x_arr(x->data(), C, N * sample_size); - ConstEigenArrayMap d_y_arr(d_y->data(), C, N * sample_size); - - for (int nhw = 0; nhw < N * sample_size; ++nhw) { - dy_sum_arr += d_y_arr.col(nhw); - dy_mul_x_sub_mean_mul_invstd_sum_arr += - (x_arr.col(nhw) - mean_arr) * inv_var_arr * d_y_arr.col(nhw); - } - - if (d_scale && d_bias) { - d_bias_arr = dy_sum_arr; - d_scale_arr = dy_mul_x_sub_mean_mul_invstd_sum_arr; - } - - if (d_x) { - EigenArrayMap d_x_arr(d_x->mutable_data(ctx.GetPlace()), C, - N * sample_size); - if (!use_global_stats) { - for (int nhw = 0; nhw < N * sample_size; ++nhw) { - d_x_arr.col(nhw) = - scale_inv_var_nhw * - (d_y_arr.col(nhw) * N * sample_size - dy_sum_arr - - (x_arr.col(nhw) - mean_arr) * - dy_mul_x_sub_mean_mul_invstd_sum_arr * inv_var_arr); - } - } else { - for (int nhw = 0; nhw < N * sample_size; ++nhw) { - d_x_arr.col(nhw) = scale_inv_var_nhw * d_y_arr.col(nhw); - } - } - } - break; - } - default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Unknown storage order: %s", data_layout_str)); - } - } -}; - template void BatchNormGradMaker::Apply(GradOpPtr op) const { op->SetType(this->ForwardOpType() + "_grad"); @@ -951,335 +521,16 @@ framework::OpKernelType BatchNormDoubleGradOp::GetExpectedKernelType( OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); } -template -class BatchNormDoubleGradKernel - : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - const auto *X = ctx.Input("X"); - const auto *Scale = ctx.Input("Scale"); - const auto *dY = ctx.Input("DY"); - const auto *Saved_mean = ctx.Input("SavedMean"); - const auto *Saved_variance = ctx.Input("SavedVariance"); - const float epsilon = ctx.Attr("epsilon"); - const bool use_global_stats = ctx.Attr("use_global_stats"); - const bool is_test = ctx.Attr("is_test"); - - PADDLE_ENFORCE_EQ( - is_test, false, - platform::errors::InvalidArgument( - "`is_test = True` CANNOT be used in train program. If " - "you want to use global status in pre_train model, " - "please set `use_global_stats = True`")); - - const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = - framework::StringToDataLayout(data_layout_str); - - const auto *ddX = ctx.Input("DDX"); - const auto *ddScale = ctx.Input("DDScale"); - const auto *ddBias = ctx.Input("DDBias"); - - auto *dX = ctx.Output("DX"); - auto *dScale = ctx.Output("DScale"); - auto *ddY = ctx.Output("DDY"); - dX->mutable_data(ctx.GetPlace()); - ddY->mutable_data(ctx.GetPlace()); - - auto &dev_ctx = ctx.template device_context(); - - const auto &x_dims = X->dims(); - const int C = - (data_layout == DataLayout::kNCHW ? x_dims[1] - : x_dims[x_dims.size() - 1]); - const int sample_size = X->numel() / C; - phi::funcs::SetConstant set_constant; - - const T *mean_data = Saved_mean->data(); - const T *inv_var_data = Saved_variance->data(); - - Tensor inv_var_tensor; - if (use_global_stats) { - const auto *running_mean = ctx.Input("Mean"); - const auto *running_variance = ctx.Input("Variance"); - mean_data = running_mean->data(); - inv_var_tensor.Resize({C}); - - T *running_inv_var_data = inv_var_tensor.mutable_data(ctx.GetPlace()); - EigenVectorArrayMap inv_var_tmp(running_inv_var_data, C); - ConstEigenVectorArrayMap var_arr(running_variance->data(), C); - - inv_var_tmp = (var_arr + epsilon).sqrt().inverse(); - inv_var_data = running_inv_var_data; - } - - // transpose NCHW -> NHWC for easy calculate - Tensor transformed_x(X->type()); - Tensor transformed_dy(dY->type()); - Tensor transformed_ddx(ddX->type()); - - Tensor transformed_dx(dX->type()); - Tensor transformed_ddy(ddY->type()); - if (data_layout == DataLayout::kNCHW && x_dims.size() > 2) { - VLOG(3) << "Transform batchnorm output from NCHW to NHWC"; - // Input Tensor - ResizeToChannelLast(ctx, X, - &transformed_x); - TransToChannelLast(ctx, X, &transformed_x); - ResizeToChannelLast(ctx, dY, - &transformed_dy); - TransToChannelLast(ctx, dY, - &transformed_dy); - ResizeToChannelLast(ctx, ddX, - &transformed_ddx); - TransToChannelLast(ctx, ddX, - &transformed_ddx); - // Output Tensor - ResizeToChannelLast(ctx, dX, - &transformed_dx); - ResizeToChannelLast(ctx, ddY, - &transformed_ddy); - } else { - transformed_x.ShareDataWith(*X); - transformed_dy.ShareDataWith(*dY); - transformed_ddx.ShareDataWith(*ddX); - - transformed_dx.ShareDataWith(*dX); - transformed_ddy.ShareDataWith(*ddY); - } - - ConstEigenArrayMap x_arr(transformed_x.data(), C, sample_size); - ConstEigenVectorArrayMap mean_arr(mean_data, C); - ConstEigenVectorArrayMap inv_var_arr(inv_var_data, C); - - Tensor mean_tile; - mean_tile.Resize({C, sample_size}); - mean_tile.mutable_data(ctx.GetPlace()); - EigenArrayMap mean_tile_data(mean_tile.mutable_data(ctx.GetPlace()), - C, sample_size); - - Tensor inv_var_tile; - inv_var_tile.Resize({C, sample_size}); - inv_var_tile.mutable_data(ctx.GetPlace()); - EigenArrayMap inv_var_tile_data( - inv_var_tile.mutable_data(ctx.GetPlace()), C, sample_size); - - mean_tile_data = mean_arr.replicate(1, sample_size); - inv_var_tile_data = inv_var_arr.replicate(1, sample_size); - - Tensor Scale_data; - if (!Scale) { - Scale_data.mutable_data({C}, ctx.GetPlace()); - set_constant(dev_ctx, &Scale_data, static_cast(1)); - } - ConstEigenVectorArrayMap scale_arr( - Scale ? Scale->data() : Scale_data.data(), C); - - Tensor scale_tile; - scale_tile.Resize({C, sample_size}); - scale_tile.mutable_data(ctx.GetPlace()); - EigenArrayMap scale_tile_data(scale_tile.mutable_data(ctx.GetPlace()), - C, sample_size); - scale_tile_data = scale_arr.replicate(1, sample_size); - - ConstEigenArrayMap dy_arr(transformed_dy.data(), C, sample_size); - ConstEigenArrayMap ddx_arr(transformed_ddx.data(), C, sample_size); - - Tensor x_sub_mean_mul_invstd; - x_sub_mean_mul_invstd.Resize({C, sample_size}); - x_sub_mean_mul_invstd.mutable_data(ctx.GetPlace()); - EigenArrayMap x_sub_mean_mul_invstd_arr( - x_sub_mean_mul_invstd.mutable_data(ctx.GetPlace()), C, sample_size); - x_sub_mean_mul_invstd_arr = (x_arr - mean_tile_data) * inv_var_tile_data; - - if (dX) { - dX->mutable_data(ctx.GetPlace()); - EigenArrayMap dx_arr(transformed_dx.mutable_data(ctx.GetPlace()), C, - sample_size); - dx_arr.setZero(); - if (use_global_stats) { - // math: dx = (ddscale * dy) * inv_var - if (ddScale) { - ConstEigenVectorArrayMap ddscale_arr(ddScale->data(), C); - Tensor ddscale_tile; - ddscale_tile.Resize({C, sample_size}); - EigenArrayMap ddscale_tile_data( - ddscale_tile.mutable_data(ctx.GetPlace()), C, sample_size); - ddscale_tile_data = ddscale_arr.replicate(1, sample_size); - - dx_arr = dy_arr * ddscale_tile_data * inv_var_tile_data; - } - } else { - // math: dx = scale * ((x - mean) * inv_var / NxHxW * (np.mean(ddx, - // axis=(n,h,w)) * - // np.sum(dy, axis=(n,h,w)) - - // np.sum(dy * ddx, axis=(n,h,w)) + 3 * np.mean(dy * (x - - // mean), - // axis=(n,h,w)) * inv_var.pow(2) * - // np.sum(ddx * (x - mean), axis=(n,h,w))) + inv_var.pow(3) / - // NxHxW * - // np.sum(ddx * (x - mean)) * - // (np.mean(dy, axis=(n,h,w)) - dy) + inv_var.pow(3) / NxHxW * - // np.sum(dy, - // axis=(n,h,w)) * (x - mean) * - // (np.mean(ddx, axis=(n,h,w)) - ddx)) + ddr * (dy * inv_var - - // inv_var - // * - // np.mean(dy, axis=(n,h,w)) - - // inv_var.pow(3) * (x - mean) * np.mean(dy * (x - mean), - // axis=(n,h,w))) - - if (ddX) { - dx_arr += - (x_sub_mean_mul_invstd_arr * inv_var_tile_data * - inv_var_tile_data / sample_size) - .colwise() * - (ddx_arr.rowwise().sum() * dy_arr.rowwise().sum() / sample_size - - (dy_arr * ddx_arr).rowwise().sum() + - 3. * (dy_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() * - (ddx_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() / - sample_size); - - dx_arr += (inv_var_tile_data * inv_var_tile_data).colwise() * - (ddx_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() / - sample_size * - (dy_arr.rowwise().sum() / sample_size - dy_arr); - - dx_arr += (inv_var_tile_data * inv_var_tile_data).colwise() * - (dy_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() / - sample_size * - (ddx_arr.rowwise().sum() / sample_size - ddx_arr); - - dx_arr = scale_tile_data * dx_arr; - } - if (ddScale) { - ConstEigenVectorArrayMap ddscale_arr(ddScale->data(), C); - Tensor ddscale_tile; - ddscale_tile.Resize({C, sample_size}); - EigenArrayMap ddscale_tile_data( - ddscale_tile.mutable_data(ctx.GetPlace()), C, sample_size); - ddscale_tile_data = ddscale_arr.replicate(1, sample_size); - - dx_arr += (dy_arr * inv_var_tile_data - - (dy_arr.rowwise().sum().replicate(1, sample_size) / - sample_size) * - inv_var_tile_data - - x_sub_mean_mul_invstd_arr * inv_var_tile_data * - (dy_arr * x_sub_mean_mul_invstd_arr) - .rowwise() - .sum() - .replicate(1, sample_size) / - sample_size) * - ddscale_tile_data; - } - } - if (data_layout == DataLayout::kNCHW) { - VLOG(3) << "Transform batchnorm output from NHWC to NCHW"; - TransToChannelFirst( - ctx, &transformed_dx, dX); - } - } - if (dScale) { - dScale->mutable_data(ctx.GetPlace()); - EigenVectorArrayMap dscale_arr(dScale->mutable_data(ctx.GetPlace()), - C); - dscale_arr.setZero(); - if (use_global_stats) { - // math: dscale = np.sum(ddx * dy, axis=(n,h,w)) * inv_var - if (ddX) { - dscale_arr = (ddx_arr * dy_arr * inv_var_tile_data).rowwise().sum(); - } - } else { - // math: dscale = inv_var * (dy - np.mean(dy, axis=(n,h,w) - (x-mean) * - // inv_var.pow(2) * np.mean(dy * (x-mean), axis=(n,h,w)))) * - // ddx - if (ddX) { - Tensor first_grad; - first_grad.Resize({C, sample_size}); - EigenArrayMap first_grad_arr( - first_grad.mutable_data(ctx.GetPlace()), C, sample_size); - first_grad_arr.setZero(); - - first_grad_arr += - inv_var_tile_data * - (dy_arr - - dy_arr.rowwise().sum().replicate(1, sample_size) / sample_size - - x_sub_mean_mul_invstd_arr * - (dy_arr * x_sub_mean_mul_invstd_arr) - .rowwise() - .sum() - .replicate(1, sample_size) / - sample_size); - dscale_arr = (first_grad_arr * ddx_arr).rowwise().sum(); - } - } - } - - if (ddY) { - ddY->mutable_data(ctx.GetPlace()); - EigenArrayMap ddy_arr(transformed_ddy.mutable_data(ctx.GetPlace()), - C, sample_size); - ddy_arr.setZero(); - if (use_global_stats) { - // math: ddy = r * ddx * inv_var + ddbias + - // ddscale * (x - mean) * inv_var - if (ddX) { - ddy_arr = scale_tile_data * ddx_arr * inv_var_tile_data; - } - } else { - // math: ddy = (x - mean) * inv_var * ddscale + ddbias + - // scale * inv_var * (ddx - (x - mean) * inv_var.pow(2) * - // np.mean(ddx * (x - mean), axis=(n,h,w))) - if (ddX) { - ddy_arr += - scale_tile_data * inv_var_tile_data * - (ddx_arr - - ddx_arr.rowwise().sum().replicate(1, sample_size) / sample_size - - x_sub_mean_mul_invstd_arr * - (ddx_arr * x_sub_mean_mul_invstd_arr) - .rowwise() - .sum() - .replicate(1, sample_size) / - sample_size); - } - } - if (ddScale) { - ConstEigenVectorArrayMap ddscale_arr(ddScale->data(), C); - Tensor ddscale_tile; - ddscale_tile.Resize({C, sample_size}); - EigenArrayMap ddscale_tile_data( - ddscale_tile.mutable_data(ctx.GetPlace()), C, sample_size); - ddscale_tile_data = ddscale_arr.replicate(1, sample_size); - - ddy_arr += x_sub_mean_mul_invstd_arr * ddscale_tile_data; - } - - if (ddBias) { - ConstEigenVectorArrayMap ddbias_arr(ddBias->data(), C); - Tensor ddbias_tile; - ddbias_tile.Resize({C, sample_size}); - EigenArrayMap ddbias_tile_data( - ddbias_tile.mutable_data(ctx.GetPlace()), C, sample_size); - ddbias_tile_data = ddbias_arr.replicate(1, sample_size); - - ddy_arr += ddbias_tile_data; - } - - if (data_layout == DataLayout::kNCHW) { - VLOG(3) << "Transform batchnorm output from NHWC to NCHW"; - TransToChannelFirst( - ctx, &transformed_ddy, ddY); - } - } - } -}; - DECLARE_INPLACE_OP_INFERER(BatchNormDoubleGradOpInplaceInferer, {"DY", "DDY"}); } // namespace operators } // namespace paddle namespace ops = paddle::operators; + +DECLARE_INFER_SHAPE_FUNCTOR(batch_norm, BatchNormInferShapeFunctor, + PD_INFER_META(phi::BatchNormInferMeta)); + REGISTER_OPERATOR(batch_norm, ops::BatchNormOp, ops::BatchNormOpMaker, ops::BatchNormOpInferVarType, ops::BatchNormGradMaker, diff --git a/paddle/fluid/operators/batch_norm_op.h b/paddle/fluid/operators/batch_norm_op.h index f8d37d685b..d274e8d2c0 100644 --- a/paddle/fluid/operators/batch_norm_op.h +++ b/paddle/fluid/operators/batch_norm_op.h @@ -113,23 +113,5 @@ class BatchNormOpInferVarType } }; -template -class BatchNormKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override; -}; - -template -class BatchNormGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override; -}; - -template -class BatchNormDoubleGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override; -}; - } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index 8213e877f7..9be63a85fc 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -27,6 +27,9 @@ limitations under the License. */ #endif #include "paddle/fluid/platform/cudnn_workspace_helper.h" +#include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/phi/infermeta/binary.h" + namespace paddle { namespace operators { @@ -841,6 +844,8 @@ framework::OpKernelType ConvOpDoubleGrad::GetExpectedKernelType( } // namespace paddle namespace ops = paddle::operators; +DECLARE_INFER_SHAPE_FUNCTOR(conv2d, Conv2dInferShapeFunctor, + PD_INFER_META(phi::ConvInferMeta)); REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker, ops::ConvOpInferVarType, ops::Conv2DGradMaker, @@ -851,6 +856,8 @@ REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad, REGISTER_OPERATOR(conv2d_grad_grad, ops::ConvOpDoubleGrad); // depthwise convolution op +DECLARE_INFER_SHAPE_FUNCTOR(depthwise_conv2d, DepthwiseConv2dInferShapeFunctor, + PD_INFER_META(phi::ConvInferMeta)); REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker, ops::ConvOpInferVarType, ops::Conv2DGradMaker, @@ -860,6 +867,8 @@ REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad, ops::Conv2DDoubleGradMaker); REGISTER_OPERATOR(depthwise_conv2d_grad_grad, ops::ConvOpDoubleGrad); +DECLARE_INFER_SHAPE_FUNCTOR(conv3d, Conv3dInferShapeFunctor, + PD_INFER_META(phi::ConvInferMeta)); REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker, ops::ConvOpInferVarType, ops::Conv3DGradMaker, diff --git a/paddle/fluid/operators/detection/yolo_box_op.cc b/paddle/fluid/operators/detection/yolo_box_op.cc index 0d9fbf612f..35e3890901 100644 --- a/paddle/fluid/operators/detection/yolo_box_op.cc +++ b/paddle/fluid/operators/detection/yolo_box_op.cc @@ -9,8 +9,10 @@ See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" +#include "paddle/phi/infermeta/binary.h" namespace paddle { namespace operators { @@ -235,10 +237,13 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker { } // namespace paddle namespace ops = paddle::operators; +DECLARE_INFER_SHAPE_FUNCTOR(yolo_box, YoloBoxInferShapeFunctor, + PD_INFER_META(phi::YoloBoxInferMeta)); REGISTER_OPERATOR( yolo_box, ops::YoloBoxOp, ops::YoloBoxOpMaker, paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); + paddle::framework::EmptyGradOpMaker, + YoloBoxInferShapeFunctor); REGISTER_OP_VERSION(yolo_box) .AddCheckpoint( diff --git a/paddle/fluid/operators/dropout_op.cc b/paddle/fluid/operators/dropout_op.cc index 6d52ce45c4..3d9950902a 100644 --- a/paddle/fluid/operators/dropout_op.cc +++ b/paddle/fluid/operators/dropout_op.cc @@ -14,7 +14,9 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/phi/infermeta/unary.h" namespace paddle { namespace operators { @@ -25,17 +27,6 @@ class DropoutOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Dropout"); - - auto x_dims = ctx->GetInputDim("X"); - ctx->SetOutputDim("Out", x_dims); - if (ctx->Attrs().Get("is_test") == false) { - ctx->SetOutputDim("Mask", x_dims); - } - ctx->ShareLoD("X", /*->*/ "Out"); - } - protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { @@ -173,7 +164,11 @@ class DropoutGradOpMaker : public framework::SingleGradOpMaker { } // namespace paddle namespace ops = paddle::operators; +DECLARE_INFER_SHAPE_FUNCTOR(dropout, DropoutInferShapeFunctor, + PD_INFER_META(phi::DropoutInferMeta)); + REGISTER_OPERATOR(dropout, ops::DropoutOp, ops::DropoutOpMaker, ops::DropoutGradOpMaker, - ops::DropoutGradOpMaker); + ops::DropoutGradOpMaker, + DropoutInferShapeFunctor); REGISTER_OPERATOR(dropout_grad, ops::DropoutOpGrad); diff --git a/paddle/fluid/operators/fused/conv_fusion_op.cc b/paddle/fluid/operators/fused/conv_fusion_op.cc index c445a28c08..e60fc44e9a 100644 --- a/paddle/fluid/operators/fused/conv_fusion_op.cc +++ b/paddle/fluid/operators/fused/conv_fusion_op.cc @@ -120,6 +120,142 @@ class Conv2DFusionOp : public operators::ConvOp { ctx->SetOutputsDim("Outputs", output_shapes); } } + + std::vector ComputeOutputShape( + framework::InferShapeContext* ctx) const { + OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "Conv"); + OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "Conv"); + + auto in_dims = ctx->GetInputDim("Input"); + auto filter_dims = ctx->GetInputDim("Filter"); + + std::vector strides = ctx->Attrs().Get>("strides"); + std::vector paddings = ctx->Attrs().Get>("paddings"); + std::string padding_algorithm = + ctx->Attrs().Get("padding_algorithm"); + int groups = ctx->Attrs().Get("groups"); + std::vector dilations = + ctx->Attrs().Get>("dilations"); + int dilation_size = dilations.size(); + for (int i = 0; i < dilation_size; ++i) { + PADDLE_ENFORCE_GT( + dilations[i], 0, + platform::errors::InvalidArgument( + "The dilation of Op(Conv) should be larget than 0, but received " + "dilation is %d.", + dilations[i])); + } + const std::string data_format = + ctx->Attrs().Get("data_format"); + + // MKL-DNN Kernels are using NCHW order of dims description + // so we ignore data_format consideration for MKL-DNN kernel + const bool channel_last = (ctx->IsRunMKLDNNKernel() == false) && + (data_format == "NHWC" || data_format == "NDHWC"); + + PADDLE_ENFORCE_EQ( + in_dims.size() == 4 || in_dims.size() == 5, true, + platform::errors::InvalidArgument( + "The input of Op(Conv) should be a 4-D or 5-D Tensor. But " + "received: input's dimension is %u, input's shape is [%s].", + in_dims.size(), in_dims)); + + PADDLE_ENFORCE_EQ( + in_dims.size(), filter_dims.size(), + platform::errors::InvalidArgument( + "The input's dimension and filter's dimension of " + "Op(Conv) should be equal. But received: the input's shape is " + "[%s], " + "the input's dimension is %d; the filter's shape is [%s], " + "the filter's dimension is %d.", + in_dims, in_dims.size(), filter_dims, filter_dims.size())); + + int stride_size = strides.size(); + for (int i = 0; i < stride_size; ++i) { + PADDLE_ENFORCE_GT( + strides[i], 0, + platform::errors::InvalidArgument( + "The stride of Op(Conv) should be larget than 0, but received " + "stride is %d.", + strides[i])); + } + + int in_sub_stride_size = in_dims.size() - stride_size; + PADDLE_ENFORCE_EQ( + in_dims.size(), strides.size() + 2U, + platform::errors::InvalidArgument( + "The difference of input's dimension and Attr(strides)'s " + "length must be euqal to 2 for Op(Conv). " + "But received: input's dimension is %d, input's shape is [%s]; " + "Attr(stride)'s length is %d, Attr(stride) is [%s]; " + "difference of input's dimention and Attr(strides)'s length = %u.", + in_dims.size(), in_dims, strides.size(), phi::make_ddim(strides), + in_sub_stride_size)); + + const auto input_channels = + channel_last ? in_dims[in_dims.size() - 1] : in_dims[1]; + + PADDLE_ENFORCE_EQ( + input_channels, filter_dims[1] * groups, + platform::errors::InvalidArgument( + "The number of input's channels should be equal to filter's " + "channels " + "* groups for Op(Conv). But received: the input's channels is %d, " + "the input's shape is [%s]; the filter's channels is %d, the " + "filter's shape is [%s]; the groups is %d, the data_format is %s. " + "The error may come from wrong data_format setting.", + input_channels, in_dims, filter_dims[1], filter_dims, groups, + data_format)); + PADDLE_ENFORCE_EQ( + filter_dims[0] % groups, 0, + platform::errors::InvalidArgument( + "The number of output's channels (filter's first dimension) of " + "Op(Conv) should be divided by groups. But received: " + "the output channels is %d, the filter's shape is [%s], " + "the groups is %d.", + filter_dims[0], filter_dims, groups)); + + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_GT( + filter_dims[0], 0, + platform::errors::InvalidArgument( + "the size of filter at axis 0 should be greater than 0")); + } + + framework::DDim in_data_dims; + if (channel_last) { + in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); + } else { + in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size()); + } + + framework::DDim filter_data_dims = + phi::slice_ddim(filter_dims, 2, filter_dims.size()); + + std::vector ksize = phi::vectorize(filter_data_dims); + UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm, + in_data_dims, strides, ksize); + + std::vector output_shape({in_dims[0]}); + if (!channel_last) { + output_shape.push_back(filter_dims[0]); + } + for (int i = 0; i < in_data_dims.size(); ++i) { + if ((!ctx->IsRuntime()) && + (in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) { + output_shape.push_back(-1); + } else { + output_shape.push_back( + ConvOutputSize(in_data_dims[i], filter_data_dims[i], dilations[i], + paddings[2 * i], paddings[2 * i + 1], strides[i])); + } + } + if (channel_last) { + output_shape.push_back(filter_dims[0]); + } + + return output_shape; + } }; // TODO(qingqing): add gradient operator for conv2d_fusion diff --git a/paddle/fluid/operators/histogram_op.cc b/paddle/fluid/operators/histogram_op.cc index 92cc6077de..c9fd75651b 100644 --- a/paddle/fluid/operators/histogram_op.cc +++ b/paddle/fluid/operators/histogram_op.cc @@ -16,7 +16,9 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/phi/infermeta/unary.h" namespace paddle { namespace operators { @@ -28,27 +30,6 @@ class HistogramOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "histogram"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "histogram"); - const auto &nbins = ctx->Attrs().Get("bins"); - const auto &minval = ctx->Attrs().Get("min"); - const auto &maxval = ctx->Attrs().Get("max"); - - PADDLE_ENFORCE_GE(nbins, 1, - platform::errors::InvalidArgument( - "The bins should be greater than or equal to 1." - "But received nbins is %d", - nbins)); - PADDLE_ENFORCE_GE(maxval, minval, platform::errors::InvalidArgument( - "max must be larger or equal to min." - "But received max is %d, min is %d", - maxval, minval)); - - ctx->SetOutputDim("Out", phi::make_ddim({nbins})); - ctx->ShareLoD("X", /*->*/ "Out"); - } - framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const { auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); @@ -81,7 +62,12 @@ class HistogramOpMaker : public framework::OpProtoAndCheckerMaker { } // namespace paddle namespace ops = paddle::operators; + +DECLARE_INFER_SHAPE_FUNCTOR(histogram, HistogramInferShapeFunctor, + PD_INFER_META(phi::HistogramInferMeta)); + REGISTER_OPERATOR( histogram, ops::HistogramOp, ops::HistogramOpMaker, paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); + paddle::framework::EmptyGradOpMaker, + HistogramInferShapeFunctor); diff --git a/paddle/fluid/operators/inplace_abn_op.cc b/paddle/fluid/operators/inplace_abn_op.cc index 7f51369699..77951ff394 100644 --- a/paddle/fluid/operators/inplace_abn_op.cc +++ b/paddle/fluid/operators/inplace_abn_op.cc @@ -323,6 +323,7 @@ class InplaceABNGradKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; + REGISTER_OPERATOR(inplace_abn, ops::InplaceABNOp, ops::InplaceABNOpMaker, ops::BatchNormOpInferVarType, ops::InplaceABNOpGradMaker, diff --git a/paddle/fluid/operators/masked_select_op.cc b/paddle/fluid/operators/masked_select_op.cc index a6eb535c69..1887bbcfb7 100644 --- a/paddle/fluid/operators/masked_select_op.cc +++ b/paddle/fluid/operators/masked_select_op.cc @@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/phi/infermeta/binary.h" namespace paddle { namespace operators { @@ -21,16 +23,6 @@ class MaskedSelectOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "Input", "MaskedSelect"); - OP_INOUT_CHECK(ctx->HasInput("Mask"), "Input", "Mask", "MaskedSelect"); - OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Out", "MaskedSelect"); - - // output will only be a 1-D Tensor - ctx->SetOutputDim("Y", phi::make_ddim({-1})); - ctx->ShareLoD("X", /*->*/ "Y"); - } - protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { @@ -100,8 +92,13 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(MaskedSelectedGradNoNeedBufferVarsInferer, } // namespace paddle namespace ops = paddle::operators; + +DECLARE_INFER_SHAPE_FUNCTOR(masked_select, MaksedSelectInferShapeFunctor, + PD_INFER_META(phi::MaskedSelectInferMeta)); + REGISTER_OPERATOR(masked_select, ops::MaskedSelectOp, ops::MaskedSelectOpMaker, ops::MaskedSelectGradOpMaker, - ops::MaskedSelectGradOpMaker); + ops::MaskedSelectGradOpMaker, + MaksedSelectInferShapeFunctor); REGISTER_OPERATOR(masked_select_grad, ops::MaskedSelectOpGrad, ops::MaskedSelectedGradNoNeedBufferVarsInferer); diff --git a/paddle/fluid/operators/norm_op.cc b/paddle/fluid/operators/norm_op.cc index 5d394424d5..51daccce0e 100644 --- a/paddle/fluid/operators/norm_op.cc +++ b/paddle/fluid/operators/norm_op.cc @@ -15,7 +15,9 @@ limitations under the License. */ #include #include #include +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/phi/infermeta/unary.h" namespace paddle { namespace operators { @@ -57,21 +59,7 @@ where, $\sum {x^2}$ is calculated along the `axis` dimension. }; class NormOp : public framework::OperatorWithKernel { - public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "NormOp"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "NormOp"); - auto xdim = ctx->GetInputDim("X"); - ctx->SetOutputDim("Out", xdim); - - if (ctx->Attrs().Get("is_test") == false) { - int axis = ctx->Attrs().Get("axis"); - if (axis < 0) axis = xdim.size() + axis; - xdim[axis] = 1; - ctx->SetOutputDim("Norm", xdim); - } - } }; class NormOpGrad : public framework::OperatorWithKernel { @@ -111,7 +99,11 @@ class NormOpGradOpMaker : public framework::SingleGradOpMaker { namespace ops = paddle::operators; using CPU = paddle::platform::CPUDeviceContext; +DECLARE_INFER_SHAPE_FUNCTOR(norm, NormInferShapeFunctor, + PD_INFER_META(phi::NormInferMeta)); + REGISTER_OPERATOR(norm, ops::NormOp, ops::NormOpMaker, ops::NormOpGradOpMaker, - ops::NormOpGradOpMaker); + ops::NormOpGradOpMaker, + NormInferShapeFunctor); REGISTER_OPERATOR(norm_grad, ops::NormOpGrad); diff --git a/paddle/fluid/operators/sync_batch_norm_op.cc b/paddle/fluid/operators/sync_batch_norm_op.cc index d198992abd..0c178b02d0 100644 --- a/paddle/fluid/operators/sync_batch_norm_op.cc +++ b/paddle/fluid/operators/sync_batch_norm_op.cc @@ -50,6 +50,7 @@ class SyncBatchNormGradMaker : public framework::SingleGradOpMaker { } // namespace paddle namespace ops = paddle::operators; + REGISTER_OPERATOR(sync_batch_norm, ops::BatchNormOp, ops::BatchNormOpMaker, ops::BatchNormOpInferVarType, ops::SyncBatchNormGradMaker, diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc index f09e878947..aabb944db3 100644 --- a/paddle/phi/infermeta/binary.cc +++ b/paddle/phi/infermeta/binary.cc @@ -21,6 +21,8 @@ limitations under the License. */ #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/kernels/funcs/common_shape.h" +#include "paddle/phi/kernels/cpu/conv_util.h" + namespace phi { namespace detail { @@ -355,6 +357,161 @@ void CrossInferMeta(const MetaTensor& x, out->share_lod(x); } +void ConvInferMeta(const MetaTensor& input, + const MetaTensor& filter, + const std::vector& strides, + const std::vector& paddings_t, + const std::string& padding_algorithm, + int groups, + const std::vector& dilations_t, + const std::string& data_format, + bool use_addto, + int workspace_size_MB, + bool exhaustive_search, + MetaTensor* out, + MetaConfig config) { + std::vector paddings = paddings_t; + std::vector dilations = dilations_t; + auto in_dims = input.dims(); + auto filter_dims = filter.dims(); + int dilation_size = dilations.size(); + for (int i = 0; i < dilation_size; ++i) { + PADDLE_ENFORCE_GT( + dilations[i], + 0, + phi::errors::InvalidArgument( + "The dilation of Op(Conv) should be larget than 0, but received " + "dilation is %d.", + dilations[i])); + } + const bool channel_last = (config.is_run_mkldnn_kernel == false) && + (data_format == "NHWC" || data_format == "NDHWC"); + + PADDLE_ENFORCE_EQ( + in_dims.size() == 4 || in_dims.size() == 5, + true, + phi::errors::InvalidArgument( + "The input of Op(Conv) should be a 4-D or 5-D Tensor. But " + "received: input's dimension is %u, input's shape is [%s].", + in_dims.size(), + in_dims)); + + PADDLE_ENFORCE_EQ( + in_dims.size(), + filter_dims.size(), + phi::errors::InvalidArgument( + "The input's dimension and filter's dimension of " + "Op(Conv) should be equal. But received: the input's shape is [%s], " + "the input's dimension is %d; the filter's shape is [%s], " + "the filter's dimension is %d.", + in_dims, + in_dims.size(), + filter_dims, + filter_dims.size())); + + int stride_size = strides.size(); + for (int i = 0; i < stride_size; ++i) { + PADDLE_ENFORCE_GT( + strides[i], + 0, + phi::errors::InvalidArgument( + "The stride of Op(Conv) should be larget than 0, but received " + "stride is %d.", + strides[i])); + } + + int in_sub_stride_size = in_dims.size() - stride_size; + PADDLE_ENFORCE_EQ( + in_dims.size(), + strides.size() + 2U, + phi::errors::InvalidArgument( + "The difference of input's dimension and Attr(strides)'s " + "length must be euqal to 2 for Op(Conv). " + "But received: input's dimension is %d, input's shape is [%s]; " + "Attr(stride)'s length is %d, Attr(stride) is [%s]; " + "difference of input's dimention and Attr(strides)'s length = %u.", + in_dims.size(), + in_dims, + strides.size(), + phi::make_ddim(strides), + in_sub_stride_size)); + + const auto input_channels = + channel_last ? in_dims[in_dims.size() - 1] : in_dims[1]; + + PADDLE_ENFORCE_EQ( + input_channels, + filter_dims[1] * groups, + phi::errors::InvalidArgument( + "The number of input's channels should be equal to filter's channels " + "* groups for Op(Conv). But received: the input's channels is %d, " + "the input's shape is [%s]; the filter's channels is %d, the " + "filter's shape is [%s]; the groups is %d, the data_format is %s. " + "The error may come from wrong data_format setting.", + input_channels, + in_dims, + filter_dims[1], + filter_dims, + groups, + data_format)); + PADDLE_ENFORCE_EQ( + filter_dims[0] % groups, + 0, + phi::errors::InvalidArgument( + "The number of output's channels (filter's first dimension) of " + "Op(Conv) should be divided by groups. But received: " + "the output channels is %d, the filter's shape is [%s], " + "the groups is %d.", + filter_dims[0], + filter_dims, + groups)); + + if (config.is_runtime) { + PADDLE_ENFORCE_GT( + filter_dims[0], + 0, + phi::errors::InvalidArgument( + "the size of filter at axis 0 should be greater than 0")); + } + + DDim in_data_dims; + if (channel_last) { + in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); + } else { + in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size()); + } + + DDim filter_data_dims = phi::slice_ddim(filter_dims, 2, filter_dims.size()); + + std::vector ksize = phi::vectorize(filter_data_dims); + phi::UpdatePaddingAndDilation( + &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); + + std::vector output_shape({in_dims[0]}); + if (!channel_last) { + output_shape.push_back(filter_dims[0]); + } + for (int i = 0; i < in_data_dims.size(); ++i) { + if ((!config.is_runtime) && + (in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) { + output_shape.push_back(-1); + } else { + const int dkernel = dilations[i] * (filter_data_dims[i] - 1) + 1; + int output_size = + (in_data_dims[i] + paddings[2 * i] + paddings[2 * i + 1] - dkernel) / + strides[i] + + 1; + output_shape.push_back(output_size); + } + } + if (channel_last) { + output_shape.push_back(filter_dims[0]); + } + + out->set_dims(make_ddim(output_shape)); + out->set_dtype(input.dtype()); +} + void DistInferMeta(const MetaTensor& x, const MetaTensor& y, float p, @@ -815,6 +972,13 @@ void LogLossInferMeta(const MetaTensor& input, out->share_lod(input); } +void MaskedSelectInferMeta(const MetaTensor& x, + const MetaTensor& mask, + MetaTensor* out) { + out->set_dims({-1}); // can not infer + out->set_dtype(x.dtype()); +} + void MatmulInferMeta(const MetaTensor& x, const MetaTensor& y, bool trans_x, @@ -1188,6 +1352,118 @@ void TriangularSolveInferMeta(const MetaTensor& x, out->share_lod(y); } +void YoloBoxInferMeta(const MetaTensor& x, + const MetaTensor& img_size, + const std::vector& anchors, + int class_num, + float conf_thresh, + int downsample_ratio, + bool clip_bbox, + float scale_x_y, + bool iou_aware, + float iou_aware_factor, + MetaTensor* boxes, + MetaTensor* scores, + MetaConfig config) { + auto dim_x = x.dims(); + auto dim_imgsize = img_size.dims(); + int anchor_num = anchors.size() / 2; + + PADDLE_ENFORCE_EQ( + dim_x.size(), + 4, + phi::errors::InvalidArgument("Input(X) should be a 4-D tensor." + "But received X dimension(%s)", + dim_x.size())); + if (iou_aware) { + PADDLE_ENFORCE_EQ( + dim_x[1], + anchor_num * (6 + class_num), + phi::errors::InvalidArgument( + "Input(X) dim[1] should be equal to (anchor_mask_number * (6 " + "+ class_num)) while iou_aware is true." + "But received dim[1](%s) != (anchor_mask_number * " + "(6+class_num)(%s).", + dim_x[1], + anchor_num * (6 + class_num))); + PADDLE_ENFORCE_GE( + iou_aware_factor, + 0, + phi::errors::InvalidArgument( + "Attr(iou_aware_factor) should greater than or equal to 0." + "But received iou_aware_factor (%s)", + iou_aware_factor)); + PADDLE_ENFORCE_LE( + iou_aware_factor, + 1, + phi::errors::InvalidArgument( + "Attr(iou_aware_factor) should less than or equal to 1." + "But received iou_aware_factor (%s)", + iou_aware_factor)); + } else { + PADDLE_ENFORCE_EQ( + dim_x[1], + anchor_num * (5 + class_num), + phi::errors::InvalidArgument( + "Input(X) dim[1] should be equal to (anchor_mask_number * (5 " + "+ class_num))." + "But received dim[1](%s) != (anchor_mask_number * " + "(5+class_num)(%s).", + dim_x[1], + anchor_num * (5 + class_num))); + } + PADDLE_ENFORCE_EQ( + dim_imgsize.size(), + 2, + phi::errors::InvalidArgument("Input(ImgSize) should be a 2-D tensor." + "But received Imgsize size(%s)", + dim_imgsize.size())); + if ((dim_imgsize[0] > 0 && dim_x[0] > 0) || config.is_runtime) { + PADDLE_ENFORCE_EQ( + dim_imgsize[0], + dim_x[0], + phi::errors::InvalidArgument( + "Input(ImgSize) dim[0] and Input(X) dim[0] should be same.")); + } + PADDLE_ENFORCE_EQ( + dim_imgsize[1], + 2, + phi::errors::InvalidArgument("Input(ImgSize) dim[1] should be 2." + "But received imgsize dim[1](%s).", + dim_imgsize[1])); + PADDLE_ENFORCE_GT(anchors.size(), + 0, + phi::errors::InvalidArgument( + "Attr(anchors) length should be greater than 0." + "But received anchors length(%s).", + anchors.size())); + PADDLE_ENFORCE_EQ(anchors.size() % 2, + 0, + phi::errors::InvalidArgument( + "Attr(anchors) length should be even integer." + "But received anchors length (%s)", + anchors.size())); + PADDLE_ENFORCE_GT(class_num, + 0, + phi::errors::InvalidArgument( + "Attr(class_num) should be an integer greater than 0." + "But received class_num (%s)", + class_num)); + + int box_num; + if ((dim_x[2] > 0 && dim_x[3] > 0) || config.is_runtime) { + box_num = dim_x[2] * dim_x[3] * anchor_num; + } else { + box_num = -1; + } + std::vector dim_boxes({dim_x[0], box_num, 4}); + boxes->set_dims(phi::make_ddim(dim_boxes)); + boxes->set_dtype(x.dtype()); + + std::vector dim_scores({dim_x[0], box_num, class_num}); + scores->set_dims(phi::make_ddim(dim_scores)); +} + void ValueCompareInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out, @@ -1201,3 +1477,4 @@ void ValueCompareInferMeta(const MetaTensor& x, } // namespace phi PD_REGISTER_INFER_META_FN(add_raw, phi::ElementwiseRawInferMeta); +PD_REGISTER_INFER_META_FN(conv2d, phi::ConvInferMeta); diff --git a/paddle/phi/infermeta/binary.h b/paddle/phi/infermeta/binary.h index cb7a83f39a..d770a096de 100644 --- a/paddle/phi/infermeta/binary.h +++ b/paddle/phi/infermeta/binary.h @@ -69,6 +69,20 @@ void CompareInferMeta(const MetaTensor& x, int axis, MetaTensor* out); +void ConvInferMeta(const MetaTensor& input, + const MetaTensor& filter, + const std::vector& strides, + const std::vector& paddings, + const std::string& paddding_algorithm, + int groups, + const std::vector& dilations, + const std::string& data_format, + bool use_addto, + int workspace_size_MB, + bool exhaustive_search, + MetaTensor* out, + MetaConfig config = MetaConfig()); + void CrossInferMeta(const MetaTensor& x, const MetaTensor& y, int axis, @@ -138,6 +152,10 @@ void LogLossInferMeta(const MetaTensor& input, MetaTensor* out, MetaConfig config = MetaConfig()); +void MaskedSelectInferMeta(const MetaTensor& x, + const MetaTensor& mask, + MetaTensor* out); + void MatmulInferMeta(const MetaTensor& x, const MetaTensor& y, bool trans_x, @@ -180,6 +198,20 @@ void TriangularSolveInferMeta(const MetaTensor& x, bool unitriangular, MetaTensor* out); +void YoloBoxInferMeta(const MetaTensor& x, + const MetaTensor& img_size, + const std::vector& anchors, + int class_num, + float conf_thresh, + int downsample_ratio, + bool clip_bbox, + float scale_x_y, + bool iou_aware, + float iou_aware_factor, + MetaTensor* boxes, + MetaTensor* scores, + MetaConfig config = MetaConfig()); + void ValueCompareInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out, diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 3f77a20af2..3e9da9a217 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -14,7 +14,9 @@ limitations under the License. */ #include "paddle/phi/infermeta/multiary.h" #include +#include "paddle/phi/common/layout.h" #include "paddle/phi/common/scalar.h" +#include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/core/meta_tensor.h" #include "paddle/phi/kernels/funcs/concat_funcs.h" namespace phi { @@ -200,6 +202,114 @@ void AucInferMeta(const MetaTensor& input, } } +void BatchNormInferMeta(const MetaTensor& x, + const MetaTensor& scale, + const MetaTensor& bias, + const MetaTensor& mean, + const MetaTensor& variance, + float momentum, + float epsilon, + const std::string& data_layout_str, + bool is_test, + bool use_global_stats, + bool trainable_statistics, + bool fuse_with_relu, + MetaTensor* y, + MetaTensor* mean_out, + MetaTensor* variance_out, + MetaTensor* saved_mean, + MetaTensor* saved_variance, + MetaTensor* reserve_space, + MetaConfig config) { + const auto x_dims = x.dims(); + for (int i = 0; i < x_dims.size(); i++) { + PADDLE_ENFORCE_EQ( + (x_dims[i] == -1) || (x_dims[i] > 0), + true, + phi::errors::InvalidArgument( + "Each dimension of input tensor is expected to be -1 or a " + "positive number, but recieved %d. Input's shape is [%s].", + x_dims[i], + x_dims)); + } + + const DataLayout data_layout = + paddle::framework::StringToDataLayout(data_layout_str); + + PADDLE_ENFORCE_GE( + x_dims.size(), + 2, + phi::errors::InvalidArgument( + "ShapeError: the dimension of input " + "X must greater than or equal to 2. But received: the shape of input " + "X = [%s], the dimension of input X =[%d]", + x_dims, + x_dims.size())); + PADDLE_ENFORCE_LE( + x_dims.size(), + 5, + phi::errors::InvalidArgument( + "ShapeError: the dimension of input X " + "must smaller than or equal to 5. But received: the shape of input X " + "= [%s], the dimension of input X = [%d]", + x_dims, + x_dims.size())); + + const int64_t C = ((config.is_run_mkldnn_kernel == true) || + (data_layout == DataLayout::kNCHW) + ? x_dims[1] + : x_dims[x_dims.size() - 1]); + auto scale_dim = scale.dims(); + auto bias_dim = bias.dims(); + + PADDLE_ENFORCE_EQ( + scale_dim.size(), + 1UL, + phi::errors::InvalidArgument( + "ShapeError: the dimension of scale must equal to 1." + "But received: the shape of scale is [%s], the dimension " + "of scale is [%d]", + scale_dim, + scale_dim.size())); + PADDLE_ENFORCE_EQ(bias_dim.size(), + 1UL, + phi::errors::InvalidArgument( + "ShapeError: the dimension of bias must equal to 1." + "But received: the shape of bias is [%s],the dimension " + "of bias is [%d]", + bias_dim, + bias_dim.size())); + + bool check = true; + if ((!config.is_runtime) && + (phi::product(scale_dim) <= 0 || phi::product(bias_dim) <= 0)) { + check = false; + } + + if (check) { + PADDLE_ENFORCE_EQ(scale_dim[0], + C, + phi::errors::InvalidArgument( + "ShapeError: the shape of scale must equal to [%d]" + "But received: the shape of scale is [%d]", + C, + scale_dim[0])); + PADDLE_ENFORCE_EQ(bias_dim[0], + C, + phi::errors::InvalidArgument( + "ShapeError: the shape of bias must equal to [%d]" + "But received: the shape of bias is [%d]", + C, + bias_dim[0])); + } + y->set_dims(x_dims); + mean_out->set_dims({C}); + variance_out->set_dims({C}); + saved_mean->set_dims({C}); + saved_variance->set_dims({C}); + y->share_lod(x); +} + void BilinearTensorProductInferMeta(const MetaTensor& x, const MetaTensor& y, const MetaTensor& weight, @@ -577,3 +687,5 @@ void WhereInferMeta(const MetaTensor& condition, } } // namespace phi + +PD_REGISTER_INFER_META_FN(batch_norm, phi::BatchNormInferMeta); diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h index a712ca31de..068766c0e1 100644 --- a/paddle/phi/infermeta/multiary.h +++ b/paddle/phi/infermeta/multiary.h @@ -72,6 +72,26 @@ void AucInferMeta(const MetaTensor& input, MetaTensor* stat_neg_out, MetaConfig config = MetaConfig()); +void BatchNormInferMeta(const MetaTensor& x, + const MetaTensor& scale, + const MetaTensor& bias, + const MetaTensor& mean, + const MetaTensor& variance, + float momentum, + float epsilon, + const std::string& data_layout, + bool is_test, + bool use_global_stats, + bool trainable_statistics, + bool fuse_with_relu, + MetaTensor* y, + MetaTensor* mean_out, + MetaTensor* variance_out, + MetaTensor* saved_mean, + MetaTensor* saved_variance, + MetaTensor* reserve_space, + MetaConfig config = MetaConfig()); + void BilinearTensorProductInferMeta(const MetaTensor& x, const MetaTensor& y, const MetaTensor& weight, diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 03029550c2..0f51839553 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -304,6 +304,17 @@ void DiagonalInferMeta(const MetaTensor& input, out->set_dims(phi::make_ddim(out_dims)); } +void DropoutInferMeta(const MetaTensor& x, MetaTensor* out, MetaTensor* mask) { + auto x_dims = x.dims(); + out->set_dims(x_dims); + out->share_lod(x); + out->set_dtype(x.dtype()); + + if (mask != nullptr) { + mask->set_dims(x_dims); + } +} + void EighInferMeta(const MetaTensor& x, const std::string& uplo, MetaTensor* out_w, @@ -392,6 +403,26 @@ void GumbelSoftmaxInferMeta(const MetaTensor& x, UnchangedInferMetaCheckAxis(x, axis, out); } +void HistogramInferMeta( + const MetaTensor& input, int64_t bins, int min, int max, MetaTensor* out) { + PADDLE_ENFORCE_GE(bins, + 1, + phi::errors::InvalidArgument( + "The bins should be greater than or equal to 1." + "But received nbins is %d", + bins)); + PADDLE_ENFORCE_GE( + max, + min, + phi::errors::InvalidArgument("max must be larger or equal to min." + "But received max is %d, min is %d", + max, + min)); + + out->set_dims({bins}); + out->share_lod(input); +} + void IncrementInferMeta(const MetaTensor& x, float value, MetaTensor* out) { PADDLE_ENFORCE_EQ( product(x.dims()), @@ -787,6 +818,24 @@ void MultinomialInferMeta(const MetaTensor& x, out->set_dtype(DataType::INT64); } +void NormInferMeta(const MetaTensor& x, + int axis, + float epsilon, + bool is_test, + MetaTensor* out, + MetaTensor* norm) { + auto xdim = x.dims(); + out->set_dims(x.dims()); + out->set_dtype(x.dtype()); + + if (is_test == false) { + if (axis < 0) axis = xdim.size() + axis; + xdim[axis] = 1; + norm->set_dims(xdim); + norm->set_dtype(x.dtype()); + } +} + void PadInferMeta(const MetaTensor& input, const std::vector& paddings, float pad_value, diff --git a/paddle/phi/infermeta/unary.h b/paddle/phi/infermeta/unary.h index 00026f8598..2d51bac995 100644 --- a/paddle/phi/infermeta/unary.h +++ b/paddle/phi/infermeta/unary.h @@ -74,6 +74,8 @@ void DiagInferMeta(const MetaTensor& x, void DiagonalInferMeta( const MetaTensor& input, int offset, int axis1, int axis2, MetaTensor* out); +void DropoutInferMeta(const MetaTensor& x, MetaTensor* out, MetaTensor* mask); + void EighInferMeta(const MetaTensor& x, const std::string& uplo, MetaTensor* out_w, @@ -89,6 +91,8 @@ void GumbelSoftmaxInferMeta(const MetaTensor& x, bool hard, int axis, MetaTensor* out); +void HistogramInferMeta( + const MetaTensor& input, int64_t bins, int min, int max, MetaTensor* out); void IncrementInferMeta(const MetaTensor& x, float value, MetaTensor* out); @@ -130,6 +134,12 @@ void MultinomialInferMeta(const MetaTensor& x, int num_samples, bool replacement, MetaTensor* out); +void NormInferMeta(const MetaTensor& x, + int axis, + float epsilon, + bool is_test, + MetaTensor* out, + MetaTensor* norm); void PadInferMeta(const MetaTensor& input, const std::vector& paddings, diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_gelu_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_gelu_fuse_pass.py index 33df428388..81bb182802 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_gelu_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_gelu_fuse_pass.py @@ -19,6 +19,7 @@ import paddle.inference as paddle_infer from functools import partial from typing import Optional, List, Callable, Dict, Any, Set import unittest +import paddle import hypothesis from hypothesis import given, settings, seed, example, assume @@ -104,4 +105,5 @@ class TestConvGeluMkldnnFusePass(PassAutoScanTest): if __name__ == "__main__": + paddle.enable_static() unittest.main() -- GitLab