Unverified commit 8e4e19ab, authored by hong, committed by GitHub

Add infer meta (#40544)

* add infer meta; test=develop

* add histogram infer meta; test=develop

* fix unitest bug; test=develop

* format; test=develop

* format; test=develop

* bn not use new infer meta; test=develop

* add infer meta; test=develop

* fixbug; test=develop

* fix bug;

* recover unitest; test=develop
Parent 8e612903
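The diff below applies the same migration pattern to each operator it touches: the hand-written InferShape override is removed from the fluid operator and replaced by a phi InferMeta function, which is bound back to the operator through an infer-shape functor at registration time. A minimal sketch of that pattern follows; my_op, ops::MyOp, ops::MyOpMaker and phi::MyOpInferMeta are hypothetical placeholder names, not symbols from this commit.

// Sketch of the registration pattern used throughout this commit.
// All "My*" names are placeholders for illustration only.
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/infermeta/unary.h"

namespace ops = paddle::operators;

// Bind the operator's shape/dtype inference to a phi InferMeta function.
DECLARE_INFER_SHAPE_FUNCTOR(my_op, MyOpInferShapeFunctor,
                            PD_INFER_META(phi::MyOpInferMeta));

// Pass the functor as the last argument of REGISTER_OPERATOR; it takes the
// place of the InferShape method deleted from the operator class.
REGISTER_OPERATOR(my_op, ops::MyOp, ops::MyOpMaker,
                  paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
                  paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
                  MyOpInferShapeFunctor);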
...@@ -21,6 +21,9 @@ limitations under the License. */
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/phi/infermeta/multiary.h"
namespace paddle {
namespace operators {
...@@ -297,184 +300,6 @@ The required data format for this layer is one of the following:
)DOC");
}
template <typename T>
class BatchNormKernel<platform::CPUDeviceContext, T>
: public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
const float epsilon = ctx.Attr<float>("epsilon");
float momentum = ctx.Attr<float>("momentum");
const bool is_test = ctx.Attr<bool>("is_test");
const bool use_global_stats = ctx.Attr<bool>("use_global_stats");
const bool trainable_stats = ctx.Attr<bool>("trainable_statistics");
bool test_mode = is_test && (!trainable_stats);
bool global_stats = test_mode || use_global_stats;
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
DataLayout data_layout = framework::StringToDataLayout(data_layout_str);
const auto *x = ctx.Input<Tensor>("X");
const auto &x_dims = x->dims();
PADDLE_ENFORCE_GE(
x_dims.size(), 2,
platform::errors::InvalidArgument(
"The size of input X's dimensions should be larger than 1."
"But received: the size of input X's dimensions is [%d]",
x_dims.size()));
PADDLE_ENFORCE_LE(
x_dims.size(), 5,
platform::errors::InvalidArgument(
"The size of input X's dimensions should be less than 6."
"But received: the size of input X's dimensions is [%d]",
x_dims.size()));
const int N = x_dims[0];
const int C =
(data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
const int sample_size = x->numel() / N / C;
auto *y = ctx.Output<Tensor>("Y");
auto *mean_out = ctx.Output<Tensor>("MeanOut");
auto *variance_out = ctx.Output<Tensor>("VarianceOut");
auto *saved_mean = ctx.Output<Tensor>("SavedMean");
auto *saved_variance = ctx.Output<Tensor>("SavedVariance");
// alloc memory
y->mutable_data<T>(ctx.GetPlace());
mean_out->mutable_data<T>(ctx.GetPlace());
variance_out->mutable_data<T>(ctx.GetPlace());
saved_mean->mutable_data<T>(ctx.GetPlace());
saved_variance->mutable_data<T>(ctx.GetPlace());
// input dimension is 2 and the format is NCHW. The input can be regarded
// as NHWC format
if (x_dims.size() == 2 && data_layout == DataLayout::kNCHW) {
data_layout = DataLayout::kNHWC;
}
if (!global_stats) {
// saved_xx is use just in this batch of data
EigenVectorArrayMap<T> saved_mean_e(
saved_mean->mutable_data<T>(ctx.GetPlace()), C);
EigenVectorArrayMap<T> saved_variance_e(
saved_variance->mutable_data<T>(ctx.GetPlace()), C);
saved_mean_e.setZero();
saved_variance_e.setZero();
EigenVectorArrayMap<T> running_mean_arr(
mean_out->mutable_data<T>(ctx.GetPlace()), C);
EigenVectorArrayMap<T> running_var_arr(
variance_out->mutable_data<T>(ctx.GetPlace()), C);
if ((N * sample_size) == 1) {
// Only 1 element in normalization dimension,
// we skip the batch norm calculation, let y = x.
framework::TensorCopy(*x, ctx.GetPlace(), y);
return;
}
switch (data_layout) {
case DataLayout::kNCHW: {
ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
for (int nc = 0; nc < N * C; ++nc) {
saved_mean_e(nc % C) += x_arr.col(nc).sum();
}
saved_mean_e /= N * sample_size;
for (int nc = 0; nc < N * C; ++nc) {
saved_variance_e(nc % C) +=
(x_arr.col(nc) - saved_mean_e(nc % C)).matrix().squaredNorm();
}
saved_variance_e /= N * sample_size;
break;
}
case DataLayout::kNHWC: {
ConstEigenArrayMap<T> x_arr(x->data<T>(), C, N * sample_size);
for (int i = 0; i < N * sample_size; ++i) {
saved_mean_e += x_arr.col(i);
}
saved_mean_e /= N * sample_size;
for (int i = 0; i < N * sample_size; ++i) {
saved_variance_e +=
(x_arr.col(i) - saved_mean_e) * (x_arr.col(i) - saved_mean_e);
}
saved_variance_e /= N * sample_size;
break;
}
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Unknown storage order: %s", data_layout_str));
}
// if MomentumTensor is set, use MomentumTensor value, momentum
// is only used in this training branch
if (ctx.HasInput("MomentumTensor")) {
const auto *mom_tensor = ctx.Input<Tensor>("MomentumTensor");
momentum = mom_tensor->data<float>()[0];
}
running_mean_arr =
running_mean_arr * momentum + saved_mean_e * (1. - momentum);
running_var_arr =
running_var_arr * momentum + saved_variance_e * (1. - momentum);
}
// use SavedMean and SavedVariance to do normalize
Eigen::Array<T, Eigen::Dynamic, 1> inv_std(C);
if (global_stats) {
ConstEigenVectorArrayMap<T> var_arr(
ctx.Input<Tensor>("Variance")->data<T>(), C);
inv_std = (var_arr + epsilon).sqrt().inverse();
} else {
EigenVectorArrayMap<T> saved_inv_std(
ctx.Output<Tensor>("SavedVariance")->data<T>(), C);
// inverse SavedVariance first, gradient will use it too.
saved_inv_std = (saved_inv_std + epsilon).inverse().sqrt();
inv_std = saved_inv_std;
}
ConstEigenVectorArrayMap<T> mean_arr(
global_stats ? ctx.Input<Tensor>("Mean")->data<T>()
: ctx.Output<Tensor>("SavedMean")->data<T>(),
C);
// ((x - est_mean) * (inv_var) * scale + bias
// formula transform ====>
// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
const auto *scale = ctx.Input<Tensor>("Scale");
const auto *bias = ctx.Input<Tensor>("Bias");
ConstEigenVectorArrayMap<T> scale_arr(scale->data<T>(), C);
ConstEigenVectorArrayMap<T> bias_arr(bias->data<T>(), C);
Eigen::Array<T, Eigen::Dynamic, 1> new_scale = inv_std * scale_arr;
Eigen::Array<T, Eigen::Dynamic, 1> new_bias =
bias_arr - mean_arr * inv_std * scale_arr;
switch (data_layout) {
case DataLayout::kNCHW: {
EigenArrayMap<T> y_arr(y->mutable_data<T>(ctx.GetPlace()), sample_size,
N * C);
ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
for (int nc = 0; nc < N * C; ++nc) {
y_arr.col(nc) = x_arr.col(nc) * new_scale(nc % C) + new_bias(nc % C);
}
break;
}
case DataLayout::kNHWC: {
EigenArrayMap<T>(y->mutable_data<T>(ctx.GetPlace()), C,
N * sample_size) =
(ConstEigenArrayMap<T>(x->data<T>(), C, N * sample_size).colwise() *
new_scale)
.colwise() +
new_bias;
break;
}
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Unknown storage order: %d", data_layout));
}
}
};
void BatchNormGradOp::InferShape(framework::InferShapeContext *ctx) const {
// check input
OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", "BatchNormGrad");
...@@ -585,261 +410,6 @@ framework::OpKernelType BatchNormGradOp::GetKernelTypeForVar(
tensor.place(), tensor.layout());
}
template <typename T>
class BatchNormGradKernel<platform::CPUDeviceContext, T>
: public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
const auto *d_y = ctx.Input<Tensor>(framework::GradVarName("Y"));
const auto *scale = ctx.Input<Tensor>("Scale");
const auto *bias = ctx.Input<Tensor>("Bias");
const auto *saved_mean = ctx.Input<Tensor>("SavedMean");
// SavedVariance have been reverted in forward operator
const auto *saved_inv_variance = ctx.Input<Tensor>("SavedVariance");
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
bool use_global_stats = ctx.Attr<bool>("use_global_stats");
const bool is_test = ctx.Attr<bool>("is_test");
const float epsilon = ctx.Attr<float>("epsilon");
DataLayout data_layout = framework::StringToDataLayout(data_layout_str);
auto *d_x = ctx.Output<Tensor>(framework::GradVarName("X"));
auto *d_scale = ctx.Output<Tensor>(framework::GradVarName("Scale"));
auto *d_bias = ctx.Output<Tensor>(framework::GradVarName("Bias"));
use_global_stats = is_test || use_global_stats;
// batch_norm with inplace as false will take X as grad input, which
// is same as cuDNN batch_norm backward calculation, batch_norm
// with inplace as true only take Y as input and X should be calculate
// by inverse operation of batch_norm on Y
const Tensor *x;
bool is_inplace;
if (ctx.HasInput("Y")) {
x = ctx.Input<Tensor>("Y");
is_inplace = true;
// if the input of batch norm is stop_gradient, d_x is null.
if (d_x) {
PADDLE_ENFORCE_EQ(d_x, d_y,
platform::errors::InvalidArgument(
"X@GRAD and Y@GRAD not inplace in inplace mode"));
}
} else {
x = ctx.Input<Tensor>("X");
is_inplace = false;
if (d_x) {
PADDLE_ENFORCE_NE(
d_x, d_y, platform::errors::InvalidArgument(
"X@GRAD and Y@GRAD inplaced in non-inplace mode"));
}
}
// Get the size for each dimension.
// NCHW [batch_size, in_channels, in_height, in_width]
const auto &x_dims = x->dims();
PADDLE_ENFORCE_GE(
x_dims.size(), 2,
platform::errors::InvalidArgument(
"The size of input X's dimensions should be larger than 1."
"But received: the size of input X's dimensions is [%d]",
x_dims.size()));
PADDLE_ENFORCE_LE(
x_dims.size(), 5,
platform::errors::InvalidArgument(
"The size of input X's dimensions should be less than 6."
"But received: the size of input X's dimensions is [%d]",
x_dims.size()));
const int N = x_dims[0];
const int C =
(data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
const int sample_size = x->numel() / N / C;
// input dimension is 2 and the format is NCHW. The input can be regarded as
// NHWC format
if (x_dims.size() == 2 && data_layout == DataLayout::kNCHW) {
data_layout = DataLayout::kNHWC;
}
// init output
if (d_x) {
d_x->mutable_data<T>(ctx.GetPlace());
}
const T *mean_data = saved_mean->data<T>();
const T *inv_var_data = saved_inv_variance->data<T>();
Tensor inv_var_tensor;
if (use_global_stats) {
const auto *running_mean = ctx.Input<Tensor>("Mean");
const auto *running_variance = ctx.Input<Tensor>("Variance");
mean_data = running_mean->data<T>();
inv_var_tensor.Resize({C});
T *running_inv_var_data = inv_var_tensor.mutable_data<T>(ctx.GetPlace());
EigenVectorArrayMap<T> inv_var_tmp(running_inv_var_data, C);
ConstEigenVectorArrayMap<T> var_arr(running_variance->data<T>(), C);
inv_var_tmp = (var_arr + epsilon).sqrt().inverse();
inv_var_data = running_inv_var_data;
}
ConstEigenVectorArrayMap<T> scale_arr(scale->data<T>(), C);
ConstEigenVectorArrayMap<T> bias_arr(bias->data<T>(), C);
ConstEigenVectorArrayMap<T> mean_arr(mean_data, C);
ConstEigenVectorArrayMap<T> inv_var_arr(inv_var_data, C);
T *d_bias_data = nullptr;
T *d_scale_data = nullptr;
if (d_scale && d_bias) {
d_scale->mutable_data<T>(ctx.GetPlace());
d_bias->mutable_data<T>(ctx.GetPlace());
d_bias_data = d_bias->mutable_data<T>(ctx.GetPlace());
d_scale_data = d_scale->mutable_data<T>(ctx.GetPlace());
}
// d_bias = np.sum(d_y, axis=0)
// d_scale = np.sum((X - mean) / inv_std * dy, axis=0)
// d_x = (1. / N) * scale * inv_var * (N * d_y - np.sum(d_y, axis=0)
// - (X - mean) * inv_var * inv_var * np.sum(d_y * (X - mean), axis=0))
EigenVectorArrayMap<T> d_bias_arr(d_bias_data, C);
EigenVectorArrayMap<T> d_scale_arr(d_scale_data, C);
if (d_scale && d_bias) {
d_bias_arr.setZero();
d_scale_arr.setZero();
}
if (d_x && (N * sample_size) == 1 && !use_global_stats) {
framework::TensorCopy(*d_y, ctx.GetPlace(), d_x);
return;
}
int scale_coefff = use_global_stats ? 1 : N * sample_size;
const auto scale_inv_var_nhw = scale_arr * inv_var_arr / scale_coefff;
Tensor dy_sum;
dy_sum.Resize({C});
dy_sum.mutable_data<T>(ctx.GetPlace());
EigenVectorArrayMap<T> dy_sum_arr(dy_sum.mutable_data<T>(ctx.GetPlace()),
C);
Tensor dy_mul_x_sub_mean_mul_invstd_sum;
dy_mul_x_sub_mean_mul_invstd_sum.Resize({C});
dy_mul_x_sub_mean_mul_invstd_sum.mutable_data<T>(ctx.GetPlace());
EigenVectorArrayMap<T> dy_mul_x_sub_mean_mul_invstd_sum_arr(
dy_mul_x_sub_mean_mul_invstd_sum.mutable_data<T>(ctx.GetPlace()), C);
dy_sum_arr.setZero();
dy_mul_x_sub_mean_mul_invstd_sum_arr.setZero();
// inplace calculation
// Y: ((x - est_mean) * (inv_var) * scale + bias
// formula transform ====>
// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
// X: (y - bias) / scale / (inv_var) + est_mean
// formula transform ====>
// (y - bias) / (scale * inv_var) + est_mean
switch (data_layout) {
case DataLayout::kNCHW: {
if (is_inplace) {
auto px = *x;
EigenArrayMap<T> x_data(px.mutable_data<T>(ctx.GetPlace()),
sample_size, N * C);
ConstEigenArrayMap<T> y_data(x->data<T>(), sample_size, N * C);
for (int nc = 0; nc < N * C; ++nc) {
x_data.col(nc) = (y_data.col(nc) - bias_arr(nc % C)) /
scale_inv_var_nhw(nc % C) / scale_coefff +
mean_arr(nc % C);
}
}
ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
ConstEigenArrayMap<T> d_y_arr(d_y->data<T>(), sample_size, N * C);
for (int nc = 0; nc < N * C; ++nc) {
int c = nc % C;
dy_sum_arr(c) += d_y_arr.col(nc).sum();
dy_mul_x_sub_mean_mul_invstd_sum_arr(c) +=
((x_arr.col(nc) - mean_arr(c)) * inv_var_arr(c) * d_y_arr.col(nc))
.sum();
}
if (d_scale && d_bias) {
d_bias_arr = dy_sum_arr;
d_scale_arr = dy_mul_x_sub_mean_mul_invstd_sum_arr;
}
if (d_x) {
EigenArrayMap<T> d_x_arr(d_x->mutable_data<T>(ctx.GetPlace()),
sample_size, N * C);
if (!use_global_stats) {
for (int nc = 0; nc < N * C; ++nc) {
int c = nc % C;
d_x_arr.col(nc) =
scale_inv_var_nhw(c) *
(d_y_arr.col(nc) * N * sample_size - dy_sum_arr(c) -
(x_arr.col(nc) - mean_arr[c]) *
dy_mul_x_sub_mean_mul_invstd_sum_arr(c) *
inv_var_arr(c));
}
} else {
for (int nc = 0; nc < N * C; ++nc) {
int c = nc % C;
d_x_arr.col(nc) = scale_inv_var_nhw(c) * d_y_arr.col(nc);
}
}
}
break;
}
case DataLayout::kNHWC: {
if (is_inplace) {
auto px = *x;
EigenArrayMap<T> x_data(px.mutable_data<T>(ctx.GetPlace()), C,
N * sample_size);
ConstEigenArrayMap<T> y_data(x->data<T>(), C, N * sample_size);
for (int nhw = 0; nhw < N * sample_size; nhw++) {
x_data.col(nhw) = (y_data.col(nhw) - bias_arr) / scale_inv_var_nhw /
scale_coefff +
mean_arr;
}
}
ConstEigenArrayMap<T> x_arr(x->data<T>(), C, N * sample_size);
ConstEigenArrayMap<T> d_y_arr(d_y->data<T>(), C, N * sample_size);
for (int nhw = 0; nhw < N * sample_size; ++nhw) {
dy_sum_arr += d_y_arr.col(nhw);
dy_mul_x_sub_mean_mul_invstd_sum_arr +=
(x_arr.col(nhw) - mean_arr) * inv_var_arr * d_y_arr.col(nhw);
}
if (d_scale && d_bias) {
d_bias_arr = dy_sum_arr;
d_scale_arr = dy_mul_x_sub_mean_mul_invstd_sum_arr;
}
if (d_x) {
EigenArrayMap<T> d_x_arr(d_x->mutable_data<T>(ctx.GetPlace()), C,
N * sample_size);
if (!use_global_stats) {
for (int nhw = 0; nhw < N * sample_size; ++nhw) {
d_x_arr.col(nhw) =
scale_inv_var_nhw *
(d_y_arr.col(nhw) * N * sample_size - dy_sum_arr -
(x_arr.col(nhw) - mean_arr) *
dy_mul_x_sub_mean_mul_invstd_sum_arr * inv_var_arr);
}
} else {
for (int nhw = 0; nhw < N * sample_size; ++nhw) {
d_x_arr.col(nhw) = scale_inv_var_nhw * d_y_arr.col(nhw);
}
}
}
break;
}
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Unknown storage order: %s", data_layout_str));
}
}
};
template <typename T>
void BatchNormGradMaker<T>::Apply(GradOpPtr<T> op) const {
op->SetType(this->ForwardOpType() + "_grad");
...@@ -951,335 +521,16 @@ framework::OpKernelType BatchNormDoubleGradOp::GetExpectedKernelType(
OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
}
template <typename T>
class BatchNormDoubleGradKernel<platform::CPUDeviceContext, T>
: public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
const auto *X = ctx.Input<Tensor>("X");
const auto *Scale = ctx.Input<Tensor>("Scale");
const auto *dY = ctx.Input<Tensor>("DY");
const auto *Saved_mean = ctx.Input<Tensor>("SavedMean");
const auto *Saved_variance = ctx.Input<Tensor>("SavedVariance");
const float epsilon = ctx.Attr<float>("epsilon");
const bool use_global_stats = ctx.Attr<bool>("use_global_stats");
const bool is_test = ctx.Attr<bool>("is_test");
PADDLE_ENFORCE_EQ(
is_test, false,
platform::errors::InvalidArgument(
"`is_test = True` CANNOT be used in train program. If "
"you want to use global status in pre_train model, "
"please set `use_global_stats = True`"));
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
const DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);
const auto *ddX = ctx.Input<Tensor>("DDX");
const auto *ddScale = ctx.Input<Tensor>("DDScale");
const auto *ddBias = ctx.Input<Tensor>("DDBias");
auto *dX = ctx.Output<Tensor>("DX");
auto *dScale = ctx.Output<Tensor>("DScale");
auto *ddY = ctx.Output<Tensor>("DDY");
dX->mutable_data<T>(ctx.GetPlace());
ddY->mutable_data<T>(ctx.GetPlace());
auto &dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
const auto &x_dims = X->dims();
const int C =
(data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
const int sample_size = X->numel() / C;
phi::funcs::SetConstant<platform::CPUDeviceContext, T> set_constant;
const T *mean_data = Saved_mean->data<T>();
const T *inv_var_data = Saved_variance->data<T>();
Tensor inv_var_tensor;
if (use_global_stats) {
const auto *running_mean = ctx.Input<Tensor>("Mean");
const auto *running_variance = ctx.Input<Tensor>("Variance");
mean_data = running_mean->data<T>();
inv_var_tensor.Resize({C});
T *running_inv_var_data = inv_var_tensor.mutable_data<T>(ctx.GetPlace());
EigenVectorArrayMap<T> inv_var_tmp(running_inv_var_data, C);
ConstEigenVectorArrayMap<T> var_arr(running_variance->data<T>(), C);
inv_var_tmp = (var_arr + epsilon).sqrt().inverse();
inv_var_data = running_inv_var_data;
}
// transpose NCHW -> NHWC for easy calculate
Tensor transformed_x(X->type());
Tensor transformed_dy(dY->type());
Tensor transformed_ddx(ddX->type());
Tensor transformed_dx(dX->type());
Tensor transformed_ddy(ddY->type());
if (data_layout == DataLayout::kNCHW && x_dims.size() > 2) {
VLOG(3) << "Transform batchnorm output from NCHW to NHWC";
// Input Tensor
ResizeToChannelLast<platform::CPUDeviceContext, T>(ctx, X,
&transformed_x);
TransToChannelLast<platform::CPUDeviceContext, T>(ctx, X, &transformed_x);
ResizeToChannelLast<platform::CPUDeviceContext, T>(ctx, dY,
&transformed_dy);
TransToChannelLast<platform::CPUDeviceContext, T>(ctx, dY,
&transformed_dy);
ResizeToChannelLast<platform::CPUDeviceContext, T>(ctx, ddX,
&transformed_ddx);
TransToChannelLast<platform::CPUDeviceContext, T>(ctx, ddX,
&transformed_ddx);
// Output Tensor
ResizeToChannelLast<platform::CPUDeviceContext, T>(ctx, dX,
&transformed_dx);
ResizeToChannelLast<platform::CPUDeviceContext, T>(ctx, ddY,
&transformed_ddy);
} else {
transformed_x.ShareDataWith(*X);
transformed_dy.ShareDataWith(*dY);
transformed_ddx.ShareDataWith(*ddX);
transformed_dx.ShareDataWith(*dX);
transformed_ddy.ShareDataWith(*ddY);
}
ConstEigenArrayMap<T> x_arr(transformed_x.data<T>(), C, sample_size);
ConstEigenVectorArrayMap<T> mean_arr(mean_data, C);
ConstEigenVectorArrayMap<T> inv_var_arr(inv_var_data, C);
Tensor mean_tile;
mean_tile.Resize({C, sample_size});
mean_tile.mutable_data<T>(ctx.GetPlace());
EigenArrayMap<T> mean_tile_data(mean_tile.mutable_data<T>(ctx.GetPlace()),
C, sample_size);
Tensor inv_var_tile;
inv_var_tile.Resize({C, sample_size});
inv_var_tile.mutable_data<T>(ctx.GetPlace());
EigenArrayMap<T> inv_var_tile_data(
inv_var_tile.mutable_data<T>(ctx.GetPlace()), C, sample_size);
mean_tile_data = mean_arr.replicate(1, sample_size);
inv_var_tile_data = inv_var_arr.replicate(1, sample_size);
Tensor Scale_data;
if (!Scale) {
Scale_data.mutable_data<T>({C}, ctx.GetPlace());
set_constant(dev_ctx, &Scale_data, static_cast<T>(1));
}
ConstEigenVectorArrayMap<T> scale_arr(
Scale ? Scale->data<T>() : Scale_data.data<T>(), C);
Tensor scale_tile;
scale_tile.Resize({C, sample_size});
scale_tile.mutable_data<T>(ctx.GetPlace());
EigenArrayMap<T> scale_tile_data(scale_tile.mutable_data<T>(ctx.GetPlace()),
C, sample_size);
scale_tile_data = scale_arr.replicate(1, sample_size);
ConstEigenArrayMap<T> dy_arr(transformed_dy.data<T>(), C, sample_size);
ConstEigenArrayMap<T> ddx_arr(transformed_ddx.data<T>(), C, sample_size);
Tensor x_sub_mean_mul_invstd;
x_sub_mean_mul_invstd.Resize({C, sample_size});
x_sub_mean_mul_invstd.mutable_data<T>(ctx.GetPlace());
EigenArrayMap<T> x_sub_mean_mul_invstd_arr(
x_sub_mean_mul_invstd.mutable_data<T>(ctx.GetPlace()), C, sample_size);
x_sub_mean_mul_invstd_arr = (x_arr - mean_tile_data) * inv_var_tile_data;
if (dX) {
dX->mutable_data<T>(ctx.GetPlace());
EigenArrayMap<T> dx_arr(transformed_dx.mutable_data<T>(ctx.GetPlace()), C,
sample_size);
dx_arr.setZero();
if (use_global_stats) {
// math: dx = (ddscale * dy) * inv_var
if (ddScale) {
ConstEigenVectorArrayMap<T> ddscale_arr(ddScale->data<T>(), C);
Tensor ddscale_tile;
ddscale_tile.Resize({C, sample_size});
EigenArrayMap<T> ddscale_tile_data(
ddscale_tile.mutable_data<T>(ctx.GetPlace()), C, sample_size);
ddscale_tile_data = ddscale_arr.replicate(1, sample_size);
dx_arr = dy_arr * ddscale_tile_data * inv_var_tile_data;
}
} else {
// math: dx = scale * ((x - mean) * inv_var / NxHxW * (np.mean(ddx,
// axis=(n,h,w)) *
// np.sum(dy, axis=(n,h,w)) -
// np.sum(dy * ddx, axis=(n,h,w)) + 3 * np.mean(dy * (x -
// mean),
// axis=(n,h,w)) * inv_var.pow(2) *
// np.sum(ddx * (x - mean), axis=(n,h,w))) + inv_var.pow(3) /
// NxHxW *
// np.sum(ddx * (x - mean)) *
// (np.mean(dy, axis=(n,h,w)) - dy) + inv_var.pow(3) / NxHxW *
// np.sum(dy,
// axis=(n,h,w)) * (x - mean) *
// (np.mean(ddx, axis=(n,h,w)) - ddx)) + ddr * (dy * inv_var -
// inv_var
// *
// np.mean(dy, axis=(n,h,w)) -
// inv_var.pow(3) * (x - mean) * np.mean(dy * (x - mean),
// axis=(n,h,w)))
if (ddX) {
dx_arr +=
(x_sub_mean_mul_invstd_arr * inv_var_tile_data *
inv_var_tile_data / sample_size)
.colwise() *
(ddx_arr.rowwise().sum() * dy_arr.rowwise().sum() / sample_size -
(dy_arr * ddx_arr).rowwise().sum() +
3. * (dy_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() *
(ddx_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() /
sample_size);
dx_arr += (inv_var_tile_data * inv_var_tile_data).colwise() *
(ddx_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() /
sample_size *
(dy_arr.rowwise().sum() / sample_size - dy_arr);
dx_arr += (inv_var_tile_data * inv_var_tile_data).colwise() *
(dy_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() /
sample_size *
(ddx_arr.rowwise().sum() / sample_size - ddx_arr);
dx_arr = scale_tile_data * dx_arr;
}
if (ddScale) {
ConstEigenVectorArrayMap<T> ddscale_arr(ddScale->data<T>(), C);
Tensor ddscale_tile;
ddscale_tile.Resize({C, sample_size});
EigenArrayMap<T> ddscale_tile_data(
ddscale_tile.mutable_data<T>(ctx.GetPlace()), C, sample_size);
ddscale_tile_data = ddscale_arr.replicate(1, sample_size);
dx_arr += (dy_arr * inv_var_tile_data -
(dy_arr.rowwise().sum().replicate(1, sample_size) /
sample_size) *
inv_var_tile_data -
x_sub_mean_mul_invstd_arr * inv_var_tile_data *
(dy_arr * x_sub_mean_mul_invstd_arr)
.rowwise()
.sum()
.replicate(1, sample_size) /
sample_size) *
ddscale_tile_data;
}
}
if (data_layout == DataLayout::kNCHW) {
VLOG(3) << "Transform batchnorm output from NHWC to NCHW";
TransToChannelFirst<paddle::platform::CPUDeviceContext, T>(
ctx, &transformed_dx, dX);
}
}
if (dScale) {
dScale->mutable_data<T>(ctx.GetPlace());
EigenVectorArrayMap<T> dscale_arr(dScale->mutable_data<T>(ctx.GetPlace()),
C);
dscale_arr.setZero();
if (use_global_stats) {
// math: dscale = np.sum(ddx * dy, axis=(n,h,w)) * inv_var
if (ddX) {
dscale_arr = (ddx_arr * dy_arr * inv_var_tile_data).rowwise().sum();
}
} else {
// math: dscale = inv_var * (dy - np.mean(dy, axis=(n,h,w) - (x-mean) *
// inv_var.pow(2) * np.mean(dy * (x-mean), axis=(n,h,w)))) *
// ddx
if (ddX) {
Tensor first_grad;
first_grad.Resize({C, sample_size});
EigenArrayMap<T> first_grad_arr(
first_grad.mutable_data<T>(ctx.GetPlace()), C, sample_size);
first_grad_arr.setZero();
first_grad_arr +=
inv_var_tile_data *
(dy_arr -
dy_arr.rowwise().sum().replicate(1, sample_size) / sample_size -
x_sub_mean_mul_invstd_arr *
(dy_arr * x_sub_mean_mul_invstd_arr)
.rowwise()
.sum()
.replicate(1, sample_size) /
sample_size);
dscale_arr = (first_grad_arr * ddx_arr).rowwise().sum();
}
}
}
if (ddY) {
ddY->mutable_data<T>(ctx.GetPlace());
EigenArrayMap<T> ddy_arr(transformed_ddy.mutable_data<T>(ctx.GetPlace()),
C, sample_size);
ddy_arr.setZero();
if (use_global_stats) {
// math: ddy = r * ddx * inv_var + ddbias +
// ddscale * (x - mean) * inv_var
if (ddX) {
ddy_arr = scale_tile_data * ddx_arr * inv_var_tile_data;
}
} else {
// math: ddy = (x - mean) * inv_var * ddscale + ddbias +
// scale * inv_var * (ddx - (x - mean) * inv_var.pow(2) *
// np.mean(ddx * (x - mean), axis=(n,h,w)))
if (ddX) {
ddy_arr +=
scale_tile_data * inv_var_tile_data *
(ddx_arr -
ddx_arr.rowwise().sum().replicate(1, sample_size) / sample_size -
x_sub_mean_mul_invstd_arr *
(ddx_arr * x_sub_mean_mul_invstd_arr)
.rowwise()
.sum()
.replicate(1, sample_size) /
sample_size);
}
}
if (ddScale) {
ConstEigenVectorArrayMap<T> ddscale_arr(ddScale->data<T>(), C);
Tensor ddscale_tile;
ddscale_tile.Resize({C, sample_size});
EigenArrayMap<T> ddscale_tile_data(
ddscale_tile.mutable_data<T>(ctx.GetPlace()), C, sample_size);
ddscale_tile_data = ddscale_arr.replicate(1, sample_size);
ddy_arr += x_sub_mean_mul_invstd_arr * ddscale_tile_data;
}
if (ddBias) {
ConstEigenVectorArrayMap<T> ddbias_arr(ddBias->data<T>(), C);
Tensor ddbias_tile;
ddbias_tile.Resize({C, sample_size});
EigenArrayMap<T> ddbias_tile_data(
ddbias_tile.mutable_data<T>(ctx.GetPlace()), C, sample_size);
ddbias_tile_data = ddbias_arr.replicate(1, sample_size);
ddy_arr += ddbias_tile_data;
}
if (data_layout == DataLayout::kNCHW) {
VLOG(3) << "Transform batchnorm output from NHWC to NCHW";
TransToChannelFirst<paddle::platform::CPUDeviceContext, T>(
ctx, &transformed_ddy, ddY);
}
}
}
};
DECLARE_INPLACE_OP_INFERER(BatchNormDoubleGradOpInplaceInferer, {"DY", "DDY"});
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(batch_norm, BatchNormInferShapeFunctor,
PD_INFER_META(phi::BatchNormInferMeta));
REGISTER_OPERATOR(batch_norm, ops::BatchNormOp, ops::BatchNormOpMaker,
ops::BatchNormOpInferVarType,
ops::BatchNormGradMaker<paddle::framework::OpDesc>,
......
...@@ -113,23 +113,5 @@ class BatchNormOpInferVarType
}
};
template <typename DeviceContext, typename T>
class BatchNormKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override;
};
template <typename DeviceContext, typename T>
class BatchNormGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override;
};
template <typename DeviceContext, typename T>
class BatchNormDoubleGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override;
};
} // namespace operators
} // namespace paddle
...@@ -27,6 +27,9 @@ limitations under the License. */
#endif
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/phi/infermeta/binary.h"
namespace paddle {
namespace operators {
...@@ -841,6 +844,8 @@ framework::OpKernelType ConvOpDoubleGrad::GetExpectedKernelType(
} // namespace paddle
namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(conv2d, Conv2dInferShapeFunctor,
PD_INFER_META(phi::ConvInferMeta));
REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker,
ops::ConvOpInferVarType,
ops::Conv2DGradMaker<paddle::framework::OpDesc>,
...@@ -851,6 +856,8 @@ REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad,
REGISTER_OPERATOR(conv2d_grad_grad, ops::ConvOpDoubleGrad);
// depthwise convolution op
DECLARE_INFER_SHAPE_FUNCTOR(depthwise_conv2d, DepthwiseConv2dInferShapeFunctor,
PD_INFER_META(phi::ConvInferMeta));
REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
ops::ConvOpInferVarType,
ops::Conv2DGradMaker<paddle::framework::OpDesc>,
...@@ -860,6 +867,8 @@ REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad,
ops::Conv2DDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(depthwise_conv2d_grad_grad, ops::ConvOpDoubleGrad);
DECLARE_INFER_SHAPE_FUNCTOR(conv3d, Conv3dInferShapeFunctor,
PD_INFER_META(phi::ConvInferMeta));
REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker,
ops::ConvOpInferVarType,
ops::Conv3DGradMaker<paddle::framework::OpDesc>,
......
...@@ -9,8 +9,10 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/phi/infermeta/binary.h"
namespace paddle {
namespace operators {
...@@ -235,10 +237,13 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker {
} // namespace paddle
namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(yolo_box, YoloBoxInferShapeFunctor,
PD_INFER_META(phi::YoloBoxInferMeta));
REGISTER_OPERATOR(
yolo_box, ops::YoloBoxOp, ops::YoloBoxOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
YoloBoxInferShapeFunctor);
REGISTER_OP_VERSION(yolo_box)
.AddCheckpoint(
......
...@@ -14,7 +14,9 @@ limitations under the License. */
#include <memory>
#include <string>
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/infermeta/unary.h"
namespace paddle {
namespace operators {
...@@ -25,17 +27,6 @@ class DropoutOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Dropout");
auto x_dims = ctx->GetInputDim("X");
ctx->SetOutputDim("Out", x_dims);
if (ctx->Attrs().Get<bool>("is_test") == false) {
ctx->SetOutputDim("Mask", x_dims);
}
ctx->ShareLoD("X", /*->*/ "Out");
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
...@@ -173,7 +164,11 @@ class DropoutGradOpMaker : public framework::SingleGradOpMaker<T> {
} // namespace paddle
namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(dropout, DropoutInferShapeFunctor,
PD_INFER_META(phi::DropoutInferMeta));
REGISTER_OPERATOR(dropout, ops::DropoutOp, ops::DropoutOpMaker,
ops::DropoutGradOpMaker<paddle::framework::OpDesc>,
ops::DropoutGradOpMaker<paddle::imperative::OpBase>,
DropoutInferShapeFunctor);
REGISTER_OPERATOR(dropout_grad, ops::DropoutOpGrad);
...@@ -120,6 +120,142 @@ class Conv2DFusionOp : public operators::ConvOp {
ctx->SetOutputsDim("Outputs", output_shapes);
}
}
std::vector<int64_t> ComputeOutputShape(
framework::InferShapeContext* ctx) const {
OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "Conv");
OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "Conv");
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
std::string padding_algorithm =
ctx->Attrs().Get<std::string>("padding_algorithm");
int groups = ctx->Attrs().Get<int>("groups");
std::vector<int> dilations =
ctx->Attrs().Get<std::vector<int>>("dilations");
int dilation_size = dilations.size();
for (int i = 0; i < dilation_size; ++i) {
PADDLE_ENFORCE_GT(
dilations[i], 0,
platform::errors::InvalidArgument(
"The dilation of Op(Conv) should be larger than 0, but received "
"dilation is %d.",
dilations[i]));
}
const std::string data_format =
ctx->Attrs().Get<std::string>("data_format");
// MKL-DNN Kernels are using NCHW order of dims description
// so we ignore data_format consideration for MKL-DNN kernel
const bool channel_last = (ctx->IsRunMKLDNNKernel() == false) &&
(data_format == "NHWC" || data_format == "NDHWC");
PADDLE_ENFORCE_EQ(
in_dims.size() == 4 || in_dims.size() == 5, true,
platform::errors::InvalidArgument(
"The input of Op(Conv) should be a 4-D or 5-D Tensor. But "
"received: input's dimension is %u, input's shape is [%s].",
in_dims.size(), in_dims));
PADDLE_ENFORCE_EQ(
in_dims.size(), filter_dims.size(),
platform::errors::InvalidArgument(
"The input's dimension and filter's dimension of "
"Op(Conv) should be equal. But received: the input's shape is "
"[%s], "
"the input's dimension is %d; the filter's shape is [%s], "
"the filter's dimension is %d.",
in_dims, in_dims.size(), filter_dims, filter_dims.size()));
int stride_size = strides.size();
for (int i = 0; i < stride_size; ++i) {
PADDLE_ENFORCE_GT(
strides[i], 0,
platform::errors::InvalidArgument(
"The stride of Op(Conv) should be larger than 0, but received "
"stride is %d.",
strides[i]));
}
int in_sub_stride_size = in_dims.size() - stride_size;
PADDLE_ENFORCE_EQ(
in_dims.size(), strides.size() + 2U,
platform::errors::InvalidArgument(
"The difference of input's dimension and Attr(strides)'s "
"length must be equal to 2 for Op(Conv). "
"But received: input's dimension is %d, input's shape is [%s]; "
"Attr(stride)'s length is %d, Attr(stride) is [%s]; "
"difference of input's dimension and Attr(strides)'s length = %u.",
in_dims.size(), in_dims, strides.size(), phi::make_ddim(strides),
in_sub_stride_size));
const auto input_channels =
channel_last ? in_dims[in_dims.size() - 1] : in_dims[1];
PADDLE_ENFORCE_EQ(
input_channels, filter_dims[1] * groups,
platform::errors::InvalidArgument(
"The number of input's channels should be equal to filter's "
"channels "
"* groups for Op(Conv). But received: the input's channels is %d, "
"the input's shape is [%s]; the filter's channels is %d, the "
"filter's shape is [%s]; the groups is %d, the data_format is %s. "
"The error may come from wrong data_format setting.",
input_channels, in_dims, filter_dims[1], filter_dims, groups,
data_format));
PADDLE_ENFORCE_EQ(
filter_dims[0] % groups, 0,
platform::errors::InvalidArgument(
"The number of output's channels (filter's first dimension) of "
"Op(Conv) should be divided by groups. But received: "
"the output channels is %d, the filter's shape is [%s], "
"the groups is %d.",
filter_dims[0], filter_dims, groups));
if (ctx->IsRuntime()) {
PADDLE_ENFORCE_GT(
filter_dims[0], 0,
platform::errors::InvalidArgument(
"the size of filter at axis 0 should be greater than 0"));
}
framework::DDim in_data_dims;
if (channel_last) {
in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1);
} else {
in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size());
}
framework::DDim filter_data_dims =
phi::slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
in_data_dims, strides, ksize);
std::vector<int64_t> output_shape({in_dims[0]});
if (!channel_last) {
output_shape.push_back(filter_dims[0]);
}
for (int i = 0; i < in_data_dims.size(); ++i) {
if ((!ctx->IsRuntime()) &&
(in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
output_shape.push_back(-1);
} else {
output_shape.push_back(
ConvOutputSize(in_data_dims[i], filter_data_dims[i], dilations[i],
paddings[2 * i], paddings[2 * i + 1], strides[i]));
}
}
if (channel_last) {
output_shape.push_back(filter_dims[0]);
}
return output_shape;
}
};
// TODO(qingqing): add gradient operator for conv2d_fusion
......
...@@ -16,7 +16,9 @@ limitations under the License. */
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/infermeta/unary.h"
namespace paddle {
namespace operators {
...@@ -28,27 +30,6 @@ class HistogramOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "histogram");
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "histogram");
const auto &nbins = ctx->Attrs().Get<int64_t>("bins");
const auto &minval = ctx->Attrs().Get<int>("min");
const auto &maxval = ctx->Attrs().Get<int>("max");
PADDLE_ENFORCE_GE(nbins, 1,
platform::errors::InvalidArgument(
"The bins should be greater than or equal to 1."
"But received nbins is %d",
nbins));
PADDLE_ENFORCE_GE(maxval, minval, platform::errors::InvalidArgument(
"max must be larger or equal to min."
"But received max is %d, min is %d",
maxval, minval));
ctx->SetOutputDim("Out", phi::make_ddim({nbins}));
ctx->ShareLoD("X", /*->*/ "Out");
}
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const {
auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
...@@ -81,7 +62,12 @@ class HistogramOpMaker : public framework::OpProtoAndCheckerMaker {
} // namespace paddle
namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(histogram, HistogramInferShapeFunctor,
PD_INFER_META(phi::HistogramInferMeta));
REGISTER_OPERATOR(
histogram, ops::HistogramOp, ops::HistogramOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
HistogramInferShapeFunctor);
...@@ -323,6 +323,7 @@ class InplaceABNGradKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(inplace_abn, ops::InplaceABNOp, ops::InplaceABNOpMaker,
ops::BatchNormOpInferVarType,
ops::InplaceABNOpGradMaker<paddle::framework::OpDesc>,
......
...@@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/infermeta/binary.h"
namespace paddle {
namespace operators {
...@@ -21,16 +23,6 @@ class MaskedSelectOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "Input", "MaskedSelect");
OP_INOUT_CHECK(ctx->HasInput("Mask"), "Input", "Mask", "MaskedSelect");
OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Out", "MaskedSelect");
// output will only be a 1-D Tensor
ctx->SetOutputDim("Y", phi::make_ddim({-1}));
ctx->ShareLoD("X", /*->*/ "Y");
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
...@@ -100,8 +92,13 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(MaskedSelectedGradNoNeedBufferVarsInferer,
} // namespace paddle
namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(masked_select, MaksedSelectInferShapeFunctor,
PD_INFER_META(phi::MaskedSelectInferMeta));
REGISTER_OPERATOR(masked_select, ops::MaskedSelectOp, ops::MaskedSelectOpMaker,
ops::MaskedSelectGradOpMaker<paddle::framework::OpDesc>,
ops::MaskedSelectGradOpMaker<paddle::imperative::OpBase>,
MaksedSelectInferShapeFunctor);
REGISTER_OPERATOR(masked_select_grad, ops::MaskedSelectOpGrad,
ops::MaskedSelectedGradNoNeedBufferVarsInferer);
...@@ -15,7 +15,9 @@ limitations under the License. */
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/infermeta/unary.h"
namespace paddle {
namespace operators {
...@@ -57,21 +59,7 @@ where, $\sum {x^2}$ is calculated along the `axis` dimension.
};
class NormOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "NormOp");
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "NormOp");
auto xdim = ctx->GetInputDim("X");
ctx->SetOutputDim("Out", xdim);
if (ctx->Attrs().Get<bool>("is_test") == false) {
int axis = ctx->Attrs().Get<int>("axis");
if (axis < 0) axis = xdim.size() + axis;
xdim[axis] = 1;
ctx->SetOutputDim("Norm", xdim);
}
}
};
class NormOpGrad : public framework::OperatorWithKernel {
...@@ -111,7 +99,11 @@ class NormOpGradOpMaker : public framework::SingleGradOpMaker<T> {
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
DECLARE_INFER_SHAPE_FUNCTOR(norm, NormInferShapeFunctor,
PD_INFER_META(phi::NormInferMeta));
REGISTER_OPERATOR(norm, ops::NormOp, ops::NormOpMaker,
ops::NormOpGradOpMaker<paddle::framework::OpDesc>,
ops::NormOpGradOpMaker<paddle::imperative::OpBase>,
NormInferShapeFunctor);
REGISTER_OPERATOR(norm_grad, ops::NormOpGrad);
...@@ -50,6 +50,7 @@ class SyncBatchNormGradMaker : public framework::SingleGradOpMaker<T> {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(sync_batch_norm, ops::BatchNormOp, ops::BatchNormOpMaker,
ops::BatchNormOpInferVarType,
ops::SyncBatchNormGradMaker<paddle::framework::OpDesc>,
......
...@@ -21,6 +21,8 @@ limitations under the License. */
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/kernels/funcs/common_shape.h"
#include "paddle/phi/kernels/cpu/conv_util.h"
namespace phi {
namespace detail {
...@@ -355,6 +357,161 @@ void CrossInferMeta(const MetaTensor& x,
out->share_lod(x);
}
void ConvInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings_t,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations_t,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
MetaTensor* out,
MetaConfig config) {
std::vector<int> paddings = paddings_t;
std::vector<int> dilations = dilations_t;
auto in_dims = input.dims();
auto filter_dims = filter.dims();
int dilation_size = dilations.size();
for (int i = 0; i < dilation_size; ++i) {
PADDLE_ENFORCE_GT(
dilations[i],
0,
phi::errors::InvalidArgument(
"The dilation of Op(Conv) should be larger than 0, but received "
"dilation is %d.",
dilations[i]));
}
const bool channel_last = (config.is_run_mkldnn_kernel == false) &&
(data_format == "NHWC" || data_format == "NDHWC");
PADDLE_ENFORCE_EQ(
in_dims.size() == 4 || in_dims.size() == 5,
true,
phi::errors::InvalidArgument(
"The input of Op(Conv) should be a 4-D or 5-D Tensor. But "
"received: input's dimension is %u, input's shape is [%s].",
in_dims.size(),
in_dims));
PADDLE_ENFORCE_EQ(
in_dims.size(),
filter_dims.size(),
phi::errors::InvalidArgument(
"The input's dimension and filter's dimension of "
"Op(Conv) should be equal. But received: the input's shape is [%s], "
"the input's dimension is %d; the filter's shape is [%s], "
"the filter's dimension is %d.",
in_dims,
in_dims.size(),
filter_dims,
filter_dims.size()));
int stride_size = strides.size();
for (int i = 0; i < stride_size; ++i) {
PADDLE_ENFORCE_GT(
strides[i],
0,
phi::errors::InvalidArgument(
"The stride of Op(Conv) should be larger than 0, but received "
"stride is %d.",
strides[i]));
}
int in_sub_stride_size = in_dims.size() - stride_size;
PADDLE_ENFORCE_EQ(
in_dims.size(),
strides.size() + 2U,
phi::errors::InvalidArgument(
"The difference of input's dimension and Attr(strides)'s "
"length must be equal to 2 for Op(Conv). "
"But received: input's dimension is %d, input's shape is [%s]; "
"Attr(stride)'s length is %d, Attr(stride) is [%s]; "
"difference of input's dimension and Attr(strides)'s length = %u.",
in_dims.size(),
in_dims,
strides.size(),
phi::make_ddim(strides),
in_sub_stride_size));
const auto input_channels =
channel_last ? in_dims[in_dims.size() - 1] : in_dims[1];
PADDLE_ENFORCE_EQ(
input_channels,
filter_dims[1] * groups,
phi::errors::InvalidArgument(
"The number of input's channels should be equal to filter's channels "
"* groups for Op(Conv). But received: the input's channels is %d, "
"the input's shape is [%s]; the filter's channels is %d, the "
"filter's shape is [%s]; the groups is %d, the data_format is %s. "
"The error may come from wrong data_format setting.",
input_channels,
in_dims,
filter_dims[1],
filter_dims,
groups,
data_format));
PADDLE_ENFORCE_EQ(
filter_dims[0] % groups,
0,
phi::errors::InvalidArgument(
"The number of output's channels (filter's first dimension) of "
"Op(Conv) should be divided by groups. But received: "
"the output channels is %d, the filter's shape is [%s], "
"the groups is %d.",
filter_dims[0],
filter_dims,
groups));
if (config.is_runtime) {
PADDLE_ENFORCE_GT(
filter_dims[0],
0,
phi::errors::InvalidArgument(
"the size of filter at axis 0 should be greater than 0"));
}
DDim in_data_dims;
if (channel_last) {
in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1);
} else {
in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size());
}
DDim filter_data_dims = phi::slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
phi::UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
std::vector<int64_t> output_shape({in_dims[0]});
if (!channel_last) {
output_shape.push_back(filter_dims[0]);
}
for (int i = 0; i < in_data_dims.size(); ++i) {
if ((!config.is_runtime) &&
(in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
output_shape.push_back(-1);
} else {
const int dkernel = dilations[i] * (filter_data_dims[i] - 1) + 1;
int output_size =
(in_data_dims[i] + paddings[2 * i] + paddings[2 * i + 1] - dkernel) /
strides[i] +
1;
output_shape.push_back(output_size);
}
}
if (channel_last) {
output_shape.push_back(filter_dims[0]);
}
out->set_dims(make_ddim(output_shape));
out->set_dtype(input.dtype());
}
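// Illustrative check of the output-size computation above (example numbers,
// not part of the original function): with in_data_dims[i] = 32,
// filter_data_dims[i] = 3, dilations[i] = 1, paddings = {1, 1} and
// strides[i] = 1, dkernel = 1 * (3 - 1) + 1 = 3 and
// output_size = (32 + 1 + 1 - 3) / 1 + 1 = 32, i.e. a 3x3, stride-1,
// padding-1 convolution keeps the spatial size unchanged.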
void DistInferMeta(const MetaTensor& x,
const MetaTensor& y,
float p,
...@@ -815,6 +972,13 @@ void LogLossInferMeta(const MetaTensor& input,
out->share_lod(input);
}
void MaskedSelectInferMeta(const MetaTensor& x,
const MetaTensor& mask,
MetaTensor* out) {
out->set_dims({-1}); // can not infer
out->set_dtype(x.dtype());
}
void MatmulInferMeta(const MetaTensor& x,
const MetaTensor& y,
bool trans_x,
...@@ -1188,6 +1352,118 @@ void TriangularSolveInferMeta(const MetaTensor& x,
out->share_lod(y);
}
void YoloBoxInferMeta(const MetaTensor& x,
const MetaTensor& img_size,
const std::vector<int>& anchors,
int class_num,
float conf_thresh,
int downsample_ratio,
bool clip_bbox,
float scale_x_y,
bool iou_aware,
float iou_aware_factor,
MetaTensor* boxes,
MetaTensor* scores,
MetaConfig config) {
auto dim_x = x.dims();
auto dim_imgsize = img_size.dims();
int anchor_num = anchors.size() / 2;
PADDLE_ENFORCE_EQ(
dim_x.size(),
4,
phi::errors::InvalidArgument("Input(X) should be a 4-D tensor."
"But received X dimension(%s)",
dim_x.size()));
if (iou_aware) {
PADDLE_ENFORCE_EQ(
dim_x[1],
anchor_num * (6 + class_num),
phi::errors::InvalidArgument(
"Input(X) dim[1] should be equal to (anchor_mask_number * (6 "
"+ class_num)) while iou_aware is true."
"But received dim[1](%s) != (anchor_mask_number * "
"(6+class_num)(%s).",
dim_x[1],
anchor_num * (6 + class_num)));
PADDLE_ENFORCE_GE(
iou_aware_factor,
0,
phi::errors::InvalidArgument(
"Attr(iou_aware_factor) should be greater than or equal to 0."
"But received iou_aware_factor (%s)",
iou_aware_factor));
PADDLE_ENFORCE_LE(
iou_aware_factor,
1,
phi::errors::InvalidArgument(
"Attr(iou_aware_factor) should be less than or equal to 1."
"But received iou_aware_factor (%s)",
iou_aware_factor));
} else {
PADDLE_ENFORCE_EQ(
dim_x[1],
anchor_num * (5 + class_num),
phi::errors::InvalidArgument(
"Input(X) dim[1] should be equal to (anchor_mask_number * (5 "
"+ class_num))."
"But received dim[1](%s) != (anchor_mask_number * "
"(5+class_num)(%s).",
dim_x[1],
anchor_num * (5 + class_num)));
}
PADDLE_ENFORCE_EQ(
dim_imgsize.size(),
2,
phi::errors::InvalidArgument("Input(ImgSize) should be a 2-D tensor."
"But received Imgsize size(%s)",
dim_imgsize.size()));
if ((dim_imgsize[0] > 0 && dim_x[0] > 0) || config.is_runtime) {
PADDLE_ENFORCE_EQ(
dim_imgsize[0],
dim_x[0],
phi::errors::InvalidArgument(
"Input(ImgSize) dim[0] and Input(X) dim[0] should be same."));
}
PADDLE_ENFORCE_EQ(
dim_imgsize[1],
2,
phi::errors::InvalidArgument("Input(ImgSize) dim[1] should be 2."
"But received imgsize dim[1](%s).",
dim_imgsize[1]));
PADDLE_ENFORCE_GT(anchors.size(),
0,
phi::errors::InvalidArgument(
"Attr(anchors) length should be greater than 0."
"But received anchors length(%s).",
anchors.size()));
PADDLE_ENFORCE_EQ(anchors.size() % 2,
0,
phi::errors::InvalidArgument(
"Attr(anchors) length should be even integer."
"But received anchors length (%s)",
anchors.size()));
PADDLE_ENFORCE_GT(class_num,
0,
phi::errors::InvalidArgument(
"Attr(class_num) should be an integer greater than 0."
"But received class_num (%s)",
class_num));
int box_num;
if ((dim_x[2] > 0 && dim_x[3] > 0) || config.is_runtime) {
box_num = dim_x[2] * dim_x[3] * anchor_num;
} else {
box_num = -1;
}
std::vector<int64_t> dim_boxes({dim_x[0], box_num, 4});
boxes->set_dims(phi::make_ddim(dim_boxes));
boxes->set_dtype(x.dtype());
std::vector<int64_t> dim_scores({dim_x[0], box_num, class_num});
scores->set_dims(phi::make_ddim(dim_scores));
}
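// Illustrative example of the inferred shapes (example numbers, not part of
// the original function): with 6 anchor values (anchor_num = 3),
// class_num = 80, iou_aware = false and dim_x = [N, 255, 13, 13], the check
// requires dim_x[1] == 3 * (5 + 80) = 255, box_num = 13 * 13 * 3 = 507, so
// boxes is [N, 507, 4] and scores is [N, 507, 80].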
void ValueCompareInferMeta(const MetaTensor& x,
const MetaTensor& y,
MetaTensor* out,
...@@ -1201,3 +1477,4 @@ void ValueCompareInferMeta(const MetaTensor& x,
} // namespace phi
PD_REGISTER_INFER_META_FN(add_raw, phi::ElementwiseRawInferMeta);
PD_REGISTER_INFER_META_FN(conv2d, phi::ConvInferMeta);
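
PD_REGISTER_INFER_META_FN only makes the phi meta function discoverable by kernel name; on the fluid side an operator still has to point its InferShape at it. The usual pattern is sketched below, using the macros from paddle/fluid/framework/infershape_utils.h (the header this PR includes for batch_norm); whether conv2d is wired this way is not shown in this hunk:

// Sketch of the typical fluid-side wiring, not a literal excerpt from this PR:
DECLARE_INFER_SHAPE_FUNCTOR(conv2d, Conv2dInferShapeFunctor,
                            PD_INFER_META(phi::ConvInferMeta));
// Conv2dInferShapeFunctor is then passed to REGISTER_OPERATOR so the op's
// shape inference is served by phi::ConvInferMeta instead of a hand-written
// InferShape method.
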
...@@ -69,6 +69,20 @@ void CompareInferMeta(const MetaTensor& x,
                      int axis,
                      MetaTensor* out);
void ConvInferMeta(const MetaTensor& input,
const MetaTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& paddding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
bool use_addto,
int workspace_size_MB,
bool exhaustive_search,
MetaTensor* out,
MetaConfig config = MetaConfig());
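
The declaration above mirrors the conv2d attributes. As a reference for what ConvInferMeta ultimately computes per spatial dimension, here is a stand-alone helper using the standard convolution output-size formula (explicit padding assumed; SAME/VALID padding algorithms are handled separately in the real code):

// out = (in + 2 * pad - (dilation * (kernel - 1) + 1)) / stride + 1
inline int ConvOutputSize(int in, int kernel, int dilation, int pad,
                          int stride) {
  const int dkernel = dilation * (kernel - 1) + 1;
  return (in + 2 * pad - dkernel) / stride + 1;
}
// Example: in = 224, kernel = 3, dilation = 1, pad = 1, stride = 2 -> 112.
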
void CrossInferMeta(const MetaTensor& x,
                    const MetaTensor& y,
                    int axis,
...@@ -138,6 +152,10 @@ void LogLossInferMeta(const MetaTensor& input,
                      MetaTensor* out,
                      MetaConfig config = MetaConfig());
void MaskedSelectInferMeta(const MetaTensor& x,
const MetaTensor& mask,
MetaTensor* out);
void MatmulInferMeta(const MetaTensor& x,
                     const MetaTensor& y,
                     bool trans_x,
...@@ -180,6 +198,20 @@ void TriangularSolveInferMeta(const MetaTensor& x,
                              bool unitriangular,
                              MetaTensor* out);
void YoloBoxInferMeta(const MetaTensor& x,
const MetaTensor& img_size,
const std::vector<int>& anchors,
int class_num,
float conf_thresh,
int downsample_ratio,
bool clip_bbox,
float scale_x_y,
bool iou_aware,
float iou_aware_factor,
MetaTensor* boxes,
MetaTensor* scores,
MetaConfig config = MetaConfig());
void ValueCompareInferMeta(const MetaTensor& x,
                           const MetaTensor& y,
                           MetaTensor* out,
......
...@@ -14,7 +14,9 @@ limitations under the License. */
#include "paddle/phi/infermeta/multiary.h"
#include <vector>
#include "paddle/phi/common/layout.h"
#include "paddle/phi/common/scalar.h" #include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/core/meta_tensor.h" #include "paddle/phi/core/meta_tensor.h"
#include "paddle/phi/kernels/funcs/concat_funcs.h" #include "paddle/phi/kernels/funcs/concat_funcs.h"
namespace phi { namespace phi {
...@@ -200,6 +202,114 @@ void AucInferMeta(const MetaTensor& input, ...@@ -200,6 +202,114 @@ void AucInferMeta(const MetaTensor& input,
} }
} }
void BatchNormInferMeta(const MetaTensor& x,
const MetaTensor& scale,
const MetaTensor& bias,
const MetaTensor& mean,
const MetaTensor& variance,
float momentum,
float epsilon,
const std::string& data_layout_str,
bool is_test,
bool use_global_stats,
bool trainable_statistics,
bool fuse_with_relu,
MetaTensor* y,
MetaTensor* mean_out,
MetaTensor* variance_out,
MetaTensor* saved_mean,
MetaTensor* saved_variance,
MetaTensor* reserve_space,
MetaConfig config) {
const auto x_dims = x.dims();
for (int i = 0; i < x_dims.size(); i++) {
PADDLE_ENFORCE_EQ(
(x_dims[i] == -1) || (x_dims[i] > 0),
true,
phi::errors::InvalidArgument(
"Each dimension of input tensor is expected to be -1 or a "
"positive number, but recieved %d. Input's shape is [%s].",
x_dims[i],
x_dims));
}
const DataLayout data_layout =
paddle::framework::StringToDataLayout(data_layout_str);
PADDLE_ENFORCE_GE(
x_dims.size(),
2,
phi::errors::InvalidArgument(
"ShapeError: the dimension of input "
"X must greater than or equal to 2. But received: the shape of input "
"X = [%s], the dimension of input X =[%d]",
x_dims,
x_dims.size()));
PADDLE_ENFORCE_LE(
x_dims.size(),
5,
phi::errors::InvalidArgument(
"ShapeError: the dimension of input X "
"must smaller than or equal to 5. But received: the shape of input X "
"= [%s], the dimension of input X = [%d]",
x_dims,
x_dims.size()));
const int64_t C = ((config.is_run_mkldnn_kernel == true) ||
(data_layout == DataLayout::kNCHW)
? x_dims[1]
: x_dims[x_dims.size() - 1]);
auto scale_dim = scale.dims();
auto bias_dim = bias.dims();
PADDLE_ENFORCE_EQ(
scale_dim.size(),
1UL,
phi::errors::InvalidArgument(
"ShapeError: the dimension of scale must equal to 1."
"But received: the shape of scale is [%s], the dimension "
"of scale is [%d]",
scale_dim,
scale_dim.size()));
PADDLE_ENFORCE_EQ(bias_dim.size(),
1UL,
phi::errors::InvalidArgument(
"ShapeError: the dimension of bias must equal to 1."
"But received: the shape of bias is [%s],the dimension "
"of bias is [%d]",
bias_dim,
bias_dim.size()));
bool check = true;
if ((!config.is_runtime) &&
(phi::product(scale_dim) <= 0 || phi::product(bias_dim) <= 0)) {
check = false;
}
if (check) {
PADDLE_ENFORCE_EQ(scale_dim[0],
C,
phi::errors::InvalidArgument(
"ShapeError: the shape of scale must equal to [%d]"
"But received: the shape of scale is [%d]",
C,
scale_dim[0]));
PADDLE_ENFORCE_EQ(bias_dim[0],
C,
phi::errors::InvalidArgument(
"ShapeError: the shape of bias must equal to [%d]"
"But received: the shape of bias is [%d]",
C,
bias_dim[0]));
}
y->set_dims(x_dims);
mean_out->set_dims({C});
variance_out->set_dims({C});
saved_mean->set_dims({C});
saved_variance->set_dims({C});
y->share_lod(x);
}
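
To summarize the shape rule implemented above: y keeps x's dims, and every per-channel output is a 1-D tensor of length C, where C comes from dim 1 for NCHW (or when the MKLDNN kernel is picked) and from the last dim for NHWC. A stand-alone sketch of that channel selection (plain C++, not Paddle code):

#include <cstdint>
#include <string>
#include <vector>

int64_t ChannelDim(const std::vector<int64_t>& x_dims,
                   const std::string& layout, bool is_mkldnn) {
  return (is_mkldnn || layout == "NCHW") ? x_dims[1] : x_dims.back();
}
// x_dims = {32, 16, 56, 56}, "NCHW" -> 16
// x_dims = {32, 56, 56, 16}, "NHWC" -> 16
// mean_out, variance_out, saved_mean and saved_variance are all [C].
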
void BilinearTensorProductInferMeta(const MetaTensor& x,
                                    const MetaTensor& y,
                                    const MetaTensor& weight,
...@@ -577,3 +687,5 @@ void WhereInferMeta(const MetaTensor& condition,
}
}  // namespace phi
PD_REGISTER_INFER_META_FN(batch_norm, phi::BatchNormInferMeta);
...@@ -72,6 +72,26 @@ void AucInferMeta(const MetaTensor& input,
                  MetaTensor* stat_neg_out,
                  MetaConfig config = MetaConfig());
void BatchNormInferMeta(const MetaTensor& x,
const MetaTensor& scale,
const MetaTensor& bias,
const MetaTensor& mean,
const MetaTensor& variance,
float momentum,
float epsilon,
const std::string& data_layout,
bool is_test,
bool use_global_stats,
bool trainable_statistics,
bool fuse_with_relu,
MetaTensor* y,
MetaTensor* mean_out,
MetaTensor* variance_out,
MetaTensor* saved_mean,
MetaTensor* saved_variance,
MetaTensor* reserve_space,
MetaConfig config = MetaConfig());
void BilinearTensorProductInferMeta(const MetaTensor& x,
                                    const MetaTensor& y,
                                    const MetaTensor& weight,
......
...@@ -304,6 +304,17 @@ void DiagonalInferMeta(const MetaTensor& input,
  out->set_dims(phi::make_ddim(out_dims));
}
void DropoutInferMeta(const MetaTensor& x, MetaTensor* out, MetaTensor* mask) {
auto x_dims = x.dims();
out->set_dims(x_dims);
out->share_lod(x);
out->set_dtype(x.dtype());
if (mask != nullptr) {
mask->set_dims(x_dims);
}
}
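
The rule above is purely shape-copying: out mirrors x (dims, lod and dtype), and the optional mask gets the same dims while its dtype is left for the kernel to decide. A tiny stand-alone illustration of the same rule:

#include <vector>

struct ShapeOnly {
  std::vector<long long> dims;
};

// out and, when requested, mask both take x's shape; nothing else changes.
void DropoutShapes(const ShapeOnly& x, ShapeOnly* out, ShapeOnly* mask) {
  out->dims = x.dims;
  if (mask != nullptr) mask->dims = x.dims;
}
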
void EighInferMeta(const MetaTensor& x,
                   const std::string& uplo,
                   MetaTensor* out_w,
...@@ -392,6 +403,26 @@ void GumbelSoftmaxInferMeta(const MetaTensor& x,
  UnchangedInferMetaCheckAxis(x, axis, out);
}
void HistogramInferMeta(
const MetaTensor& input, int64_t bins, int min, int max, MetaTensor* out) {
PADDLE_ENFORCE_GE(bins,
1,
phi::errors::InvalidArgument(
"The bins should be greater than or equal to 1."
"But received nbins is %d",
bins));
PADDLE_ENFORCE_GE(
max,
min,
phi::errors::InvalidArgument("max must be larger or equal to min."
"But received max is %d, min is %d",
max,
min));
out->set_dims({bins});
out->share_lod(input);
}
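
The output of histogram is always a 1-D tensor of length bins, independent of the input's shape, because every in-range element lands in exactly one bucket. A stand-alone sketch of that counting (illustrative only, assuming max > min):

#include <cstdint>
#include <vector>

std::vector<int64_t> HistogramSketch(const std::vector<float>& x,
                                     int64_t bins, float min, float max) {
  std::vector<int64_t> counts(bins, 0);
  const float width = (max - min) / static_cast<float>(bins);
  for (float v : x) {
    if (v < min || v > max) continue;  // out-of-range values are skipped
    int64_t b = static_cast<int64_t>((v - min) / width);
    if (b == bins) b = bins - 1;       // right edge goes into the last bin
    counts[static_cast<std::size_t>(b)]++;
  }
  return counts;  // shape is {bins}, matching out->set_dims({bins}) above
}
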
void IncrementInferMeta(const MetaTensor& x, float value, MetaTensor* out) {
  PADDLE_ENFORCE_EQ(
      product(x.dims()),
...@@ -787,6 +818,24 @@ void MultinomialInferMeta(const MetaTensor& x,
  out->set_dtype(DataType::INT64);
}
void NormInferMeta(const MetaTensor& x,
int axis,
float epsilon,
bool is_test,
MetaTensor* out,
MetaTensor* norm) {
auto xdim = x.dims();
out->set_dims(x.dims());
out->set_dtype(x.dtype());
if (is_test == false) {
if (axis < 0) axis = xdim.size() + axis;
xdim[axis] = 1;
norm->set_dims(xdim);
norm->set_dtype(x.dtype());
}
}
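
In training mode the auxiliary norm tensor keeps x's shape with the normalized axis collapsed to 1, and a negative axis wraps around first. A small stand-alone illustration:

#include <vector>

// x: [4, 8, 16], axis = -1 -> axis wraps to 2, norm shape becomes [4, 8, 1];
// out always keeps x's full shape.
std::vector<long long> NormAuxShape(std::vector<long long> xdim, int axis) {
  if (axis < 0) axis += static_cast<int>(xdim.size());  // wrap negative axis
  xdim[static_cast<std::size_t>(axis)] = 1;             // reduce that axis
  return xdim;
}
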
void PadInferMeta(const MetaTensor& input,
                  const std::vector<int>& paddings,
                  float pad_value,
......
...@@ -74,6 +74,8 @@ void DiagInferMeta(const MetaTensor& x,
void DiagonalInferMeta(
    const MetaTensor& input, int offset, int axis1, int axis2, MetaTensor* out);
void DropoutInferMeta(const MetaTensor& x, MetaTensor* out, MetaTensor* mask);
void EighInferMeta(const MetaTensor& x,
                   const std::string& uplo,
                   MetaTensor* out_w,
...@@ -89,6 +91,8 @@ void GumbelSoftmaxInferMeta(const MetaTensor& x,
                            bool hard,
                            int axis,
                            MetaTensor* out);
void HistogramInferMeta(
const MetaTensor& input, int64_t bins, int min, int max, MetaTensor* out);
void IncrementInferMeta(const MetaTensor& x, float value, MetaTensor* out);
...@@ -130,6 +134,12 @@ void MultinomialInferMeta(const MetaTensor& x,
                          int num_samples,
                          bool replacement,
                          MetaTensor* out);
void NormInferMeta(const MetaTensor& x,
int axis,
float epsilon,
bool is_test,
MetaTensor* out,
MetaTensor* norm);
void PadInferMeta(const MetaTensor& input,
                  const std::vector<int>& paddings,
......
...@@ -19,6 +19,7 @@ import paddle.inference as paddle_infer
from functools import partial
from typing import Optional, List, Callable, Dict, Any, Set
import unittest
import paddle
import hypothesis
from hypothesis import given, settings, seed, example, assume
...@@ -104,4 +105,5 @@ class TestConvGeluMkldnnFusePass(PassAutoScanTest):
if __name__ == "__main__":
paddle.enable_static()
    unittest.main()