Commit b3a5fc1a authored by HappyAngel, committed by GitHub

update conv 2-pad to 4-pad (#2404)

* fix conv 2-pad to 4-pad

* fix compute conv shape

* fix pad, test=develop

* rename conv_depthwise_3x3s1_fp.cc to conv3x3s1p01_depthwise_fp32.cc to distinguish it from conv3x3s1_depthwise_fp32.cc

* delete printf note in conv3x3s1, test=develop

* delete printf note, test=develop

* delete gem_sdot.h, test=develop

it is copied from __gemm_sdot_meta_.h

* update compute padding, test=develop

* fix padding size, must be 2 or 4. test=develop

* fix format in operators/conv_op.cc, test=develop

* change #if 0 to #if 1, test=develop

* put 2-pad to 4-pad in AttachImpl, test=develop

* fix clang-format error in tests/math/conv_compute_test, test=develop

* fix x86 test result error, test=develop

* add asymmetric padding test case in lite/tests/math/conv_compute.cc, test=develop

* change paddings type to support dynamically modify, test=develop

* fix x86 build error in conv_compute_test, test=develop

* fix opencl build error, test=develop

* fix opencl build error, test=develop

* fix opencl/conv_compute build error, test=develop

* fix opencl/conv_compute build error, test=develop

* fix format in kernels/opencl/conv_compute_test, test=develop

* fix build error, test=develop

fix build error in kernels/x86/conv_compute.h
Parent f68ea81c
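The diff below is easiest to read with the new padding layout in mind: param.paddings moves from a two-value {pad_h, pad_w} vector to a std::shared_ptr to a four-value {pad_top, pad_bottom, pad_left, pad_right} vector, which is why kernels now read paddings[0] for the height pad and paddings[2] for the width pad. A minimal sketch of the 2-pad to 4-pad expansion done when attaching the op (the helper name is hypothetical, not the exact AttachImpl code):

#include <cassert>
#include <memory>
#include <vector>

// Sketch: expand a 2-value {pad_h, pad_w} attribute into the 4-value
// {pad_top, pad_bottom, pad_left, pad_right} layout the kernels expect.
// Per the commit message, any size other than 2 or 4 is rejected.
std::shared_ptr<std::vector<int>> ExpandPaddings(std::vector<int> pads) {
  if (pads.size() == 2) {
    // symmetric case: replicate each value on both sides of its axis
    pads = {pads[0], pads[0], pads[1], pads[1]};
  }
  assert(pads.size() == 4);  // "padding size, must be 2 or 4"
  return std::make_shared<std::vector<int>>(pads);
}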
@@ -35,9 +35,10 @@ size_t conv3x3s1_direct_workspace_size(const operators::ConvParam& param,
   auto dim_in = param.x->dims();
   auto dim_out = param.output->dims();
   const int threads = ctx->threads();
+  auto paddings = *param.paddings;
   int llc_size = ctx->llc_size() / sizeof(float);
-  const int pad_w = param.paddings[1];
-  const int pad_h = param.paddings[0];
+  const int pad_w = paddings[2];
+  const int pad_h = paddings[0];
   int ow = dim_out[3];
   int oh = dim_out[2];
   int ic = dim_in[1];
@@ -74,9 +75,10 @@ void conv_3x3s1_direct_fp32(const float* i_data,
                             ARMContext* ctx) {
   const int threads = ctx->threads();
   int l2_size = ctx->llc_size() / sizeof(float);
-  const int pad_h = param.paddings[0];
-  const int pad_w = param.paddings[1];
+  auto paddings = *param.paddings;
+  const int pad_h = paddings[0];
+  const int pad_w = paddings[2];
   const int wout_round = ROUNDUP(ow, OUT_W_BLOCK);
   const int win_round = wout_round + 2;
   bool flag_relu = param.fuse_relu;
......
@@ -41,10 +41,11 @@ void conv_3x3s1_direct_int8(const int8_t* din,
                             const operators::ConvParam& param,
                             Context<TARGET(kARM)>* ctx,
                             const float* scale) {
+  auto paddings = *param.paddings;
   bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias;
-  int pad_h = param.paddings[0];
-  int pad_w = param.paddings[1];
+  int pad_h = paddings[0];
+  int pad_w = paddings[2];
   const int threads = ctx->threads();
   int llc_size = ctx->llc_size() / 4;
......
@@ -39,8 +39,11 @@ void conv_3x3s1_depthwise_fp32(const float* i_data,
                                const operators::ConvParam& param,
                                ARMContext* ctx) {
   int threads = ctx->threads();
-  const int pad_h = param.paddings[0];
-  const int pad_w = param.paddings[1];
+  auto paddings = *param.paddings;
+  const int pad_h = paddings[0];
+  const int pad_w = paddings[2];
   const int out_c_block = 4;
   const int out_h_kernel = 2;
   const int out_w_kernel = 4;
......
@@ -32,10 +32,11 @@ size_t conv3x3s2_direct_workspace_size(const operators::ConvParam& param,
                                        ARMContext* ctx) {
   auto dim_in = param.x->dims();
   auto dim_out = param.output->dims();
+  auto paddings = *param.paddings;
   const int threads = ctx->threads();
   int llc_size = ctx->llc_size() / sizeof(float);
-  const int pad_w = param.paddings[1];
-  const int pad_h = param.paddings[0];
+  const int pad_w = paddings[2];
+  const int pad_h = paddings[0];
   int ow = dim_out[3];
   int oh = dim_out[2];
   int ic = dim_in[1];
@@ -73,10 +74,11 @@ void conv_3x3s2_direct_fp32(const float* i_data,
   //! 3x3s2 convolution, implemented by direct algorithm
   //! prepack input to tmp buffer
   //! write output to tmp buffer
+  auto paddings = *param.paddings;
   const int threads = ctx->threads();
   int l2_size = ctx->llc_size() / sizeof(float);
-  const int pad_w = param.paddings[1];
-  const int pad_h = param.paddings[0];
+  const int pad_w = paddings[2];
+  const int pad_h = paddings[0];
   const int wout_round = ROUNDUP(ow, OUT_W_BLOCK);
   const int win_round = wout_round * 2 /*stride_w*/ + 1;
   bool flag_relu = param.fuse_relu;
......
@@ -46,10 +46,11 @@ void conv_3x3s2_direct_int8(const int8_t* din,
   //! 3x3s2 int8 convolution, implemented by direct algorithm
   //! prepack input to tmp buffer
   //! write output to tmp buffer
+  auto paddings = *param.paddings;
   bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias;
-  int pad_h = param.paddings[0];
-  int pad_w = param.paddings[1];
+  int pad_h = paddings[0];
+  int pad_w = paddings[1];
   const int threads = ctx->threads();
   int llc_size = ctx->llc_size() / 4;
@@ -472,10 +473,11 @@ void conv_3x3s2_direct_int8(const int8_t* din,
   //! 3x3s2 int8 convolution, implemented by direct algorithm
   //! prepack input to tmp buffer
   //! write output to tmp buffer
+  auto paddings = *param.paddings;
   bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias;
-  int pad_h = param.paddings[0];
-  int pad_w = param.paddings[1];
+  int pad_h = paddings[0];
+  int pad_w = paddings[1];
   const int threads = ctx->threads();
   //! set 1/4 l2 cache
   int llc_size = ctx->llc_size() / 4;
......
@@ -39,9 +39,10 @@ void conv_3x3s2_depthwise_fp32(const float* i_data,
                                const float* bias,
                                const operators::ConvParam& param,
                                ARMContext* ctx) {
+  auto paddings = *param.paddings;
   int threads = ctx->threads();
-  const int pad_h = param.paddings[0];
-  const int pad_w = param.paddings[1];
+  const int pad_h = paddings[0];
+  const int pad_w = paddings[2];
   const int out_c_block = 4;
   const int out_h_kernel = 1;
   const int out_w_kernel = 4;
......
@@ -85,38 +85,6 @@ void conv_depthwise_3x3s2_fp32(const float* din,
                                bool flag_relu,
                                ARMContext* ctx);
-void conv_depthwise_3x3p0_fp32(const float* din,
-                               float* dout,
-                               int num,
-                               int ch_out,
-                               int h_out,
-                               int w_out,
-                               int ch_in,
-                               int h_in,
-                               int w_in,
-                               const float* weights,
-                               const float* bias,
-                               int stride,
-                               bool flag_bias,
-                               bool flag_relu,
-                               ARMContext* ctx);
-void conv_depthwise_3x3p1_fp32(const float* din,
-                               float* dout,
-                               int num,
-                               int ch_out,
-                               int h_out,
-                               int w_out,
-                               int ch_in,
-                               int h_in,
-                               int w_in,
-                               const float* weights,
-                               const float* bias,
-                               int stride,
-                               bool flag_bias,
-                               bool flag_relu,
-                               ARMContext* ctx);
 template <typename Dtype>
 void conv_depthwise_3x3s1_int8(Dtype* dout,
                                const int8_t* din,
......
@@ -107,29 +107,35 @@ void im2col(const Dtype* data_im,
             int width,
             int kernel_h,
             int kernel_w,
-            int pad_h,
-            int pad_w,
+            int pad_top,
+            int pad_bottom,
+            int pad_left,
+            int pad_right,
             int stride_h,
             int stride_w,
             int dilation_h,
             int dilation_w,
             Dtype* data_col) {
   const int output_h =
-      (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
+      (height + pad_top + pad_bottom - (dilation_h * (kernel_h - 1) + 1)) /
+          stride_h +
+      1;
   const int output_w =
-      (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
+      (width + pad_left + pad_right - (dilation_w * (kernel_w - 1) + 1)) /
+          stride_w +
+      1;
   const int channel_size = height * width;
   for (int channel = channels; channel--; data_im += channel_size) {
     for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
       for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
-        int input_row = -pad_h + kernel_row * dilation_h;
+        int input_row = -pad_top + kernel_row * dilation_h;
         for (int output_rows = output_h; output_rows; output_rows--) {
           if (!is_a_ge_zero_and_a_lt_b(input_row, height)) {
             for (int output_cols = output_w; output_cols; output_cols--) {
               *(data_col++) = 0;
             }
           } else {
-            int input_col = -pad_w + kernel_col * dilation_w;
+            int input_col = -pad_left + kernel_col * dilation_w;
             for (int output_col = output_w; output_col; output_col--) {
               if (is_a_ge_zero_and_a_lt_b(input_col, width)) {
                 *(data_col++) = data_im[input_row * width + input_col];
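With separate top/bottom and left/right pads, the output-size rule above generalizes the old symmetric 2 * pad term. A worked instance: height = 5, kernel_h = 3, dilation_h = 1, stride_h = 1, pad_top = 0, pad_bottom = 1 gives dkernel = 1 * (3 - 1) + 1 = 3 and output_h = (5 + 0 + 1 - 3) / 1 + 1 = 4. A minimal sketch of the same rule as a standalone helper (illustrative, not code from this commit):

// Sketch of the generalized output-size rule used by im2col above,
// with the two pads of an axis passed separately.
int OutSize(int in, int k, int dila, int pad_a, int pad_b, int stride) {
  const int dkernel = dila * (k - 1) + 1;
  return (in + pad_a + pad_b - dkernel) / stride + 1;
}
// OutSize(5, 3, 1, /*pad_top=*/0, /*pad_bottom=*/1, /*stride=*/1) == 4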
@@ -361,6 +367,9 @@ void conv_im2col_gemm(const float* i_data,
   float* tmp_work_space =
       ctx->workspace_data<float>() + ctx->llc_size() / sizeof(float);
+  auto paddings = *param.paddings;
+  auto dilations = *param.dilations;
   //! use gemv when the output channel size = 1
   for (int b = 0; b < num; ++b) {
     // dC
@@ -378,12 +387,14 @@ void conv_im2col_gemm(const float* i_data,
            win,
            kernel_h,
            kernel_w,
-           param.paddings[0],
-           param.paddings[1],
+           paddings[0],
+           paddings[1],
+           paddings[2],
+           paddings[3],
            param.strides[0],
            param.strides[1],
-           param.dilations[0],
-           param.dilations[1],
+           dilations[0],
+           dilations[1],
            dB);
   if (n == 1) {
@@ -435,14 +446,16 @@ void conv_im2col_gemm_int8(const int8_t* i_data,
                            const float* scale) {
   int group = param.groups;
   auto filter_dims = param.filter->dims();
+  auto paddings = *param.paddings;
+  auto dilations = *param.dilations;
   int kernel_h = filter_dims[2];
   int kernel_w = filter_dims[3];
   int stride_h = param.strides[0];
   int stride_w = param.strides[1];
-  int dila_h = param.dilations[0];
-  int dila_w = param.dilations[1];
-  int pad_h = param.paddings[0];
-  int pad_w = param.paddings[1];
+  int dila_h = dilations[0];
+  int dila_w = dilations[1];
+  int pad_h = paddings[0];
+  int pad_w = paddings[2];
   const int m = oc / group;
   const int n = oh * ow;
   const int k = ic * kernel_h * kernel_w / group;
@@ -483,7 +496,9 @@ void conv_im2col_gemm_int8(const int8_t* i_data,
            kernel_h,
            kernel_w,
            pad_h,
+           paddings[1],
            pad_w,
+           paddings[3],
            stride_h,
            stride_w,
            dila_h,
@@ -563,90 +578,83 @@ void conv_depthwise_3x3_fp32(const void* din,
                              const operators::ConvParam& param,
                              ARMContext* ctx,
                              const float* scale) {
-  const int pad_h = param.paddings[0];
-  const int pad_w = param.paddings[1];
-  if (pad_w != pad_h) {
-    LOG(FATAL) << "fp32 depthwise conv3x3 pad_w: " << pad_w
-               << ", pad_h: " << pad_h << " must be equal";
-    return;
-  }
+  auto paddings = *param.paddings;
+  const int pad_h = paddings[0];
+  const int pad_w = paddings[2];
   int stride = param.strides[1];
   int pad = pad_w;
   bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias != nullptr;
-  if (stride == 1 && pad < 2) {  // support pad = [0, 1]
-    conv_depthwise_3x3s1_fp32(reinterpret_cast<const float*>(din),
-                              reinterpret_cast<float*>(dout),
-                              num,
-                              ch_out,
-                              h_out,
-                              w_out,
-                              ch_in,
-                              h_in,
-                              w_in,
-                              reinterpret_cast<const float*>(weights),
-                              bias,
-                              pad,
-                              flag_bias,
-                              flag_relu,
-                              ctx);
-  } else if (stride == 2 && pad < 2) {  // support pad = [0, 1]
-    conv_depthwise_3x3s2_fp32(reinterpret_cast<const float*>(din),
-                              reinterpret_cast<float*>(dout),
-                              num,
-                              ch_out,
-                              h_out,
-                              w_out,
-                              ch_in,
-                              h_in,
-                              w_in,
-                              reinterpret_cast<const float*>(weights),
-                              bias,
-                              pad,
-                              flag_bias,
-                              flag_relu,
-                              ctx);
-  } else {
-    LOG(FATAL) << "fp32 depthwise conv3x3 stride: " << stride
-               << " or pad(<2): " << pad << " unsupported";
-  }
-#if 0
-  if (pad == 1) {
-    conv_depthwise_3x3p1_fp32(reinterpret_cast<const float*>(din),
-                              reinterpret_cast<float*>(dout),
-                              num,
-                              ch_out,
-                              h_out,
-                              w_out,
-                              ch_in,
-                              h_in,
-                              w_in,
-                              reinterpret_cast<const float*>(weights),
-                              bias,
-                              stride,
-                              flag_bias,
-                              flag_relu,
-                              ctx);
-  } else if (pad == 0 && h_in > 2) {
-    conv_depthwise_3x3p0_fp32(reinterpret_cast<const float*>(din),
-                              reinterpret_cast<float*>(dout),
-                              num,
-                              ch_out,
-                              h_out,
-                              w_out,
-                              ch_in,
-                              h_in,
-                              w_in,
-                              reinterpret_cast<const float*>(weights),
-                              bias,
-                              stride,
-                              flag_bias,
-                              flag_relu,
-                              ctx);
-  } else {
-    LOG(FATAL) << "unsupport this type 3x3 dw conv";
-  }
-#endif
+  bool pads_equal =
+      ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
+  if (stride == 1) {
+    if (pads_equal && (pad_h == pad_w) && (pad < 2)) {  // support pad = [0, 1]
+      conv_depthwise_3x3s1_fp32(reinterpret_cast<const float*>(din),
+                                reinterpret_cast<float*>(dout),
+                                num,
+                                ch_out,
+                                h_out,
+                                w_out,
+                                ch_in,
+                                h_in,
+                                w_in,
+                                reinterpret_cast<const float*>(weights),
+                                bias,
+                                pad,
+                                flag_bias,
+                                flag_relu,
+                                ctx);
+    } else {
+      conv_3x3s1_depthwise_fp32(reinterpret_cast<const float*>(din),
+                                reinterpret_cast<float*>(dout),
+                                num,
+                                ch_out,
+                                h_out,
+                                w_out,
+                                ch_in,
+                                h_in,
+                                w_in,
+                                reinterpret_cast<const float*>(weights),
+                                bias,
+                                param,
+                                ctx);
+    }
+  } else if (stride == 2) {
+    if (pad_h == pad_w && (pad < 2)) {  // support pad = [0, 1]
+      conv_depthwise_3x3s2_fp32(reinterpret_cast<const float*>(din),
+                                reinterpret_cast<float*>(dout),
+                                num,
+                                ch_out,
+                                h_out,
+                                w_out,
+                                ch_in,
+                                h_in,
+                                w_in,
+                                reinterpret_cast<const float*>(weights),
+                                bias,
+                                pad,
+                                flag_bias,
+                                flag_relu,
+                                ctx);
+    } else {
+      conv_3x3s2_depthwise_fp32(reinterpret_cast<const float*>(din),
+                                reinterpret_cast<float*>(dout),
+                                num,
+                                ch_out,
+                                h_out,
+                                w_out,
+                                ch_in,
+                                h_in,
+                                w_in,
+                                reinterpret_cast<const float*>(weights),
+                                bias,
+                                param,
+                                ctx);
+    }
+  } else {
+    LOG(FATAL) << "fp32 depthwise conv3x3 stride: " << stride
+               << " unsupported";
+  }
 }
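The dispatch above keeps the legacy symmetric-pad kernels on their fast path and routes everything else to the new param-aware conv_3x3s1_depthwise_fp32 / conv_3x3s2_depthwise_fp32 implementations. Condensed, the stride-1 decision looks roughly like the sketch below (an editorial condensation, not verbatim code from the diff):

#include <vector>

// true  -> legacy conv_depthwise_3x3s1_fp32 (symmetric pad 0 or 1)
// false -> new 4-pad-aware conv_3x3s1_depthwise_fp32
bool UseLegacy3x3Dw(const std::vector<int>& p) {  // {top, bottom, left, right}
  const bool pads_equal = (p[0] == p[1]) && (p[2] == p[3]);
  return pads_equal && (p[0] == p[2]) && (p[0] < 2);
}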
 void conv_depthwise_5x5_fp32(const void* din,
@@ -663,7 +671,8 @@ void conv_depthwise_5x5_fp32(const void* din,
                              const operators::ConvParam& param,
                              ARMContext* ctx,
                              const float* scale) {
-  int pad = param.paddings[1];
+  auto paddings = *param.paddings;
+  int pad = paddings[0];
   int stride = param.strides[1];
   bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias != nullptr;
@@ -719,8 +728,9 @@ void conv_depthwise_3x3_int8_fp32(const void* din,
                                   const operators::ConvParam& param,
                                   ARMContext* ctx,
                                   const float* scale) {
-  int pad_h = param.paddings[0];
-  int pad_w = param.paddings[1];
+  auto paddings = *param.paddings;
+  int pad_h = paddings[0];
+  int pad_w = paddings[2];
   int stride = param.strides[1];
   bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias != nullptr;
@@ -777,8 +787,9 @@ void conv_depthwise_3x3_int8_int8(const void* din,
                                   const operators::ConvParam& param,
                                   ARMContext* ctx,
                                   const float* scale) {
-  int pad_h = param.paddings[0];
-  int pad_w = param.paddings[1];
+  auto paddings = *param.paddings;
+  int pad_h = paddings[0];
+  int pad_w = paddings[2];
   int stride = param.strides[1];
   bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias != nullptr;
@@ -835,8 +846,9 @@ void conv_depthwise_5x5_int8_fp32(const void* din,
                                   const operators::ConvParam& param,
                                   ARMContext* ctx,
                                   const float* scale) {
-  int pad_h = param.paddings[0];
-  int pad_w = param.paddings[1];
+  auto paddings = *param.paddings;
+  int pad_h = paddings[0];
+  int pad_w = paddings[2];
   int stride = param.strides[1];
   bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias != nullptr;
@@ -876,8 +888,9 @@ void conv_depthwise_5x5_int8_int8(const void* din,
                                   const operators::ConvParam& param,
                                   ARMContext* ctx,
                                   const float* scale) {
-  int pad_h = param.paddings[0];
-  int pad_w = param.paddings[1];
+  auto paddings = *param.paddings;
+  int pad_h = paddings[0];
+  int pad_w = paddings[2];
   int stride = param.strides[1];
   bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias != nullptr;
......
@@ -37,9 +37,9 @@ void conv_winograd3x3(const float* din,
                       const operators::ConvParam& param,
                       ARMContext* ctx) {
   int threads = ctx->threads();
-  const int pad_h = param.paddings[0];
-  const int pad_w = param.paddings[1];
+  auto paddings = *param.paddings;
+  const int pad_h = paddings[0];
+  const int pad_w = paddings[1];
   int size_in_channel = win * hin;
   int size_out_channel = wout * hout;
   bool flag_relu = param.fuse_relu;
......
@@ -31,6 +31,9 @@ bool CudnnConv2D<PRECISION(kFloat)>::create(const operators::ConvParam& param,
   auto o_dims = param.output->dims();
   int batch = x_dims[0];
+  auto paddings = *param.paddings;
+  auto dilations = *param.dilations;
   int iw = x_dims[3];  // nchw
   int ih = x_dims[2];
   int ic = x_dims[1];
@@ -41,10 +44,10 @@ bool CudnnConv2D<PRECISION(kFloat)>::create(const operators::ConvParam& param,
   int kh = w_dims[2];
   int sw = param.strides[1];
   int sh = param.strides[0];
-  int pw = param.paddings[1];
-  int ph = param.paddings[0];
-  int dw = param.dilations[1];
-  int dh = param.dilations[0];
+  int pw = paddings[2];
+  int ph = paddings[0];
+  int dw = dilations[1];
+  int dh = dilations[0];
   CHECK(ic % param.groups == 0)
       << "The conv input channel shoud be divide group number.";
@@ -133,8 +136,8 @@ bool CudnnConv2D<PRECISION(kFloat)>::create(const operators::ConvParam& param,
   this->fwd_algo_ = algo_cache.GetAlgorithm(x_dims.Vectorize(),
                                             w_dims.Vectorize(),
                                             param.strides,
-                                            param.paddings,
-                                            param.dilations,
+                                            *param.paddings,
+                                            *param.dilations,
                                             0,
                                             search_func);
@@ -311,12 +314,15 @@ bool CudnnConv2DInt8<Ptype_out>::create(const operators::ConvParam& param,
   int kw = w_dims[2];
   int kh = w_dims[1];
+  auto paddings = *param.paddings;
+  auto dilations = *param.dilations;
   int sw = param.strides[1];
   int sh = param.strides[0];
-  int pw = param.paddings[1];
-  int ph = param.paddings[0];
-  int dw = param.dilations[1];
-  int dh = param.dilations[0];
+  int pw = paddings[2];
+  int ph = paddings[0];
+  int dw = dilations[1];
+  int dh = dilations[0];
   std::vector<float> weight_scale = param.weight_scale;
   float input_scale = param.input_scale;
......
@@ -294,10 +294,17 @@ inline void split_filter_num(const ConvParam& c_param) {
     args.image.channels = input->shape().channel();
     args.image.width = input->shape().width();
     args.image.height = input->shape().height();
-    args.image.pad_width = param.paddings[1];
+    auto paddings = *param.paddings;
+    args.image.pad_width = param.paddings[2];
     args.image.pad_height = param.paddings[0];
     args.output.address = out_address;
     args.output.scale_address = out_scale_address;
+    bool pad_equal =
+        ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
+    if (!pad_equal) {
+      LOG(FATAL) << "This pad not support ! " << paddings[0] << ", "
+                 << paddings[1] << ", " << paddings[2] << ", " << paddings[3];
+    }
     param.splitParams().push_back(conv_param);
   }
 }
@@ -372,10 +379,18 @@ inline void split_channel(const ConvParam& c_param) {
     args.image.channels = conv_param->input.shape().channel();
     args.image.width = conv_param->input.shape().width();
     args.image.height = conv_param->input.shape().height();
-    args.image.pad_width = param.paddings[1];
-    args.image.pad_height = param.paddings[0];
+    auto paddings = *param.paddings;
+    args.image.pad_width = paddings[2];
+    args.image.pad_height = paddings[0];
     args.output.address = conv_param->output.mutableData<void>();
     args.output.scale_address = conv_param->output.scale();
+    bool pad_equal =
+        ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
+    if (!pad_equal) {
+      LOG(FATAL) << "This pad not support ! " << paddings[0] << ", "
+                 << paddings[1] << ", " << paddings[2] << ", " << paddings[3];
+    }
     param.splitParams().push_back(conv_param);
   }
 }
......
@@ -61,14 +61,21 @@ class DepthwiseConvPE : public PE {
     args.image.channels = input->shape().channel();
     args.image.height = input->shape().height();
     args.image.width = input->shape().width();
-    args.image.pad_width = param.paddings[0];
-    args.image.pad_height = param.paddings[1];
+    auto paddings = *param.paddings;
+    args.image.pad_width = param.paddings[2];
+    args.image.pad_height = param.paddings[0];
     args.image.scale_address = input->scale();
     args.output.address = output->data<void>();
     args.output.scale_address = output->scale();
     args.out_width = param.output->shape().width();
     args.out_height = param.output->shape().height();
     args.sub_conv_num = 1;
+    bool pad_equal =
+        ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
+    if (!pad_equal) {
+      LOG(FATAL) << "This pad not support ! " << paddings[0] << ", "
+                 << paddings[1] << ", " << paddings[2] << ", " << paddings[3];
+    }
     param.args = args;
     inplace_.relu_enable = param_.relu.enabled;
......
@@ -32,13 +32,17 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
   auto w_dims = param.filter->dims();
   auto& ctx = this->ctx_->template As<ARMContext>();
+  auto paddings = *param.paddings;
+  auto dilations = *param.dilations;
   int ic = w_dims[1] * param.groups;
   int oc = w_dims[0];
   int kh = w_dims[2];  // oihw
   int kw = w_dims[3];
-  int pad = param.paddings[0];
+  int pad = paddings[0];
   int stride = param.strides[0];
+  bool pads_equal =
+      ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
   int chin = param.x->dims()[1];
   int hin = param.x->dims()[2];
   int win = param.x->dims()[3];
@@ -46,16 +50,18 @@ void ConvCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
   int hout = param.output->dims()[2];
   int wout = param.output->dims()[3];
-  bool kps_equal = (param.paddings[0] == param.paddings[1]) &&
-                   (param.strides[0] == param.strides[1]) && (kw == kh);
-  bool no_dilation = (param.dilations[0] == 1) && (param.dilations[1] == 1);
+  bool pads_all_equal = (pads_equal && paddings[0] == paddings[2]);
+  bool kps_equal = (param.strides[0] == param.strides[1]) && (kw == kh);
+  bool no_dilation = (dilations[0] == 1) && (dilations[1] == 1);
   bool flag_dw_3x3 = (kw == 3 && kh == 3 && (stride == 1 || stride == 2));
-  bool flag_dw_5x5 =
-      (kw == 5 && stride == 1) || (kw == 5 && stride == 2 && pad == 2);
+  bool flag_dw_5x5 = pads_all_equal && ((kw == 5 && stride == 1) ||
+                                        (kw == 5 && stride == 2 && pad == 2));
   bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
   /// select conv impl
-  if (param.groups == ic && ic == oc && kps_equal && no_dilation && flag_dw) {
+  if (param.groups == ic && ic == oc && kps_equal && pads_equal &&
+      no_dilation && flag_dw) {
     /// dw conv impl
     impl_ = new DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>;
     VLOG(3) << "invoking dw conv";
@@ -92,22 +98,29 @@ void ConvCompute<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
   auto& ctx = this->ctx_->template As<ARMContext>();
+  auto paddings = *param.paddings;
+  auto dilations = *param.dilations;
+  bool pads_equal =
+      ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
   int ic = param.groups * w_dims[1];
   int oc = w_dims[0];
   int kh = w_dims[2];  // oihw
   int kw = w_dims[3];
-  int ph = param.paddings[1];
-  int pw = param.paddings[0];
+  int ph = paddings[0];
+  int pw = paddings[2];
   int sh = param.strides[1];
   int sw = param.strides[0];
+  bool pads_all_equal = (pads_equal && paddings[0] == paddings[2]);
   bool kps_equal = (pw == ph) && (sh == sw) && (kw == kh);
-  bool no_dilation = (param.dilations[0] == 1) && (param.dilations[1] == 1);
-  bool flag_dw_3x3 = (kw == 3 && kh == 3) && (sw == 1 || sw == 2);
-  bool flag_dw_5x5 = (kw == 5 && sw == 1);
+  bool no_dilation = (dilations[0] == 1) && (dilations[1] == 1);
+  bool flag_dw_3x3 = (kw == 3 && kh == 3 && (sw == 1 || sw == 2));
+  bool flag_dw_5x5 = pads_all_equal &&
+                     ((kw == 5 && sw == 1) || (kw == 5 && sw == 2 && pw == 2));
   bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
-  if (param.groups == ic && ic == oc && kps_equal && no_dilation && flag_dw) {
+  if (param.groups == ic && ic == oc && kps_equal && pads_equal &&
+      no_dilation && flag_dw) {
     impl_ = new DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>;
     VLOG(3) << "Run DepthwiseConv Int8";
   } else if (param.groups == 1 && kw == 3 && (sw == 1 || sw == 2) &&
@@ -130,23 +143,30 @@ void ConvCompute<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
   auto w_dims = param.filter->dims();
   auto& ctx = this->ctx_->template As<ARMContext>();
+  auto paddings = *param.paddings;
+  auto dilations = *param.dilations;
+  bool pads_equal =
+      ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
   int ic = w_dims[1] * param.groups;
   int oc = w_dims[0];
   int kh = w_dims[2];  // oihw
   int kw = w_dims[3];
-  int ph = param.paddings[1];
-  int pw = param.paddings[0];
+  int ph = paddings[0];
+  int pw = paddings[2];
   int sh = param.strides[1];
   int sw = param.strides[0];
+  bool pads_all_equal = (pads_equal && paddings[0] == paddings[2]);
   bool kps_equal = (pw == ph) && (sh == sw) && (kw == kh);
-  bool no_dilation = (param.dilations[0] == 1) && (param.dilations[1] == 1);
-  bool flag_dw_3x3 = (kw == 3 && kh == 3) && (sw == 1 || sw == 2);
-  bool flag_dw_5x5 = (kw == 5 && sw == 1);
+  bool no_dilation = (dilations[0] == 1) && (dilations[1] == 1);
+  bool flag_dw_3x3 = (kw == 3 && kh == 3 && (sw == 1 || sw == 2));
+  bool flag_dw_5x5 = pads_all_equal &&
+                     ((kw == 5 && sw == 1) || (kw == 5 && sw == 2 && pw == 2));
   bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
-  if (param.groups == ic && ic == oc && kps_equal && no_dilation && flag_dw) {
+  if (param.groups == ic && ic == oc && kps_equal && pads_equal &&
+      no_dilation && flag_dw) {
     impl_ = new DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>;
     VLOG(3) << "Run DepthwiseConv Int8";
   } else if (param.groups == 1 && kw == 3 && (sw == 1 || sw == 2) &&
......
@@ -31,19 +31,28 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
   // select dw conv kernel
   if (kw == 3) {
     VLOG(5) << "invoke 3x3 dw conv fp32";
-    // trans weights
-    constexpr int cblock = 4;
-    auto oc = w_dims[0];
-    auto kh = w_dims[2];
-    auto cround = ROUNDUP(oc, cblock);
-    weights_.Resize({cround, 1, kh, kw});
-    // auto w_data = weights_.mutable_data<float>();
-    // auto w_data_in = param.filter->data<float>();
-    // lite::arm::math::conv_trans_weights_numc(
-    //     w_data_in, w_data, oc, 1, cblock, kh * kw);
-    impl_ = lite::arm::math::conv_depthwise_3x3_fp32;
-    flag_trans_weights_ = false;
-    // flag_trans_weights_ = true;
+    auto paddings = *param.paddings;
+    bool pads_equal =
+        ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
+    if (pads_equal && paddings[0] == paddings[2] &&
+        (paddings[0] == 0 || paddings[0] == 1)) {
+      impl_ = lite::arm::math::conv_depthwise_3x3_fp32;
+      flag_trans_weights_ = false;
+    } else {
+      // trans weights
+      constexpr int cblock = 4;
+      auto oc = w_dims[0];
+      auto kh = w_dims[2];
+      auto cround = ROUNDUP(oc, cblock);
+      weights_.Resize({cround, 1, kh, kw});
+      auto w_data = weights_.mutable_data<float>();
+      auto w_data_in = param.filter->data<float>();
+      lite::arm::math::conv_trans_weights_numc(
+          w_data_in, w_data, oc, 1, cblock, kh * kw);
+      impl_ = lite::arm::math::conv_depthwise_3x3_fp32;
+      flag_trans_weights_ = true;
+    }
   } else if (kw == 5) {
     VLOG(5) << "invoke 5x5 dw conv fp32";
     impl_ = lite::arm::math::conv_depthwise_5x5_fp32;
......
@@ -52,12 +52,19 @@ class GemmLikeConv : public KernelLite<TARGET(kARM), Ptype> {
     int oc = o_dims[1];
     int kw = w_dims[3];
     int kh = w_dims[2];
+    auto paddings = *param.paddings;
+    auto dilations = *param.dilations;
     int sw = param.strides[1];
     int sh = param.strides[0];
-    int pw = param.paddings[1];
-    int ph = param.paddings[0];
-    int dw = param.dilations[1];
-    int dh = param.dilations[0];
+    int pw = paddings[2];
+    int ph = paddings[0];
+    int dw = dilations[1];
+    int dh = dilations[0];
+    bool pads_equal =
+        ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
     int m = oc / param.groups;
     int k = ic * kh * kw / param.groups;
@@ -66,7 +73,7 @@ class GemmLikeConv : public KernelLite<TARGET(kARM), Ptype> {
     bool kps_equal = (pw == ph) && (sw == sh) && (kw == kh);
     bool ks_equal = (sw == sh) && (kw == kh);
     //! select conv gemmlike kernel
-    if (kw == 1 && sw == 1 && pw == 0 && kps_equal) {
+    if (kw == 1 && sw == 1 && pw == 0 && kps_equal && pads_equal) {
       //! 1x1s1p0 gemmlike conv
       flag_1x1gemm_ = true;
     } else {
......
@@ -76,19 +76,27 @@ void Conv2DTransposeCompute::Run() {
   bool fuse_relu = param.fuse_relu;
   bool flag_bias = (param.bias != nullptr);
+  auto paddings = *param.paddings;
+  auto dilations = *param.dilations;
   int m = chout * kw * kh / group;
   int n = hin * win;
   int k = chin / group;
+  bool pads_equal =
+      (paddings[0] == paddings[1]) && (paddings[2] == paddings[3]);
   int group_size_in = win * hin * chin / group;
   int group_size_out = wout * hout * chout / group;
   int group_size_coldata = m * n;
+  bool pads_all_qual = pads_equal && (paddings[0] == paddings[2]);
   int hblock = lite::arm::math::get_hblock(&ctx);
   int m_roundup = hblock * ((m + hblock - 1) / hblock);
   int group_size_weights = ((m_roundup * k + 15) / 16) * 16;
   bool flag_1x1s1p1 = (kw == 1) && (kh == 1) && (param.strides[0] == 1) &&
-                      (param.strides[1] == 1) && (param.paddings[0] == 0) &&
-                      (param.paddings[1] == 0) && (param.dilations[0] == 1) &&
-                      (param.dilations[1] == 1);
+                      (param.strides[1] == 1) && pads_all_qual &&
+                      (dilations[0] == 1) && (dilations[1] == 1);
   ctx.ExtendWorkspace(sizeof(float) * group * m * n);
   auto din = param.x->data<float>();
@@ -129,12 +137,12 @@ void Conv2DTransposeCompute::Run() {
                                  wout,
                                  kh,
                                  kw,
-                                 param.paddings[0],
-                                 param.paddings[1],
+                                 paddings[0],
+                                 paddings[2],
                                  param.strides[0],
                                  param.strides[1],
-                                 param.dilations[0],
-                                 param.dilations[1],
+                                 dilations[0],
+                                 dilations[1],
                                  dout_batch);
     }
     if (flag_bias) {
......
@@ -194,15 +194,18 @@ void conv2d_transpose_compute_ref(const operators::ConvParam& param) {
   }
   int group = param.groups;
+  auto paddings = *param.paddings;
+  auto dilations = *param.dilations;
   int kernel_h = param.filter->dims()[2];
   int kernel_w = param.filter->dims()[3];
   int stride_h = param.strides[0];
   int stride_w = param.strides[1];
-  int dila_h = param.dilations[0];
-  int dila_w = param.dilations[1];
-  int pad_h = param.paddings[0];
-  int pad_w = param.paddings[1];
+  int dila_h = dilations[0];
+  int dila_w = dilations[1];
+  int pad_h = paddings[0];
+  int pad_w = paddings[2];
   bool flag_bias = (param.bias != nullptr);
   bool flag_relu = param.fuse_relu;
@@ -332,10 +335,14 @@ TEST(conv2d_transpose_arm, compute) {
               param.bias = &bias;
             }
             param.fuse_relu = flag_relu;
-            param.paddings = std::vector<int>({padding, padding});
+            std::vector<int> paddings = {
+                padding, padding, padding, padding};
             param.strides = std::vector<int>({stride, stride});
+            std::vector<int> dilations = {dilation, dilation};
+            param.paddings =
+                std::make_shared<std::vector<int>>(paddings);
             param.dilations =
-                std::vector<int>({dilation, dilation});
+                std::make_shared<std::vector<int>>(dilations);
            param.groups = group;
            conv2d_transpose.SetParam(param);
            conv2d_transpose.Launch();
......
@@ -21,10 +21,14 @@ namespace lite {
 namespace kernels {
 namespace cuda {
-inline int ConvOutputSize(
-    int input_size, int filter_size, int dilation, int padding, int stride) {
+inline int ConvOutputSize(int input_size,
+                          int filter_size,
+                          int dilation,
+                          int pad_left,
+                          int pad_right,
+                          int stride) {
   const int dkernel = dilation * (filter_size - 1) + 1;
-  int output_size = (input_size + 2 * padding - dkernel) / stride + 1;
+  int output_size = (input_size + pad_left + pad_right - dkernel) / stride + 1;
   CHECK_GT_OR_FALSE(output_size, 0);
   return output_size;
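Usage sketch for the updated helper (illustrative values only): a 32-pixel input, a 3x3 filter, stride 1, dilation 1, and an asymmetric pad pair {1, 0} yields (32 + 1 + 0 - 3) / 1 + 1 = 31. Callers below pass paddings[2 * i] and paddings[2 * i + 1] for each spatial dimension i.

// Illustrative call; the pad pair now enters as two separate arguments.
int h_out = ConvOutputSize(/*input_size=*/32,
                           /*filter_size=*/3,
                           /*dilation=*/1,
                           /*pad_left=*/1,
                           /*pad_right=*/0,
                           /*stride=*/1);  // -> 31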
@@ -50,11 +54,15 @@ void ConvComputeInt8<Ptype_out>::PrepareForRun() {
   const auto filter_dims = param.filter->dims();
   std::vector<int64_t> output_shape({in_dims[0]});
+  auto paddings = *param.paddings;
+  auto dilations = *param.dilations;
   for (size_t i = 0; i < param.strides.size(); ++i) {
     output_shape.push_back(ConvOutputSize(in_dims[i + 1],
                                           filter_dims[i + 1],
-                                          param.dilations[i],
-                                          param.paddings[i],
+                                          dilations[i],
+                                          paddings[2 * i],
+                                          paddings[2 * i + 1],
                                           param.strides[i]));
   }
   output_shape.push_back(filter_dims[0]);
@@ -71,12 +79,15 @@ void ConvComputeInt8<Ptype_out>::Run() {
   const auto in_dims = param.x->dims();
   const auto filter_dims = param.filter->dims();
   std::vector<int64_t> output_shape({in_dims[0]});
+  auto paddings = *param.paddings;
+  auto dilations = *param.dilations;
   for (size_t i = 0; i < param.strides.size(); ++i) {
     output_shape.push_back(ConvOutputSize(in_dims[i + 1],
                                           filter_dims[i + 1],
-                                          param.dilations[i],
-                                          param.paddings[i],
+                                          dilations[i],
+                                          paddings[2 * i],
+                                          paddings[2 * i + 1],
                                           param.strides[i]));
   }
   output_shape.push_back(filter_dims[0]);
......
@@ -41,7 +41,8 @@ TEST(conv_compute, fp32) {
   act_param.Leaky_relu_alpha = 0.1;
   operators::ConvParam param;
   param.activation_param = act_param;
-  param.paddings = {1, 1};
+  std::vector<int> pads = {1, 1, 1, 1};
+  param.paddings = std::make_shared<std::vector<int>>(pads);
   param.groups = 1;
   Tensor x, filter, bias, y, x_cpu, filter_cpu, bias_cpu, y_cpu;
......
@@ -36,8 +36,15 @@ void ConvCompute::PrepareForRun() {
   conv_param.filter = param.filter->ZynqTensor();
   conv_param.groups = param.groups;
   conv_param.strides = param.strides;
+  auto paddings = *param.paddings;
   conv_param.paddings = param.paddings;
   conv_param.dilations = param.dilations;
+  bool pad_equal =
+      ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
+  if (!pad_equal) {
+    LOG(FATAL) << "This pad not support ! " << paddings[0] << ", "
+               << paddings[1] << ", " << paddings[2] << ", " << paddings[3];
+  }
   fill_scale_bias_const(&conv_param);
   conv_param.bias()->copyFrom(param.bias->ZynqTensor());
   conv_param.relu.enabled = param.fuse_relu;
......
@@ -141,13 +141,15 @@ void conv_compute_ref(const operators::ConvParam& param) {
   int group = param.groups;
   int kernel_w = param.filter->dims()[2];
   int kernel_h = param.filter->dims()[3];
+  auto paddings = *param.paddings;
+  auto dilations = *param.dilations;
   int stride_w = param.strides[0];
   int stride_h = param.strides[1];
-  int dila_w = param.dilations[0];
-  int dila_h = param.dilations[1];
-  int pad_w = param.paddings[0];
-  int pad_h = param.paddings[1];
+  int dila_w = dilations[0];
+  int dila_h = dilations[1];
+  int pad_w = paddings[2];
+  int pad_h = paddings[0];
   bool flag_bias = (param.bias != nullptr);
   bool flag_relu = param.fuse_relu;
@@ -277,10 +279,14 @@ TEST(conv_fpga, compute) {
               param.bias = &bias;
             }
             param.fuse_relu = flag_relu;
-            param.paddings = std::vector<int>({padding, padding});
+            std::vector<int> paddings = {
+                padding, padding, padding, padding};
            param.strides = std::vector<int>({stride, stride});
+            std::vector<int> dilations = {dilation, dilation};
+            param.paddings =
+                std::make_shared<std::vector<int>>(paddings);
             param.dilations =
-                std::vector<int>({dilation, dilation});
+                std::make_shared<std::vector<int>>(dilations);
             param.groups = group;
             conv.SetParam(param);
             conv.Launch();
......
@@ -42,9 +42,9 @@ node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> conv_op,
   auto bs = input_dims[0];
   auto ic = input_dims[1];
   auto oc = filter_dims[0];
-  CHECK_EQ(input_dims.size(), 4);
-  CHECK_EQ(output_dims.size(), 4);
-  CHECK_EQ(filter_dims.size(), 4);
+  CHECK_EQ(input_dims.size(), 4L);
+  CHECK_EQ(output_dims.size(), 4L);
+  CHECK_EQ(filter_dims.size(), 4L);
   CHECK_EQ(output_dims[0], bs);
   CHECK_EQ(output_dims[1], oc);
   auto strides = op_info->GetAttr<std::vector<int>>("strides");
@@ -52,9 +52,16 @@ node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> conv_op,
   auto groups = op_info->GetAttr<int>("groups");
   auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
   auto fuse_relu = op_info->GetAttr<bool>("fuse_relu");
-  CHECK_EQ(strides.size(), 2);
-  CHECK_EQ(paddings.size(), 2);
-  CHECK_EQ(dilations.size(), 2);
+  CHECK_EQ(strides.size(), 2L);
+  CHECK_EQ(paddings.size(), 4L);
+  CHECK_EQ(dilations.size(), 2L);
+  bool pad_equal =
+      ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
+  if (!pad_equal) {
+    LOG(FATAL) << "This pad not support ! " << paddings[0] << ", "
+               << paddings[1] << ", " << paddings[2] << ", " << paddings[3];
+  }
   // check depthwise mode, and decide whether use ConvolutionDepthwise Op
   bool use_depthwise_conv =
@@ -134,7 +141,7 @@ node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> conv_op,
     depthwise_conv_node->set_attr_pad_mode(5);  // VALID
     depthwise_conv_node->set_attr_group(groups);
     depthwise_conv_node->set_attr_pad(ge::AttrValue::LIST_INT(
-        {paddings[0], paddings[0], paddings[1], paddings[1]}));
+        {paddings[0], paddings[0], paddings[2], paddings[2]}));
     depthwise_conv_node->set_attr_dilation(
         ge::AttrValue::LIST_INT({dilations[0], dilations[1]}));
     depthwise_conv_node->set_attr_stride(
@@ -161,7 +168,7 @@ node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> conv_op,
     common_conv_node->set_attr_pad_mode(0);  // NOTSET
     common_conv_node->set_attr_group(groups);
     common_conv_node->set_attr_pad(ge::AttrValue::LIST_INT(
-        {paddings[0], paddings[0], paddings[1], paddings[1]}));
+        {paddings[0], paddings[0], paddings[2], paddings[2]}));
     common_conv_node->set_attr_dilation(
         ge::AttrValue::LIST_INT({dilations[0], dilations[1]}));
     common_conv_node->set_attr_stride(
......
@@ -54,7 +54,7 @@ void conv_ref(const std::shared_ptr<operators::ConvOpLite> op) {
   int stride_h = strides[0];
   int dila_w = dilations[1];
   int dila_h = dilations[0];
-  int pad_w = paddings[1];
+  int pad_w = paddings[2];
   int pad_h = paddings[0];
   int batch_size = input_dims[0];
   int in_ch_size = input_dims[1];
@@ -175,7 +175,8 @@ void test_conv(int bs,
   opdesc.SetOutput("Output", {output_var_name});
   opdesc.SetAttr("dilations", std::vector<int32_t>({dilation, dilation}));
   opdesc.SetAttr("strides", std::vector<int32_t>({stride, stride}));
-  opdesc.SetAttr("paddings", std::vector<int32_t>({padding, padding}));
+  opdesc.SetAttr("paddings",
+                 std::vector<int32_t>({padding, padding, padding, padding}));
   opdesc.SetAttr("groups", groups);
   opdesc.SetAttr("fuse_relu", static_cast<bool>(fuse_relu));
   if (has_bias) {
......
@@ -44,14 +44,19 @@ node_map_type ConvTransposeConverter(
   auto groups = op_info->GetAttr<int>("groups");
   auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
   auto fuse_relu = op_info->GetAttr<bool>("fuse_relu");
-  CHECK_EQ(strides.size(), 2);
-  CHECK_EQ(paddings.size(), 2);
-  CHECK_EQ(dilations.size(), 2);
+  CHECK_EQ(strides.size(), 2L);
+  CHECK_EQ(paddings.size(), 4L);
+  CHECK_EQ(dilations.size(), 2L);
   // create deconv node
   auto conv_transpose_node =
       std::make_shared<ge::op::Deconvolution>(unique_op_type);
+  bool pad_equal =
+      ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
+  if (!pad_equal) {
+    LOG(FATAL) << "This pad not support ! " << paddings[0] << ", "
+               << paddings[1] << ", " << paddings[2] << ", " << paddings[3];
+  }
   // create input sizes node to describe the dimensions of input tensor
   std::vector<int32_t> output_shape;
   output_shape.push_back(input_shape[0]);
......
@@ -278,7 +278,8 @@ void test_conv_transpose(int bs,
   opdesc.SetOutput("Output", {output_var_name});
   opdesc.SetAttr("dilations", std::vector<int32_t>({dilation, dilation}));
   opdesc.SetAttr("strides", std::vector<int32_t>({stride, stride}));
-  opdesc.SetAttr("paddings", std::vector<int32_t>({padding, padding}));
+  opdesc.SetAttr("paddings",
+                 std::vector<int32_t>({padding, padding, padding, padding}));
   opdesc.SetAttr("groups", groups);
   opdesc.SetAttr("fuse_relu", static_cast<bool>(fuse_relu));
   if (has_bias) {
......
...@@ -38,15 +38,20 @@ void ConvCompute::PrepareForRun() { ...@@ -38,15 +38,20 @@ void ConvCompute::PrepareForRun() {
int w_out = output_dims[3]; int w_out = output_dims[3];
int kernel_h = filter_dims[2]; // oihw int kernel_h = filter_dims[2]; // oihw
int kernel_w = filter_dims[3]; int kernel_w = filter_dims[3];
int pad_h = param.paddings[0]; auto paddings = *param.paddings;
int pad_w = param.paddings[1]; auto dilations = *param.dilations;
int stride_h = param.strides[0]; int stride_h = param.strides[0];
int stride_w = param.strides[1]; int stride_w = param.strides[1];
int pad_h = paddings[0];
int pad_w = paddings[2];
int groups = param.groups; int groups = param.groups;
bool relu_fused = param.fuse_relu; bool relu_fused = param.fuse_relu;
bool no_dilation = (param.dilations[0] == 1) && (param.dilations[1] == 1); bool no_dilation = (dilations[0] == 1) && (dilations[1] == 1);
bool zero_pad = (pad_h == 0) && (pad_w == 0); bool zero_pad = (pad_h == 0) && (pad_w == 0);
bool pad_equal =
((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
VLOG(3) << "Is relu fused? / " << (relu_fused ? "Yes" : "No"); VLOG(3) << "Is relu fused? / " << (relu_fused ? "Yes" : "No");
VLOG(3) << "groups:" << groups << " stride_h:" << stride_h VLOG(3) << "groups:" << groups << " stride_h:" << stride_h
<< " stride_w:" << stride_w << " pad_h:" << pad_h << " stride_w:" << stride_w << " pad_h:" << pad_h
...@@ -60,7 +65,7 @@ void ConvCompute::PrepareForRun() { ...@@ -60,7 +65,7 @@ void ConvCompute::PrepareForRun() {
<< filter_dims[2] << " " << filter_dims[3]; << filter_dims[2] << " " << filter_dims[3];
if (kernel_h == 1 && kernel_w == 1 && stride_h == 1 && stride_w == 1 && if (kernel_h == 1 && kernel_w == 1 && stride_h == 1 && stride_w == 1 &&
zero_pad && no_dilation) { zero_pad && no_dilation && pad_equal) {
// conv2d_1x1 // conv2d_1x1
kernel_func_names_.push_back("gemm_batch"); kernel_func_names_.push_back("gemm_batch");
kernel_func_paths_.push_back("buffer/fc_kernel.cl"); kernel_func_paths_.push_back("buffer/fc_kernel.cl");
...@@ -70,7 +75,7 @@ void ConvCompute::PrepareForRun() { ...@@ -70,7 +75,7 @@ void ConvCompute::PrepareForRun() {
build_options_.push_back("-DCL_DTYPE=float"); build_options_.push_back("-DCL_DTYPE=float");
} }
impl_ = &ConvCompute::Conv2d1x1; impl_ = &ConvCompute::Conv2d1x1;
} else { } else if (pad_equal) {
kernel_func_names_.push_back("im2col"); kernel_func_names_.push_back("im2col");
kernel_func_names_.push_back("gemm_batch"); kernel_func_names_.push_back("gemm_batch");
kernel_func_paths_.push_back("buffer/im2col_kernel.cl"); kernel_func_paths_.push_back("buffer/im2col_kernel.cl");
...@@ -85,6 +90,9 @@ void ConvCompute::PrepareForRun() { ...@@ -85,6 +90,9 @@ void ConvCompute::PrepareForRun() {
col_buffer_.reset(new lite::Tensor); col_buffer_.reset(new lite::Tensor);
col_buffer_->Resize({bs, c_in, kernel_h * kernel_w, h_out * w_out}); col_buffer_->Resize({bs, c_in, kernel_h * kernel_w, h_out * w_out});
col_buffer_->mutable_data<float, cl::Buffer>(TARGET(kOpenCL)); col_buffer_->mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
} else {
    LOG(FATAL) << "Unsupported padding: " << paddings[0] << ", "
               << paddings[1] << ", " << paddings[2] << ", " << paddings[3];
} }
for (size_t i = 0; i < kernel_func_names_.size(); i++) { for (size_t i = 0; i < kernel_func_names_.size(); i++) {
...@@ -102,17 +110,19 @@ void ConvCompute::GemmlikeConv2d() { ...@@ -102,17 +110,19 @@ void ConvCompute::GemmlikeConv2d() {
int c_in = x_dims[1]; int c_in = x_dims[1];
int h_in = x_dims[2]; int h_in = x_dims[2];
int w_in = x_dims[3]; int w_in = x_dims[3];
auto paddings = *param.paddings;
auto dilations = *param.dilations;
int c_out = output_dims[1]; int c_out = output_dims[1];
int h_out = output_dims[2]; int h_out = output_dims[2];
int w_out = output_dims[3]; int w_out = output_dims[3];
int kernel_h = filter_dims[2]; int kernel_h = filter_dims[2];
int kernel_w = filter_dims[3]; int kernel_w = filter_dims[3];
int pad_h = param.paddings[0]; int pad_h = paddings[0];
int pad_w = param.paddings[1]; int pad_w = paddings[2];
int stride_h = param.strides[0]; int stride_h = param.strides[0];
int stride_w = param.strides[1]; int stride_w = param.strides[1];
int dilation_h = param.dilations[0]; int dilation_h = dilations[0];
int dilation_w = param.dilations[1]; int dilation_w = dilations[1];
auto* x_buf = param.x->data<float, cl::Buffer>(); auto* x_buf = param.x->data<float, cl::Buffer>();
auto* filter_buf = param.filter->data<float, cl::Buffer>(); auto* filter_buf = param.filter->data<float, cl::Buffer>();
......
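The fatal branch above exists because the OpenCL buffer kernels chosen here (gemm_batch for 1x1, im2col + gemm_batch otherwise) take only one pad value per spatial dimension. A minimal sketch of the symmetry guard; the helper name IsPadSymmetric is hypothetical, not part of this patch:

// Returns true when a 4-element {top, bottom, left, right} padding vector
// collapses to a symmetric (pad_h, pad_w) pair usable by the buffer kernels.
static bool IsPadSymmetric(const std::vector<int>& pads) {
  return pads.size() == 4 && pads[0] == pads[1] && pads[2] == pads[3];
}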
...@@ -24,7 +24,6 @@ namespace lite { ...@@ -24,7 +24,6 @@ namespace lite {
#define A(i, j) a[i * lda + j] #define A(i, j) a[i * lda + j]
#define B(i, j) cur_b[i * ldb + j] #define B(i, j) cur_b[i * ldb + j]
#define C(i, j) cur_c[i * ldc + j] #define C(i, j) cur_c[i * ldc + j]
template <typename Dtype1, typename Dtype2> template <typename Dtype1, typename Dtype2>
static void conv_basic(const Dtype1* din, static void conv_basic(const Dtype1* din,
Dtype2* dout, Dtype2* dout,
...@@ -227,10 +226,12 @@ TEST(conv2d, compute_conv2d_1x1) { ...@@ -227,10 +226,12 @@ TEST(conv2d, compute_conv2d_1x1) {
param.bias = bias_flag ? &bias : nullptr; param.bias = bias_flag ? &bias : nullptr;
param.output = &out; param.output = &out;
param.strides = {stride, stride}; param.strides = {stride, stride};
param.paddings = {pad, pad}; std::vector<int> paddings = {pad, pad, pad, pad};
param.groups = group; param.groups = group;
param.dilations = {dilation, dilation}; std::vector<int> dilations = {dilation, dilation};
param.fuse_relu = relu_flag; param.fuse_relu = relu_flag;
param.paddings = std::make_shared<std::vector<int>>(paddings);
param.dilations = std::make_shared<std::vector<int>>(dilations);
kernel->SetParam(param); kernel->SetParam(param);
std::unique_ptr<KernelContext> conv_context(new KernelContext); std::unique_ptr<KernelContext> conv_context(new KernelContext);
...@@ -454,11 +455,14 @@ TEST(conv2d, compute_conv2d_gemm) { ...@@ -454,11 +455,14 @@ TEST(conv2d, compute_conv2d_gemm) {
param.bias = bias_flag ? &bias : nullptr; param.bias = bias_flag ? &bias : nullptr;
param.output = &out; param.output = &out;
param.strides = {stride, stride}; param.strides = {stride, stride};
param.paddings = {pad, pad}; std::vector<int> paddings = {pad, pad, pad, pad};
param.groups = group; param.groups = group;
param.dilations = {dilation, dilation}; std::vector<int> dilations = {dilation, dilation};
param.fuse_relu = relu_flag; param.fuse_relu = relu_flag;
param.paddings = std::make_shared<std::vector<int>>(paddings);
param.dilations = std::make_shared<std::vector<int>>(dilations);
kernel->SetParam(param); kernel->SetParam(param);
std::unique_ptr<KernelContext> conv_context(new KernelContext); std::unique_ptr<KernelContext> conv_context(new KernelContext);
context->As<OpenCLContext>().CopySharedTo( context->As<OpenCLContext>().CopySharedTo(
......
...@@ -44,7 +44,7 @@ class DepthwiseConv2dCompute ...@@ -44,7 +44,7 @@ class DepthwiseConv2dCompute
auto x_dims = param.x->dims(); auto x_dims = param.x->dims();
auto filter_dims = param.filter->dims(); auto filter_dims = param.filter->dims();
auto output_dims = param.output->dims(); auto output_dims = param.output->dims();
auto paddings = param.paddings; auto paddings = *param.paddings;
auto strides = param.strides; auto strides = param.strides;
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
......
...@@ -105,7 +105,8 @@ TEST(depthwise_conv2d, compute) { ...@@ -105,7 +105,8 @@ TEST(depthwise_conv2d, compute) {
param.x = &input; param.x = &input;
param.filter = &filter; param.filter = &filter;
param.output = &output; param.output = &output;
param.paddings = std::vector<int>{0, 0}; std::vector<int> paddings = {0, 0};
param.paddings = std::make_shared<std::vector<int>>(paddings);
param.strides = std::vector<int>{1, 1}; param.strides = std::vector<int>{1, 1};
std::unique_ptr<KernelContext> context(new KernelContext); std::unique_ptr<KernelContext> context(new KernelContext);
......
...@@ -67,7 +67,7 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -67,7 +67,7 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
lite::DDim col_shape(col_shape_vec); lite::DDim col_shape(col_shape_vec);
lite::DDim col_matrix_shape = col_shape.Flatten2D(data_dim); lite::DDim col_matrix_shape = col_shape.Flatten2D(data_dim);
bool is_expand = IsExpand( bool is_expand = IsExpand(
filter_shape_vec, param.strides, param.paddings, param.dilations); filter_shape_vec, param.strides, *param.paddings, *param.dilations);
lite::Tensor col; lite::Tensor col;
lite::Tensor col_matrix; lite::Tensor col_matrix;
if (is_expand) { if (is_expand) {
...@@ -103,7 +103,7 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -103,7 +103,7 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
lite::Tensor in_slice = lite::Tensor in_slice =
in_batch.Slice<T>(static_cast<int64_t>(g * in_step), in_batch.Slice<T>(static_cast<int64_t>(g * in_step),
static_cast<int64_t>((g + 1) * in_step)); static_cast<int64_t>((g + 1) * in_step));
auto paddings = *param.paddings;
if (!is_expand) { if (!is_expand) {
col.ShareDataWith(in_slice); col.ShareDataWith(in_slice);
col_matrix.ShareDataWith(col); col_matrix.ShareDataWith(col);
...@@ -112,20 +112,18 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -112,20 +112,18 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
// im2col // im2col
im2col(context, im2col(context,
in_slice, in_slice,
param.dilations, *param.dilations,
param.strides, param.strides,
std::vector<int>{param.paddings[0], std::vector<int>{
param.paddings[1], paddings[0], paddings[2], paddings[0], paddings[2]},
param.paddings[0],
param.paddings[1]},
&(col)); &(col));
} else if (data_dim == 3U) { } else if (data_dim == 3U) {
// vol2col // vol2col
vol2col(context, vol2col(context,
in_slice, in_slice,
param.dilations, *param.dilations,
param.strides, param.strides,
param.paddings, *param.paddings,
&(col)); &(col));
} }
......
...@@ -73,9 +73,11 @@ TEST(conv2d_x86, run_test) { ...@@ -73,9 +73,11 @@ TEST(conv2d_x86, run_test) {
param.bias = &b; param.bias = &b;
param.output = &out; param.output = &out;
param.strides = {1, 1}; param.strides = {1, 1};
param.paddings = {0, 0}; std::vector<int> paddings = {0, 0, 0, 0};
param.groups = 1; param.groups = 1;
param.dilations = {1, 1}; std::vector<int> dilations = {1, 1};
param.paddings = std::make_shared<std::vector<int>>(paddings);
param.dilations = std::make_shared<std::vector<int>>(dilations);
LOG(INFO) << 123; LOG(INFO) << 123;
std::unique_ptr<KernelContext> ctx(new KernelContext); std::unique_ptr<KernelContext> ctx(new KernelContext);
ctx->As<X86Context>(); ctx->As<X86Context>();
......
...@@ -46,17 +46,25 @@ node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> op, ...@@ -46,17 +46,25 @@ node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> op,
auto groups = op_info->GetAttr<int>("groups"); auto groups = op_info->GetAttr<int>("groups");
auto dilations = op_info->GetAttr<std::vector<int>>("dilations"); auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
auto fuse_relu = op_info->GetAttr<bool>("fuse_relu"); auto fuse_relu = op_info->GetAttr<bool>("fuse_relu");
CHECK_EQ(strides.size(), 2); CHECK_EQ(strides.size(), 2L);
CHECK_EQ(paddings.size(), 2); CHECK_EQ(paddings.size(), 4L);
CHECK_EQ(dilations.size(), 2); CHECK_EQ(dilations.size(), 2L);
std::vector<int64_t> output_shape({bs, oc}); std::vector<int64_t> output_shape({bs, oc});
for (size_t i = 0; i < 2; i++) { for (size_t i = 0; i < 2; i++) {
const int dkernel = dilations[i] * (filter_dims[2 + i] - 1) + 1; const int dkernel = dilations[i] * (filter_dims[2 + i] - 1) + 1;
output_shape.push_back( output_shape.push_back(
(input_dims[i + 2] + 2 * paddings[i] - dkernel) / strides[i] + 1); (input_dims[i + 2] + paddings[2 * i] + paddings[2 * i + 1] - dkernel) /
strides[i] +
1);
} }
DDim output_dims(output_shape); DDim output_dims(output_shape);
bool pads_equal =
(paddings[0] == paddings[1]) && (paddings[2] == paddings[3]);
if (!pads_equal) {
LOG(FATAL) << "Padding requies pad_top==pad_bottom and pad_lef==pad_right.";
}
// check context // check context
CHECK(graph_ctx != nullptr); CHECK(graph_ctx != nullptr);
CHECK(graph_ctx->builder != nullptr); CHECK(graph_ctx->builder != nullptr);
......
...@@ -54,7 +54,7 @@ void conv_ref(const std::shared_ptr<operators::ConvOpLite> op) { ...@@ -54,7 +54,7 @@ void conv_ref(const std::shared_ptr<operators::ConvOpLite> op) {
int stride_h = strides[0]; int stride_h = strides[0];
int dila_w = dilations[1]; int dila_w = dilations[1];
int dila_h = dilations[0]; int dila_h = dilations[0];
int pad_w = paddings[1]; int pad_w = paddings[2];
int pad_h = paddings[0]; int pad_h = paddings[0];
int batch_size = input_dims[0]; int batch_size = input_dims[0];
int in_ch_size = input_dims[1]; int in_ch_size = input_dims[1];
...@@ -175,7 +175,8 @@ void test_conv(int bs, ...@@ -175,7 +175,8 @@ void test_conv(int bs,
opdesc.SetOutput("Output", {output_var_name}); opdesc.SetOutput("Output", {output_var_name});
opdesc.SetAttr("dilations", std::vector<int32_t>({dilation, dilation})); opdesc.SetAttr("dilations", std::vector<int32_t>({dilation, dilation}));
opdesc.SetAttr("strides", std::vector<int32_t>({stride, stride})); opdesc.SetAttr("strides", std::vector<int32_t>({stride, stride}));
opdesc.SetAttr("paddings", std::vector<int32_t>({padding, padding})); opdesc.SetAttr("paddings",
std::vector<int32_t>({padding, padding, padding, padding}));
opdesc.SetAttr("groups", groups); opdesc.SetAttr("groups", groups);
opdesc.SetAttr("fuse_relu", static_cast<bool>(fuse_relu)); opdesc.SetAttr("fuse_relu", static_cast<bool>(fuse_relu));
if (has_bias) { if (has_bias) {
......
...@@ -39,11 +39,15 @@ bool ConvOpLite::CheckShape() const { ...@@ -39,11 +39,15 @@ bool ConvOpLite::CheckShape() const {
return true; return true;
} }
inline int ConvOutputSize( inline int ConvOutputSize(int input_size,
int input_size, int filter_size, int dilation, int padding, int stride) { int filter_size,
int dilation,
int pad_left,
int pad_right,
int stride) {
const int dkernel = dilation * (filter_size - 1) + 1; const int dkernel = dilation * (filter_size - 1) + 1;
int output_size = (input_size + 2 * padding - dkernel) / stride + 1; int output_size =
// CHECK_GT_OR_FALSE(output_size, 0); (input_size + (pad_left + pad_right) - dkernel) / stride + 1;
return output_size; return output_size;
} }
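As a worked check of the rewritten formula (illustrative numbers, not from the diff): with input_size = 32, filter_size = 3, dilation = 1, pad_left = 1, pad_right = 2 and stride = 2, dkernel = 1 * (3 - 1) + 1 = 3, so:

int out = ConvOutputSize(/*input_size=*/32, /*filter_size=*/3, /*dilation=*/1,
                         /*pad_left=*/1, /*pad_right=*/2, /*stride=*/2);
// out == (32 + (1 + 2) - 3) / 2 + 1 == 17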
...@@ -61,8 +65,11 @@ inline void UpdatePaddingAndDilation(std::vector<int>* paddings, ...@@ -61,8 +65,11 @@ inline void UpdatePaddingAndDilation(std::vector<int>* paddings,
int pad_sum = int pad_sum =
std::max((out_size - 1) * strides[i] + ksize[i] - data_dims[i + 2], std::max((out_size - 1) * strides[i] + ksize[i] - data_dims[i + 2],
(int64_t)0); (int64_t)0);
int pad_0 = pad_sum / 2;
int pad_1 = pad_sum - pad_0;
// pad // pad
*(paddings->begin() + i) = pad_sum / 2; *(paddings->begin() + i * 2) = pad_0;
*(paddings->begin() + i * 2 + 1) = pad_1;
// dilation // dilation
*(dilations->begin() + i) = 1; *(dilations->begin() + i) = 1;
} }
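In the update above (used for the "SAME"-style padding algorithm), the total pad is split as evenly as possible, with any odd pixel going to the second (bottom/right) side. A standalone sketch with illustrative values:

// e.g. out_size = 14, stride = 2, ksize = 3, input extent = 28:
// pad_sum = max((14 - 1) * 2 + 3 - 28, 0) = 1
int pad_sum = 1;
int pad_0 = pad_sum / 2;      // 0 -> paddings[2 * i]     (top or left)
int pad_1 = pad_sum - pad_0;  // 1 -> paddings[2 * i + 1] (bottom or right)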
...@@ -77,18 +84,21 @@ bool ConvOpLite::InferShape() const { ...@@ -77,18 +84,21 @@ bool ConvOpLite::InferShape() const {
const auto in_dims = param_.x->dims(); const auto in_dims = param_.x->dims();
const auto filter_dims = param_.filter->dims(); const auto filter_dims = param_.filter->dims();
UpdatePaddingAndDilation(&param_.paddings, UpdatePaddingAndDilation(param_.paddings.get(),
&param_.dilations, param_.dilations.get(),
param_.strides, param_.strides,
padding_algorithm_, padding_algorithm_,
in_dims, in_dims,
filter_dims); filter_dims);
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
auto paddings = *param_.paddings;
auto dilations = *param_.dilations;
for (size_t i = 0; i < param_.strides.size(); ++i) { for (size_t i = 0; i < param_.strides.size(); ++i) {
output_shape.push_back(ConvOutputSize(in_dims[i + 2], output_shape.push_back(ConvOutputSize(in_dims[i + 2],
filter_dims[i + 2], filter_dims[i + 2],
param_.dilations[i], dilations[i],
param_.paddings[i], paddings[i * 2],
paddings[i * 2 + 1],
param_.strides[i])); param_.strides[i]));
} }
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
#include "lite/core/kernel.h" #include "lite/core/kernel.h"
...@@ -47,9 +48,10 @@ class ConvOpLite : public OpLite { ...@@ -47,9 +48,10 @@ class ConvOpLite : public OpLite {
param_.output = scope->FindVar(Out)->GetMutable<lite::Tensor>(); param_.output = scope->FindVar(Out)->GetMutable<lite::Tensor>();
param_.strides = op_desc.GetAttr<std::vector<int>>("strides"); param_.strides = op_desc.GetAttr<std::vector<int>>("strides");
param_.paddings = op_desc.GetAttr<std::vector<int>>("paddings"); auto paddings = op_desc.GetAttr<std::vector<int>>("paddings");
param_.groups = op_desc.GetAttr<int>("groups"); param_.groups = op_desc.GetAttr<int>("groups");
param_.dilations = op_desc.GetAttr<std::vector<int>>("dilations"); auto dilations = op_desc.GetAttr<std::vector<int>>("dilations");
param_.dilations = std::make_shared<std::vector<int>>(dilations);
// optional params // optional params
std::vector<std::string> input_arg_names = op_desc.InputArgumentNames(); std::vector<std::string> input_arg_names = op_desc.InputArgumentNames();
...@@ -109,6 +111,20 @@ class ConvOpLite : public OpLite { ...@@ -109,6 +111,20 @@ class ConvOpLite : public OpLite {
param_.output_scale = op_desc.GetAttr<float>("output_scale"); param_.output_scale = op_desc.GetAttr<float>("output_scale");
} }
} }
// 2-pad to 4-pad
if (paddings.size() == 2L) {
for (size_t i = 0; i < param_.strides.size(); ++i) {
int copy_pad = *(paddings.begin() + 2 * i);
paddings.insert(paddings.begin() + 2 * i + 1, copy_pad);
}
} else {
if (paddings.size() != 4L) {
LOG(FATAL)
<< "Paddings size should be the same or twice as the input size.";
}
}
param_.paddings = std::make_shared<std::vector<int>>(paddings);
return true; return true;
} }
......
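The block above is where a legacy 2-element attribute {pad_h, pad_w} becomes the 4-element {top, bottom, left, right} layout assumed throughout this patch (hence pad_h = paddings[0] and pad_w = paddings[2] everywhere else). A self-contained sketch of the expansion:

std::vector<int> pads = {1, 2};  // legacy {pad_h, pad_w}
if (pads.size() == 2) {
  for (size_t i = 0; i < 2; ++i) {
    // duplicate each entry in place: {h, w} -> {h, h, w, w}
    int copy_pad = *(pads.begin() + 2 * i);
    pads.insert(pads.begin() + 2 * i + 1, copy_pad);
  }
}
// pads is now {1, 1, 2, 2}, i.e. {top, bottom, left, right}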
...@@ -11,8 +11,8 @@ ...@@ -11,8 +11,8 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "lite/operators/conv_transpose_op.h" #include "lite/operators/conv_transpose_op.h"
#include <memory>
#include "lite/core/op_lite.h" #include "lite/core/op_lite.h"
#include "lite/core/op_registry.h" #include "lite/core/op_registry.h"
...@@ -32,7 +32,6 @@ bool ConvTransposeOpLite::CheckShape() const { ...@@ -32,7 +32,6 @@ bool ConvTransposeOpLite::CheckShape() const {
CHECK_EQ_OR_FALSE(in_dims.size(), filter_dims.size()); CHECK_EQ_OR_FALSE(in_dims.size(), filter_dims.size());
CHECK_OR_FALSE(in_dims.size() - param_.strides.size() == 2U); CHECK_OR_FALSE(in_dims.size() - param_.strides.size() == 2U);
CHECK_EQ_OR_FALSE(param_.paddings.size(), param_.strides.size());
CHECK_OR_FALSE(in_dims[1] % param_.groups == 0); CHECK_OR_FALSE(in_dims[1] % param_.groups == 0);
return true; return true;
...@@ -42,13 +41,16 @@ bool ConvTransposeOpLite::InferShape() const { ...@@ -42,13 +41,16 @@ bool ConvTransposeOpLite::InferShape() const {
const auto in_dims = param_.x->dims(); const auto in_dims = param_.x->dims();
const auto filter_dims = param_.filter->dims(); const auto filter_dims = param_.filter->dims();
auto paddings = *param_.paddings;
auto dilations = *param_.dilations;
std::vector<int64_t> output_shape; std::vector<int64_t> output_shape;
output_shape.push_back(in_dims[0]); output_shape.push_back(in_dims[0]);
output_shape.push_back(filter_dims[1] * param_.groups); output_shape.push_back(filter_dims[1] * param_.groups);
for (int i = 0; i < param_.strides.size(); i++) { for (int i = 0; i < param_.strides.size(); i++) {
int kernel_extent = param_.dilations[i] * (filter_dims[i + 2] - 1) + 1; int kernel_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
int output_len = (in_dims[i + 2] - 1) * param_.strides[i] + kernel_extent - int output_len = (in_dims[i + 2] - 1) * param_.strides[i] + kernel_extent -
2 * param_.paddings[i]; (paddings[2 * i] + paddings[2 * i + 1]);
output_shape.push_back(output_len); output_shape.push_back(output_len);
} }
...@@ -68,9 +70,24 @@ bool ConvTransposeOpLite::AttachImpl(const cpp::OpDesc &op_desc, ...@@ -68,9 +70,24 @@ bool ConvTransposeOpLite::AttachImpl(const cpp::OpDesc &op_desc,
param_.output = scope->FindVar(Out)->GetMutable<lite::Tensor>(); param_.output = scope->FindVar(Out)->GetMutable<lite::Tensor>();
param_.strides = op_desc.GetAttr<std::vector<int>>("strides"); param_.strides = op_desc.GetAttr<std::vector<int>>("strides");
param_.paddings = op_desc.GetAttr<std::vector<int>>("paddings"); auto paddings = op_desc.GetAttr<std::vector<int>>("paddings");
param_.groups = op_desc.GetAttr<int>("groups"); param_.groups = op_desc.GetAttr<int>("groups");
param_.dilations = op_desc.GetAttr<std::vector<int>>("dilations"); auto dilations = op_desc.GetAttr<std::vector<int>>("dilations");
// 2-pad to 4-pad
if (paddings.size() == 2L) {
for (size_t i = 0; i < 2L; ++i) {
int copy_pad = *(paddings.begin() + 2 * i);
paddings.insert(paddings.begin() + 2 * i + 1, copy_pad);
}
} else {
if (paddings.size() != 4L) {
LOG(FATAL)
<< "Paddings size should be the same or twice as the input size.";
}
}
param_.paddings = std::make_shared<std::vector<int>>(paddings);
param_.dilations = std::make_shared<std::vector<int>>(dilations);
// optional params // optional params
std::vector<std::string> input_arg_names = op_desc.InputArgumentNames(); std::vector<std::string> input_arg_names = op_desc.InputArgumentNames();
......
...@@ -254,9 +254,19 @@ struct ConvParam { ...@@ -254,9 +254,19 @@ struct ConvParam {
lite::Tensor* residualData{nullptr}; lite::Tensor* residualData{nullptr};
lite::Tensor* output{}; lite::Tensor* output{};
std::vector<int> strides{1, 1}; std::vector<int> strides{1, 1};
std::vector<int> paddings{0, 0}; /* The paddings type changed
 * from std::vector<int> to std::shared_ptr<std::vector<int>>
 * so that the padding can be modified dynamically and the
 * kernel param and the operator param stay in sync.
 */
std::shared_ptr<std::vector<int>> paddings;
int groups{1}; int groups{1};
std::vector<int> dilations{1, 1}; /* The dilations type changed
 * from std::vector<int> to std::shared_ptr<std::vector<int>>
 * so that the dilation can be modified dynamically and the
 * kernel param and the operator param stay in sync.
 */
std::shared_ptr<std::vector<int>> dilations;
bool fuse_relu_before_depthwise_conv{false}; bool fuse_relu_before_depthwise_conv{false};
bool use_mkldnn{false}; bool use_mkldnn{false};
bool fuse_relu{false}; // only used in mkldnn kernel bool fuse_relu{false}; // only used in mkldnn kernel
......
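Because operator and kernel now hold the same shared_ptr, a padding rewrite done on the op side (e.g. the SAME-padding update in InferShape) is seen by the kernel without re-attaching the param. A minimal sketch of that aliasing, assuming only the ConvParam fields declared above:

auto pads = std::make_shared<std::vector<int>>(std::vector<int>{0, 0, 0, 0});
ConvParam op_param, kernel_param;
op_param.paddings = pads;      // operator side
kernel_param.paddings = pads;  // kernel side shares the same storage
(*op_param.paddings)[0] = 1;   // dynamic update by the operator...
// ...is visible immediately: (*kernel_param.paddings)[0] == 1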
...@@ -64,21 +64,25 @@ using paddle::lite::Timer; ...@@ -64,21 +64,25 @@ using paddle::lite::Timer;
DDim compute_out_dim(const DDim& dim_in, DDim compute_out_dim(const DDim& dim_in,
const paddle::lite::operators::ConvParam& param) { const paddle::lite::operators::ConvParam& param) {
DDim dim_out = dim_in; DDim dim_out = dim_in;
auto paddings = *param.paddings;
auto dilations = *param.dilations;
dim_out[1] = param.filter->dims()[0]; dim_out[1] = param.filter->dims()[0];
auto kernel_h = param.filter->dims()[2]; auto kernel_h = param.filter->dims()[2];
auto kernel_w = param.filter->dims()[3]; auto kernel_w = param.filter->dims()[3];
auto h = dim_in[2]; auto h = dim_in[2];
auto w = dim_in[3]; auto w = dim_in[3];
int dila_h = param.dilations[0]; int dila_h = dilations[0];
int dila_w = param.dilations[1]; int dila_w = dilations[1];
int pad_h = param.paddings[0]; int pad_top = paddings[0];
int pad_w = param.paddings[1]; int pad_bottom = paddings[1];
int pad_left = paddings[2];
int pad_right = paddings[3];
int stride_h = param.strides[0]; int stride_h = param.strides[0];
int stride_w = param.strides[1]; int stride_w = param.strides[1];
auto kernel_exten = dila_h * (kernel_h - 1) + 1; auto kernel_exten = dila_h * (kernel_h - 1) + 1;
auto hout = (h + 2 * pad_h - kernel_exten) / stride_h + 1; auto hout = (h + pad_top + pad_bottom - kernel_exten) / stride_h + 1;
kernel_exten = dila_w * (kernel_w - 1) + 1; kernel_exten = dila_w * (kernel_w - 1) + 1;
auto wout = (w + 2 * pad_w - kernel_exten) / stride_w + 1; auto wout = (w + pad_left + pad_right - kernel_exten) / stride_w + 1;
dim_out[2] = hout; dim_out[2] = hout;
dim_out[3] = wout; dim_out[3] = wout;
return dim_out; return dim_out;
...@@ -110,8 +114,8 @@ void test_conv_fp32(const std::vector<DDim>& input_dims, ...@@ -110,8 +114,8 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
param.bias->set_precision(PRECISION(kFloat)); param.bias->set_precision(PRECISION(kFloat));
} }
param.strides = strides; param.strides = strides;
param.paddings = pads; param.paddings = std::make_shared<std::vector<int>>(pads);
param.dilations = dilas; param.dilations = std::make_shared<std::vector<int>>(dilas);
param.fuse_relu = flag_relu; param.fuse_relu = flag_relu;
param.groups = group; param.groups = group;
...@@ -162,7 +166,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims, ...@@ -162,7 +166,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
param.output->Resize(dim_out); param.output->Resize(dim_out);
paddle::lite::fill_tensor_rand(*param.x, -1.f, 1.f); paddle::lite::fill_tensor_rand(*param.x, -1.f, 1.f);
// paddle::lite::fill_tensor_const(*param.x, 1.f); // paddle::lite::fill_tensor_const(*param.x, 1.f);
auto din = param.x->data<float>(); auto din = param.x->data<float>();
Tensor tout_basic; Tensor tout_basic;
...@@ -189,7 +193,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims, ...@@ -189,7 +193,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
strides[0], strides[0],
dilas[1], dilas[1],
dilas[0], dilas[0],
pads[1], pads[2],
pads[0], pads[0],
flag_bias, flag_bias,
flag_relu); flag_relu);
...@@ -235,7 +239,8 @@ void test_conv_fp32(const std::vector<DDim>& input_dims, ...@@ -235,7 +239,8 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
LOG(FATAL) << "test fp32 conv: input: " << dim_in LOG(FATAL) << "test fp32 conv: input: " << dim_in
<< ", output: " << dim_out << ", output: " << dim_out
<< ", weight dim: " << weight_dim << ", weight dim: " << weight_dim
<< ", pad: " << pads[0] << ", " << pads[1] << ", pad: " << pads[0] << ", " << pads[1] << ", "
<< pads[2] << ", " << pads[3]
<< ", stride: " << strides[0] << ", " << strides[1] << ", stride: " << strides[0] << ", " << strides[1]
<< ", dila_: " << dilas[0] << ", " << dilas[1] << ", dila_: " << dilas[0] << ", " << dilas[1]
<< ", bias: " << (flag_bias ? "true" : "false") << ", bias: " << (flag_bias ? "true" : "false")
...@@ -280,27 +285,33 @@ void test_conv_fp32(const std::vector<DDim>& input_dims, ...@@ -280,27 +285,33 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
TEST(TestConv3x3DW, test_conv3x3_depthwise) { TEST(TestConv3x3DW, test_conv3x3_depthwise) {
if (FLAGS_basic_test) { if (FLAGS_basic_test) {
for (auto& stride : {1, 2}) { for (auto& stride : {1, 2}) {
for (auto& pad : {0, 1}) { for (auto& pad_left : {0, 1, 2}) {
for (auto& flag_bias : {false, true}) { for (auto& pad_right : {0, 1, 2}) {
for (auto& flag_relu : {false, true}) { for (auto& pad_top : {0, 1, 2}) {
for (auto& c : {1, 3, 5, 8, 16, 32}) { for (auto& pad_bottom : {0, 1, 2}) {
std::vector<DDim> dims; for (auto& flag_bias : {false, true}) {
DDim weights_dim({c, 1, 3, 3}); for (auto& flag_relu : {false, true}) {
for (auto& batch : {1, 2}) { for (auto& c : {1, 3, 5, 8, 16, 32}) {
for (auto& h : {1, 3, 15, 19, 28, 32, 75}) { std::vector<DDim> dims;
dims.push_back(DDim({batch, c, h, h})); DDim weights_dim({c, 1, 3, 3});
for (auto& batch : {1, 2}) {
for (auto& h : {1, 3, 15, 19, 28, 32, 75}) {
dims.push_back(DDim({batch, c, h, h}));
}
}
test_conv_fp32(dims,
weights_dim,
c,
{stride, stride},
{pad_top, pad_bottom, pad_left, pad_right},
{1, 1},
flag_bias,
flag_relu,
{1, 2, 4},
{FLAGS_power_mode});
}
} }
} }
test_conv_fp32(dims,
weights_dim,
c,
{stride, stride},
{pad, pad},
{1, 1},
flag_bias,
flag_relu,
{1, 2, 4},
{FLAGS_power_mode});
} }
} }
} }
...@@ -329,7 +340,7 @@ TEST(TestConv5x5DW, test_conv5x5_depthwise) { ...@@ -329,7 +340,7 @@ TEST(TestConv5x5DW, test_conv5x5_depthwise) {
weights_dim, weights_dim,
c, c,
{stride, stride}, {stride, stride},
{pad, pad}, {pad, pad, pad, pad},
{1, 1}, {1, 1},
flag_bias, flag_bias,
flag_relu, flag_relu,
...@@ -366,7 +377,7 @@ TEST(TestConv1x1s1, test_conv1x1s1) { ...@@ -366,7 +377,7 @@ TEST(TestConv1x1s1, test_conv1x1s1) {
weights_dim, weights_dim,
g, g,
{1, 1}, {1, 1},
{0, 0}, {0, 0, 0, 0},
{1, 1}, {1, 1},
flag_bias, flag_bias,
flag_relu, flag_relu,
...@@ -386,26 +397,32 @@ TEST(TestConv3x3s1, test_conv_3x3s1) { ...@@ -386,26 +397,32 @@ TEST(TestConv3x3s1, test_conv_3x3s1) {
if (FLAGS_basic_test) { if (FLAGS_basic_test) {
for (auto& cin : {1, 3, 8, 32, 48}) { for (auto& cin : {1, 3, 8, 32, 48}) {
for (auto& cout : {1, 5, 8, 32, 48}) { for (auto& cout : {1, 5, 8, 32, 48}) {
for (auto& pad : {1, 2}) { for (auto& pad_left : {1, 2}) {
for (auto& flag_bias : {false, true}) { for (auto& pad_right : {1, 2}) {
for (auto& flag_relu : {false, true}) { for (auto& pad_top : {1, 2}) {
std::vector<DDim> dims; for (auto& pad_bottom : {1, 2}) {
DDim weights_dim({cout, cin, 3, 3}); for (auto& flag_bias : {false, true}) {
for (auto& batch : {1, 2}) { for (auto& flag_relu : {false, true}) {
for (auto& h : {1, 7, 19, 56, 32}) { std::vector<DDim> dims;
dims.push_back(DDim({batch, cin, h, h})); DDim weights_dim({cout, cin, 3, 3});
for (auto& batch : {1, 2}) {
for (auto& h : {1, 7, 19, 56, 32}) {
dims.push_back(DDim({batch, cin, h, h}));
}
}
test_conv_fp32(dims,
weights_dim,
1,
{1, 1},
{pad_top, pad_bottom, pad_left, pad_right},
{1, 1},
flag_bias,
flag_relu,
{1, 2, 4},
{FLAGS_power_mode});
}
} }
} }
test_conv_fp32(dims,
weights_dim,
1,
{1, 1},
{pad, pad},
{1, 1},
flag_bias,
flag_relu,
{1, 2, 4},
{FLAGS_power_mode});
} }
} }
} }
...@@ -420,26 +437,32 @@ TEST(TestConv3x3s2, test_conv_3x3s2) { ...@@ -420,26 +437,32 @@ TEST(TestConv3x3s2, test_conv_3x3s2) {
if (FLAGS_basic_test) { if (FLAGS_basic_test) {
for (auto& cin : {1, 3, 8, 32}) { for (auto& cin : {1, 3, 8, 32}) {
for (auto& cout : {1, 5, 8, 32}) { for (auto& cout : {1, 5, 8, 32}) {
for (auto& pad : {1, 2}) { for (auto& pad_left : {1, 2}) {
for (auto& flag_bias : {false, true}) { for (auto& pad_right : {1, 2}) {
for (auto& flag_relu : {false, true}) { for (auto& pad_top : {1, 2}) {
std::vector<DDim> dims; for (auto& pad_bottom : {1, 2}) {
DDim weights_dim({cout, cin, 3, 3}); for (auto& flag_bias : {false, true}) {
for (auto& batch : {1, 2}) { for (auto& flag_relu : {false, true}) {
for (auto& h : {1, 7, 19, 28, 75, 56, 32}) { std::vector<DDim> dims;
dims.push_back(DDim({batch, cin, h, h})); DDim weights_dim({cout, cin, 3, 3});
for (auto& batch : {1, 2}) {
for (auto& h : {1, 7, 19, 28, 75, 56, 32}) {
dims.push_back(DDim({batch, cin, h, h}));
}
}
test_conv_fp32(dims,
weights_dim,
1,
{2, 2},
{pad_top, pad_bottom, pad_left, pad_right},
{1, 1},
flag_bias,
flag_relu,
{1, 2, 4},
{FLAGS_power_mode});
}
} }
} }
test_conv_fp32(dims,
weights_dim,
1,
{2, 2},
{pad, pad},
{1, 1},
flag_bias,
flag_relu,
{1, 2, 4},
{FLAGS_power_mode});
} }
} }
} }
...@@ -458,30 +481,37 @@ TEST(TestConvRand, test_conv_rand) { ...@@ -458,30 +481,37 @@ TEST(TestConvRand, test_conv_rand) {
for (auto& kw : {1, 2, 3}) { for (auto& kw : {1, 2, 3}) {
for (auto& kh : {1, 2, 3}) { for (auto& kh : {1, 2, 3}) {
for (auto& stride : {1, 2}) { for (auto& stride : {1, 2}) {
for (auto& pad : {0, 1, 2}) { for (auto& pad_left : {0, 1, 2}) {
for (auto& dila : {1, 2}) { for (auto& pad_right : {0, 1, 2}) {
for (auto& flag_bias : {false, true}) { for (auto& pad_top : {0, 1, 2}) {
for (auto& flag_relu : {false, true}) { for (auto& pad_bottom : {0, 1, 2}) {
if (cin % g != 0 || cout % g != 0) { for (auto& dila : {1, 2}) {
continue; for (auto& flag_bias : {false, true}) {
} for (auto& flag_relu : {false, true}) {
std::vector<DDim> dims; if (cin % g != 0 || cout % g != 0) {
DDim weights_dim({cout, cin / g, kh, kw}); continue;
for (auto& batch : {1, 2}) { }
for (auto& h : {1, 3, 19, 32, 28}) { std::vector<DDim> dims;
dims.push_back(DDim({batch, cin, h, h})); DDim weights_dim({cout, cin / g, kh, kw});
for (auto& batch : {1, 2}) {
for (auto& h : {1, 3, 19, 32, 28}) {
dims.push_back(DDim({batch, cin, h, h}));
}
}
test_conv_fp32(
dims,
weights_dim,
g,
{stride, stride},
{pad_top, pad_bottom, pad_left, pad_right},
{dila, dila},
flag_bias,
flag_relu,
{1, 2, 4},
{FLAGS_power_mode});
}
} }
} }
test_conv_fp32(dims,
weights_dim,
g,
{stride, stride},
{pad, pad},
{dila, dila},
flag_bias,
flag_relu,
{1, 2, 4},
{FLAGS_power_mode});
} }
} }
} }
...@@ -510,7 +540,7 @@ TEST(TestConvCustom, test_conv_fp32_custom_size) { ...@@ -510,7 +540,7 @@ TEST(TestConvCustom, test_conv_fp32_custom_size) {
FLAGS_kernel_w}), FLAGS_kernel_w}),
FLAGS_group, FLAGS_group,
{FLAGS_stride_h, FLAGS_stride_w}, {FLAGS_stride_h, FLAGS_stride_w},
{FLAGS_pad_h, FLAGS_pad_w}, {FLAGS_pad_h, FLAGS_pad_h, FLAGS_pad_w, FLAGS_pad_w},
{FLAGS_dila_h, FLAGS_dila_w}, {FLAGS_dila_h, FLAGS_dila_w},
FLAGS_flag_bias, FLAGS_flag_bias,
FLAGS_flag_relu, FLAGS_flag_relu,
......
...@@ -63,22 +63,22 @@ using paddle::lite::Timer; ...@@ -63,22 +63,22 @@ using paddle::lite::Timer;
DDim compute_out_dim(const DDim& dim_in, DDim compute_out_dim(const DDim& dim_in,
const paddle::lite::operators::ConvParam& param) { const paddle::lite::operators::ConvParam& param) {
auto paddings = *param.paddings;
auto dilations = *param.dilations;
DDim dim_out = dim_in; DDim dim_out = dim_in;
dim_out[1] = param.filter->dims()[0]; dim_out[1] = param.filter->dims()[0];
auto kernel_h = param.filter->dims()[2]; auto kernel_h = param.filter->dims()[2];
auto kernel_w = param.filter->dims()[3]; auto kernel_w = param.filter->dims()[3];
auto h = dim_in[2]; auto h = dim_in[2];
auto w = dim_in[3]; auto w = dim_in[3];
int dila_h = param.dilations[0]; int dila_h = dilations[0];
int dila_w = param.dilations[1]; int dila_w = dilations[1];
int pad_h = param.paddings[0];
int pad_w = param.paddings[1];
int stride_h = param.strides[0]; int stride_h = param.strides[0];
int stride_w = param.strides[1]; int stride_w = param.strides[1];
auto kernel_exten = dila_h * (kernel_h - 1) + 1; auto kernel_exten = dila_h * (kernel_h - 1) + 1;
auto hout = (h + 2 * pad_h - kernel_exten) / stride_h + 1; auto hout = (h + paddings[0] + paddings[1] - kernel_exten) / stride_h + 1;
kernel_exten = dila_w * (kernel_w - 1) + 1; kernel_exten = dila_w * (kernel_w - 1) + 1;
auto wout = (w + 2 * pad_w - kernel_exten) / stride_w + 1; auto wout = (w + paddings[2] + paddings[3] - kernel_exten) / stride_w + 1;
dim_out[2] = hout; dim_out[2] = hout;
dim_out[3] = wout; dim_out[3] = wout;
return dim_out; return dim_out;
...@@ -104,8 +104,8 @@ void get_conv_param(const DDim& dim_w, ...@@ -104,8 +104,8 @@ void get_conv_param(const DDim& dim_w,
param->bias->set_precision(PRECISION(kFloat)); param->bias->set_precision(PRECISION(kFloat));
} }
param->strides = strides; param->strides = strides;
param->paddings = pads; param->paddings = std::make_shared<std::vector<int>>(pads);
param->dilations = dila; param->dilations = std::make_shared<std::vector<int>>(dila);
param->fuse_relu = flag_relu; param->fuse_relu = flag_relu;
param->groups = g; param->groups = g;
...@@ -288,7 +288,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims, ...@@ -288,7 +288,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
strides[0], strides[0],
dilas[1], dilas[1],
dilas[0], dilas[0],
pads[1], pads[2],
pads[0], pads[0],
flag_bias, flag_bias,
flag_relu); flag_relu);
...@@ -358,7 +358,8 @@ void test_conv_int8(const std::vector<DDim>& input_dims, ...@@ -358,7 +358,8 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
LOG(FATAL) << "test int8 conv, fp32 out: input: " << dim_in LOG(FATAL) << "test int8 conv, fp32 out: input: " << dim_in
<< ", output: " << dim_out << ", output: " << dim_out
<< ", weight dim: " << weight_dim << ", weight dim: " << weight_dim
<< ", pad: " << pads[0] << ", " << pads[1] << ", pad: " << pads[0] << ", " << pads[1] << ", "
<< pads[2] << ", " << pads[3]
<< ", stride: " << strides[0] << ", " << strides[1] << ", stride: " << strides[0] << ", " << strides[1]
<< ", dila_: " << dilas[0] << ", " << dilas[1] << ", dila_: " << dilas[0] << ", " << dilas[1]
<< ", bias: " << (flag_bias ? "true" : "false") << ", bias: " << (flag_bias ? "true" : "false")
...@@ -416,7 +417,8 @@ void test_conv_int8(const std::vector<DDim>& input_dims, ...@@ -416,7 +417,8 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
LOG(FATAL) << "test int8 conv, int8 out: input: " << dim_in LOG(FATAL) << "test int8 conv, int8 out: input: " << dim_in
<< ", output: " << dim_out << ", output: " << dim_out
<< ", weight dim: " << weight_dim << ", weight dim: " << weight_dim
<< ", pad: " << pads[0] << ", " << pads[1] << ", pad: " << pads[0] << ", " << pads[1] << ", "
<< pads[2] << ", " << pads[3]
<< ", stride: " << strides[0] << ", " << strides[1] << ", stride: " << strides[0] << ", " << strides[1]
<< ", dila_: " << dilas[0] << ", " << dilas[1] << ", dila_: " << dilas[0] << ", " << dilas[1]
<< ", bias: " << (flag_bias ? "true" : "false") << ", bias: " << (flag_bias ? "true" : "false")
...@@ -428,9 +430,9 @@ void test_conv_int8(const std::vector<DDim>& input_dims, ...@@ -428,9 +430,9 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
} }
LOG(INFO) << "test int8 conv: input: " << dim_in LOG(INFO) << "test int8 conv: input: " << dim_in
<< ", output: " << dim_out << ", weight dim: " << weight_dim << ", output: " << dim_out << ", weight dim: " << weight_dim
<< ", pad: " << pads[0] << ", " << pads[1] << ", pad: " << pads[0] << ", " << pads[1] << ", " << pads[2]
<< ", stride: " << strides[0] << ", " << strides[1] << ", " << pads[3] << ", stride: " << strides[0] << ", "
<< ", dila_: " << dilas[0] << ", " << dilas[1] << strides[1] << ", dila_: " << dilas[0] << ", " << dilas[1]
<< ", bias: " << (flag_bias ? "true" : "false") << ", bias: " << (flag_bias ? "true" : "false")
<< ", relu: " << (flag_relu ? "true" : "false") << ", relu: " << (flag_relu ? "true" : "false")
<< ", threads: " << th << ", power_mode: " << cls << ", threads: " << th << ", power_mode: " << cls
...@@ -473,7 +475,7 @@ TEST(TestConv3x3DWInt8, test_conv3x3_depthwise) { ...@@ -473,7 +475,7 @@ TEST(TestConv3x3DWInt8, test_conv3x3_depthwise) {
weights_dim, weights_dim,
c, c,
{stride, stride}, {stride, stride},
{pad, pad}, {pad, pad, pad, pad},
{1, 1}, {1, 1},
flag_bias, flag_bias,
flag_relu, flag_relu,
...@@ -507,7 +509,7 @@ TEST(TestConv5x5DWInt8, test_conv5x5_depthwise) { ...@@ -507,7 +509,7 @@ TEST(TestConv5x5DWInt8, test_conv5x5_depthwise) {
weights_dim, weights_dim,
c, c,
{stride, stride}, {stride, stride},
{pad, pad}, {pad, pad, pad, pad},
{1, 1}, {1, 1},
flag_bias, flag_bias,
flag_relu, flag_relu,
...@@ -544,7 +546,7 @@ TEST(TestConv1x1s1Int8, test_conv1x1s1) { ...@@ -544,7 +546,7 @@ TEST(TestConv1x1s1Int8, test_conv1x1s1) {
weights_dim, weights_dim,
g, g,
{1, 1}, {1, 1},
{0, 0}, {0, 0, 0, 0},
{1, 1}, {1, 1},
flag_bias, flag_bias,
flag_relu, flag_relu,
...@@ -564,26 +566,32 @@ TEST(TestConv3x3s1Int8, test_conv_3x3s1) { ...@@ -564,26 +566,32 @@ TEST(TestConv3x3s1Int8, test_conv_3x3s1) {
if (FLAGS_basic_test) { if (FLAGS_basic_test) {
for (auto& cin : {1, 3, 8, 32, 48}) { for (auto& cin : {1, 3, 8, 32, 48}) {
for (auto& cout : {1, 5, 8, 32, 48}) { for (auto& cout : {1, 5, 8, 32, 48}) {
for (auto& pad : {1, 2}) { for (auto& pad_top : {1, 2}) {
for (auto& flag_bias : {false, true}) { for (auto& pad_bottom : {1, 2}) {
for (auto& flag_relu : {false, true}) { for (auto& pad_left : {1, 2}) {
std::vector<DDim> dims; for (auto& pad_right : {1, 2}) {
DDim weights_dim({cout, cin, 3, 3}); for (auto& flag_bias : {false, true}) {
for (auto& batch : {1, 2}) { for (auto& flag_relu : {false, true}) {
for (auto& h : {1, 7, 19, 56, 32}) { std::vector<DDim> dims;
dims.push_back(DDim({batch, cin, h, h})); DDim weights_dim({cout, cin, 3, 3});
for (auto& batch : {1, 2}) {
for (auto& h : {1, 7, 19, 56, 32}) {
dims.push_back(DDim({batch, cin, h, h}));
}
}
test_conv_int8(dims,
weights_dim,
1,
{1, 1},
{pad_top, pad_bottom, pad_left, pad_right},
{1, 1},
flag_bias,
flag_relu,
{1, 2, 4},
{FLAGS_power_mode});
}
} }
} }
test_conv_int8(dims,
weights_dim,
1,
{1, 1},
{pad, pad},
{1, 1},
flag_bias,
flag_relu,
{1, 2, 4},
{FLAGS_power_mode});
} }
} }
} }
...@@ -598,26 +606,32 @@ TEST(TestConv3x3s2Int8, test_conv_3x3s2) { ...@@ -598,26 +606,32 @@ TEST(TestConv3x3s2Int8, test_conv_3x3s2) {
if (FLAGS_basic_test) { if (FLAGS_basic_test) {
for (auto& cin : {1, 3, 8, 32}) { for (auto& cin : {1, 3, 8, 32}) {
for (auto& cout : {1, 5, 8, 32}) { for (auto& cout : {1, 5, 8, 32}) {
for (auto& pad : {1, 2}) { for (auto& pad_top : {1, 2}) {
for (auto& flag_bias : {false, true}) { for (auto& pad_bottom : {1, 2}) {
for (auto& flag_relu : {false, true}) { for (auto& pad_left : {1, 2}) {
std::vector<DDim> dims; for (auto& pad_right : {1, 2}) {
DDim weights_dim({cout, cin, 3, 3}); for (auto& flag_bias : {false, true}) {
for (auto& batch : {1, 2}) { for (auto& flag_relu : {false, true}) {
for (auto& h : {1, 7, 19, 28, 75, 56, 32}) { std::vector<DDim> dims;
dims.push_back(DDim({batch, cin, h, h})); DDim weights_dim({cout, cin, 3, 3});
for (auto& batch : {1, 2}) {
for (auto& h : {1, 7, 19, 28, 75, 56, 32}) {
dims.push_back(DDim({batch, cin, h, h}));
}
}
test_conv_int8(dims,
weights_dim,
1,
{2, 2},
{pad_top, pad_bottom, pad_left, pad_right},
{1, 1},
flag_bias,
flag_relu,
{1, 2, 4},
{FLAGS_power_mode});
}
} }
} }
test_conv_int8(dims,
weights_dim,
1,
{2, 2},
{pad, pad},
{1, 1},
flag_bias,
flag_relu,
{1, 2, 4},
{FLAGS_power_mode});
} }
} }
} }
...@@ -636,30 +650,37 @@ TEST(TestConvRandInt8, test_conv_rand) { ...@@ -636,30 +650,37 @@ TEST(TestConvRandInt8, test_conv_rand) {
for (auto& kw : {1, 2, 3}) { for (auto& kw : {1, 2, 3}) {
for (auto& kh : {1, 2, 3}) { for (auto& kh : {1, 2, 3}) {
for (auto& stride : {1, 2}) { for (auto& stride : {1, 2}) {
for (auto& pad : {0, 1, 2}) { for (auto& pad_top : {0, 1, 2}) {
for (auto& dila : {1, 2}) { for (auto& pad_bottom : {0, 1, 2}) {
for (auto& flag_bias : {false, true}) { for (auto& pad_left : {0, 1, 2}) {
for (auto& flag_relu : {false, true}) { for (auto& pad_right : {0, 1, 2}) {
if (cin % g != 0 || cout % g != 0) { for (auto& dila : {1, 2}) {
continue; for (auto& flag_bias : {false, true}) {
} for (auto& flag_relu : {false, true}) {
std::vector<DDim> dims; if (cin % g != 0 || cout % g != 0) {
DDim weights_dim({cout, cin / g, kh, kw}); continue;
for (auto& batch : {1, 2}) { }
for (auto& h : {1, 3, 19, 32, 28}) { std::vector<DDim> dims;
dims.push_back(DDim({batch, cin, h, h})); DDim weights_dim({cout, cin / g, kh, kw});
for (auto& batch : {1, 2}) {
for (auto& h : {1, 3, 19, 32, 28}) {
dims.push_back(DDim({batch, cin, h, h}));
}
}
test_conv_int8(
dims,
weights_dim,
g,
{stride, stride},
{pad_top, pad_bottom, pad_left, pad_right},
{dila, dila},
flag_bias,
flag_relu,
{1, 2, 4},
{FLAGS_power_mode});
}
} }
} }
test_conv_int8(dims,
weights_dim,
g,
{stride, stride},
{pad, pad},
{dila, dila},
flag_bias,
flag_relu,
{1, 2, 4},
{FLAGS_power_mode});
} }
} }
} }
...@@ -688,7 +709,7 @@ TEST(TestConvCustomInt8, test_conv_custom_size) { ...@@ -688,7 +709,7 @@ TEST(TestConvCustomInt8, test_conv_custom_size) {
FLAGS_kernel_w}), FLAGS_kernel_w}),
FLAGS_group, FLAGS_group,
{FLAGS_stride_h, FLAGS_stride_w}, {FLAGS_stride_h, FLAGS_stride_w},
{FLAGS_pad_h, FLAGS_pad_w}, {FLAGS_pad_h, FLAGS_pad_h, FLAGS_pad_w, FLAGS_pad_w},
{FLAGS_dila_h, FLAGS_dila_w}, {FLAGS_dila_h, FLAGS_dila_w},
FLAGS_flag_bias, FLAGS_flag_bias,
FLAGS_flag_relu, FLAGS_flag_relu,
......
...@@ -66,10 +66,12 @@ DDim compute_out_dim(const DDim& dim_in, ...@@ -66,10 +66,12 @@ DDim compute_out_dim(const DDim& dim_in,
auto filter_dims = param.filter->dims(); auto filter_dims = param.filter->dims();
DDim output_shape = dim_in; DDim output_shape = dim_in;
output_shape[1] = filter_dims[1] * param.groups; output_shape[1] = filter_dims[1] * param.groups;
auto paddings = *param.paddings;
auto dilations = *param.dilations;
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
int kernel_extent = param.dilations[i] * (filter_dims[i + 2] - 1) + 1; int kernel_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
int output_len = (dim_in[i + 2] - 1) * param.strides[i] + kernel_extent - int output_len = (dim_in[i + 2] - 1) * param.strides[i] + kernel_extent -
2 * param.paddings[i]; (paddings[2 * i] + paddings[2 * i + 1]);
output_shape[i + 2] = output_len; output_shape[i + 2] = output_len;
} }
return output_shape; return output_shape;
...@@ -101,8 +103,8 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims, ...@@ -101,8 +103,8 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims,
param.bias->set_precision(PRECISION(kFloat)); param.bias->set_precision(PRECISION(kFloat));
} }
param.strides = strides; param.strides = strides;
param.paddings = pads; param.paddings = std::make_shared<std::vector<int>>(pads);
param.dilations = dilas; param.dilations = std::make_shared<std::vector<int>>(dilas);
param.fuse_relu = flag_relu; param.fuse_relu = flag_relu;
param.groups = group; param.groups = group;
...@@ -182,7 +184,7 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims, ...@@ -182,7 +184,7 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims,
strides[0], strides[0],
dilas[1], dilas[1],
dilas[0], dilas[0],
pads[1], pads[2],
pads[0], pads[0],
flag_bias, flag_bias,
flag_relu); flag_relu);
...@@ -296,7 +298,7 @@ TEST(TestConvRand, test_conv_transpose_rand) { ...@@ -296,7 +298,7 @@ TEST(TestConvRand, test_conv_transpose_rand) {
weights_dim, weights_dim,
g, g,
{stride, stride}, {stride, stride},
{pad, pad}, {pad, pad, pad, pad},
{dila, dila}, {dila, dila},
flag_bias, flag_bias,
flag_relu, flag_relu,
...@@ -330,7 +332,7 @@ TEST(TestConvCustom, test_conv_transpose_fp32_custom_size) { ...@@ -330,7 +332,7 @@ TEST(TestConvCustom, test_conv_transpose_fp32_custom_size) {
FLAGS_kernel_w}), FLAGS_kernel_w}),
FLAGS_group, FLAGS_group,
{FLAGS_stride_h, FLAGS_stride_w}, {FLAGS_stride_h, FLAGS_stride_w},
{FLAGS_pad_h, FLAGS_pad_w}, {FLAGS_pad_h, FLAGS_pad_h, FLAGS_pad_w, FLAGS_pad_w},
{FLAGS_dila_h, FLAGS_dila_w}, {FLAGS_dila_h, FLAGS_dila_w},
FLAGS_flag_bias, FLAGS_flag_bias,
FLAGS_flag_relu, FLAGS_flag_relu,
......