diff --git a/paddle/fluid/lite/kernels/arm/conv_compute_test.cc b/paddle/fluid/lite/kernels/arm/conv_compute_test.cc
index 01d5eb7b8b0ef0940594584534cb556e204163e0..284d4c7342caa08aca1489daf425284a9ff8143c 100644
--- a/paddle/fluid/lite/kernels/arm/conv_compute_test.cc
+++ b/paddle/fluid/lite/kernels/arm/conv_compute_test.cc
@@ -456,6 +456,389 @@ TEST(conv_arm_int8, int8_fp32) {
   }
 }
 
+TEST(conv_direct_int8, compute) {
+  DeviceInfo::Init();
+  for (auto n : {1, 2}) {
+    for (auto ic : {1, 3, 8}) {
+      for (auto oc : {1, 3, 8}) {
+        for (auto ih : {5, 15, 28}) {
+          for (auto iw : {5, 15, 28}) {
+            for (auto flag_bias : {false, true}) {
+              for (auto flag_relu : {false, true}) {
+                for (auto depthwise : {false, /*true*/}) {
+                  for (auto dilation : {1}) {
+                    for (auto stride : {1, 2}) {
+                      for (auto padding : {1}) {
+                        for (auto ks : {3}) {
+                          int group = 1;
+                          if (depthwise) {  // depthwise convolution?
+                            group = oc = ic;
+                          }
+
+                          const int dks = dilation * (ks - 1) + 1;
+                          int oh = (ih + 2 * padding - dks) / stride + 1;
+                          int ow = (iw + 2 * padding - dks) / stride + 1;
+                          std::vector<int64_t> input_shape = {n, ic, ih, iw};
+                          std::vector<int64_t> filter_shape = {oc, ic / group, ks, ks};
+                          std::vector<int64_t> bias_shape({1, oc, 1, 1});
+                          std::vector<int64_t> output_shape({n, oc, oh, ow});
+
+                          Tensor input_fp32, input_int8;
+                          Tensor filter_fp32, filter_int8;
+                          Tensor bias_int32;
+                          Tensor output_int32_ref, output_int32;
+                          Tensor output_fp32_ref, output_fp32;
+                          Tensor output_int8_ref, output_int8;
+
+                          input_fp32.Resize(input_shape);
+                          input_int8.Resize(input_shape);
+                          filter_fp32.Resize(filter_shape);
+                          filter_int8.Resize(filter_shape);
+                          bias_int32.Resize(bias_shape);
+                          output_int32.Resize(output_shape);
+                          output_int32_ref.Resize(output_shape);
+                          output_fp32_ref.Resize(output_shape);
+                          output_fp32.Resize(output_shape);
+                          output_int8_ref.Resize(output_shape);
+                          output_int8.Resize(output_shape);
+
+                          float* input_fp32_data = input_fp32.mutable_data<float>();
+                          int8_t* input_int8_data = input_int8.mutable_data<int8_t>();
+
+                          float* filter_fp32_data = filter_fp32.mutable_data<float>();
+                          int8_t* filter_int8_data = filter_int8.mutable_data<int8_t>();
+
+                          int* bias_int32_data = bias_int32.mutable_data<int>();
+
+                          for (int i = 0; i < input_fp32.dims().production(); i++) {
+                            input_fp32_data[i] = i % 10 * (i % 3 - 1);
+                          }
+                          for (int i = 0; i < filter_fp32.dims().production(); i++) {
+                            filter_fp32_data[i] = i % 10 * (i % 3 - 1);
+                          }
+                          for (int i = 0; i < bias_int32.dims().production(); i++) {
+                            bias_int32_data[i] = i % 10 * (i % 3 - 1);
+                          }
+
+                          std::vector<float> in_scale;
+                          lite::arm::math::get_tensor_scale<PRECISION(kFloat)>(
+                              input_fp32, &in_scale, -1, 127.f);
+                          lite::arm::math::trans_tensor_fp32_to_int8(
+                              &input_fp32, &input_int8, in_scale[0]);
+
+                          std::vector<float> w_scale;
+                          lite::arm::math::get_tensor_scale<PRECISION(kFloat)>(
+                              filter_fp32, &w_scale, -1, 127.f);
+                          int axis_size = oc;
+                          int inner_size = ic / group * ks * ks;
+                          w_scale = lite::arm::math::get_tensor_scale_n(
+                              filter_fp32_data, axis_size, inner_size, 127.f);
+                          lite::arm::math::fp32_to_int8(
+                              filter_fp32_data, filter_int8_data,
+                              w_scale.data(), axis_size, 1, inner_size);
+
+                          operators::ConvParam param;
+                          param.x = &input_int8;
+                          param.filter = &filter_int8;
+                          if (flag_bias) {
+                            param.bias = &bias_int32;
+                          }
+                          param.fuse_relu = false;
+                          param.paddings = std::vector<int>({padding, padding});
+                          param.strides = std::vector<int>({stride, stride});
+                          param.dilations = std::vector<int>({dilation, dilation});
+                          param.groups = group;
+                          param.output = &output_int32_ref;
+                          conv_compute_ref<int8_t, int>(param);
+
+                          int* output_int32_ref_data =
+                              output_int32_ref.mutable_data<int>();
+
+                          // ============ int8direct_int32 ============
+                          param.output = &output_int32;
+                          std::unique_ptr<KernelContext> ctx_int32(new KernelContext);
+                          lite::arm::math::DirectConvInt8<PRECISION(kInt32)>
+                              int8direct_int32;
+                          int8direct_int32.init(param, &ctx_int32->As<ARMContext>());
+                          int8direct_int32.create(param, &ctx_int32->As<ARMContext>());
+                          int8direct_int32.run(param);
+                          int* output_int32_data = output_int32.mutable_data<int>();
+                          for (int i = 0; i < output_int32.dims().production(); i++) {
+                            EXPECT_NEAR(output_int32_data[i],
+                                        output_int32_ref_data[i], 1e-3);
+                          }
+
+                          // ============ int8direct_int8 ============
+                          int8_t* output_int8_ref_data =
+                              output_int8_ref.mutable_data<int8_t>();
+                          lite::arm::math::trans_tensor_int32_to_int8(
+                              &output_int32_ref, &output_int8_ref, in_scale[0],
+                              1, w_scale);
+                          param.output = &output_int8;
+                          param.input_scale = in_scale[0];
+                          param.output_scale = 1;
+                          param.weight_scale = w_scale;
+                          std::unique_ptr<KernelContext> ctx_int8(new KernelContext);
+                          lite::arm::math::DirectConvInt8<PRECISION(kInt8)>
+                              int8direct_int8;
+                          int8direct_int8.init(param, &ctx_int8->As<ARMContext>());
+                          int8direct_int8.create(param, &ctx_int8->As<ARMContext>());
+                          int8direct_int8.run(param);
+                          int8_t* output_int8_data = output_int8.mutable_data<int8_t>();
+                          for (int i = 0; i < output_int8.dims().production(); i++) {
+                            EXPECT_NEAR(output_int8_data[i],
+                                        output_int8_ref_data[i], 1e-3);
+                          }
+
+                          // ============ int8direct_float32 ============
+                          float* output_fp32_ref_data =
+                              output_fp32_ref.mutable_data<float>();
+                          lite::arm::math::trans_tensor_int32_to_fp32(
+                              &output_int32_ref, &output_fp32_ref, in_scale[0],
+                              w_scale);
+                          param.output = &output_fp32;
+                          param.input_scale = in_scale[0];
+                          param.output_scale = 1;
+                          param.weight_scale = w_scale;
+                          std::unique_ptr<KernelContext> ctx_fp32(new KernelContext);
+                          lite::arm::math::DirectConvInt8<PRECISION(kFloat)>
+                              int8direct_fp32;
+                          int8direct_fp32.init(param, &ctx_fp32->As<ARMContext>());
+                          int8direct_fp32.create(param, &ctx_fp32->As<ARMContext>());
+                          int8direct_fp32.run(param);
+                          float* output_fp32_data = output_fp32.mutable_data<float>();
+                          for (int i = 0; i < output_fp32.dims().production(); i++) {
+                            EXPECT_NEAR(output_fp32_data[i],
+                                        output_fp32_ref_data[i], 1e-3);
+                          }
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(conv_depthwise_int8, compute) {
+  DeviceInfo::Init();
+  for (auto n : {1, 2}) {
+    for (auto ic : {1, 3, 8}) {
+      for (auto ih : {5, 15, 28}) {
+        for (auto iw : {5, 15, 28}) {
+          for (auto flag_bias : {false, true}) {
+            for (auto flag_relu : {false, true}) {
+              for (auto dilation : {1}) {
+                for (auto stride : {1, 2}) {
+                  for (auto padding : {1, 2}) {
+                    for (auto ks : {3, /*5 */}) {
+                      int group = ic;
+                      int oc = ic;
+
+                      bool flag_dw_3x3 = (ks == 3) && (padding == 1) &&
+                                         (stride == 1 || stride == 2);
+                      bool flag_dw_5x5 =
+                          (ks == 5 && stride == 1 && padding == 2);
+                      bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
+                      if (!flag_dw) continue;
+
+                      const int dks = dilation * (ks - 1) + 1;
+                      int oh = (ih + 2 * padding - dks) / stride + 1;
+                      int ow = (iw + 2 * padding - dks) / stride + 1;
+                      std::vector<int64_t> input_shape = {n, ic, ih, iw};
+                      std::vector<int64_t> filter_shape = {oc, ic / group, ks, ks};
+                      std::vector<int64_t> bias_shape({1, oc, 1, 1});
+                      std::vector<int64_t> output_shape({n, oc, oh, ow});
+
+                      Tensor input_fp32, input_int8;
+                      Tensor filter_fp32, filter_int8;
+                      Tensor bias_int32;
+                      Tensor output_int32_ref, output_int32;
+                      Tensor output_fp32_ref, output_fp32;
+                      Tensor output_int8_ref, output_int8;
+
+                      input_fp32.Resize(input_shape);
+                      input_int8.Resize(input_shape);
+                      filter_fp32.Resize(filter_shape);
+                      filter_int8.Resize(filter_shape);
+                      bias_int32.Resize(bias_shape);
+
+                      output_int32.Resize(output_shape);
+                      output_int32_ref.Resize(output_shape);
+                      output_fp32_ref.Resize(output_shape);
+                      output_fp32.Resize(output_shape);
+                      output_int8_ref.Resize(output_shape);
+                      output_int8.Resize(output_shape);
+
+                      float* input_fp32_data = input_fp32.mutable_data<float>();
+                      int8_t* input_int8_data = input_int8.mutable_data<int8_t>();
+                      float* filter_fp32_data = filter_fp32.mutable_data<float>();
+                      int8_t* filter_int8_data = filter_int8.mutable_data<int8_t>();
+
+                      int* bias_int32_data = bias_int32.mutable_data<int>();
+
+                      for (int i = 0; i < input_fp32.dims().production(); i++) {
+                        input_fp32_data[i] = i % 10 * (i % 3 - 1);
+                      }
+                      for (int i = 0; i < filter_fp32.dims().production(); i++) {
+                        filter_fp32_data[i] = i % 10 * (i % 3 - 1);
+                      }
+                      for (int i = 0; i < bias_int32.dims().production(); i++) {
+                        bias_int32_data[i] = i % 10 * (i % 3 - 1);
+                      }
+
+                      std::vector<float> in_scale;
+                      lite::arm::math::get_tensor_scale<PRECISION(kFloat)>(
+                          input_fp32, &in_scale, -1, 127.f);
+                      lite::arm::math::trans_tensor_fp32_to_int8(
+                          &input_fp32, &input_int8, in_scale[0]);
+
+                      std::vector<float> w_scale;
+                      lite::arm::math::get_tensor_scale<PRECISION(kFloat)>(
+                          filter_fp32, &w_scale, -1, 127.f);
+                      int axis_size = oc;
+                      int inner_size = ic / group * ks * ks;
+                      w_scale = lite::arm::math::get_tensor_scale_n(
+                          filter_fp32_data, axis_size, inner_size, 127.f);
+                      lite::arm::math::fp32_to_int8(
+                          filter_fp32_data, filter_int8_data, w_scale.data(),
+                          axis_size, 1, inner_size);
+
+                      operators::ConvParam param;
+                      param.x = &input_int8;
+                      param.filter = &filter_int8;
+                      if (flag_bias) {
+                        param.bias = &bias_int32;
+                      }
+                      param.fuse_relu = false;
+                      param.paddings = std::vector<int>({padding, padding});
+                      param.strides = std::vector<int>({stride, stride});
+                      param.dilations = std::vector<int>({dilation, dilation});
+                      param.groups = group;
+                      param.output = &output_int32_ref;
+                      conv_compute_ref<int8_t, int>(param);
+
+                      int* output_int32_ref_data =
+                          output_int32_ref.mutable_data<int>();
+
+                      // ============ int8depthwise_int32 ============
+                      param.output = &output_int32;
+                      std::unique_ptr<KernelContext> ctx_int32(new KernelContext);
+                      lite::arm::math::DepthwiseConvInt8<PRECISION(kInt32)>
+                          int8depthwise_int32;
+                      int8depthwise_int32.init(param, &ctx_int32->As<ARMContext>());
+                      int8depthwise_int32.create(param, &ctx_int32->As<ARMContext>());
+                      int8depthwise_int32.run(param);
+                      int* output_int32_data = output_int32.mutable_data<int>();
+                      for (int i = 0; i < output_int32.dims().production(); i++) {
+                        EXPECT_NEAR(output_int32_data[i],
+                                    output_int32_ref_data[i], 1e-3);
+                      }
+
+                      // ============ int8depthwise_int8 ============
+                      int8_t* output_int8_ref_data =
+                          output_int8_ref.mutable_data<int8_t>();
+                      lite::arm::math::trans_tensor_int32_to_int8(
+                          &output_int32_ref, &output_int8_ref, in_scale[0], 1,
+                          w_scale);
+                      param.output = &output_int8;
+                      param.input_scale = in_scale[0];
+                      param.output_scale = 1;
+                      param.weight_scale = w_scale;
+                      std::unique_ptr<KernelContext> ctx_int8(new KernelContext);
+                      lite::arm::math::DepthwiseConvInt8<PRECISION(kInt8)>
+                          int8depthwise_int8;
+                      int8depthwise_int8.init(param, &ctx_int8->As<ARMContext>());
+                      int8depthwise_int8.create(param, &ctx_int8->As<ARMContext>());
+                      int8depthwise_int8.run(param);
+                      int8_t* output_int8_data = output_int8.mutable_data<int8_t>();
+                      for (int i = 0; i < output_int8.dims().production(); i++) {
+                        EXPECT_NEAR(output_int8_data[i],
+                                    output_int8_ref_data[i], 1e-3);
+                      }
+
+                      // ============ int8depthwise_float32 ============
+                      float* output_fp32_ref_data =
+                          output_fp32_ref.mutable_data<float>();
+                      lite::arm::math::trans_tensor_int32_to_fp32(
+                          &output_int32_ref, &output_fp32_ref, in_scale[0],
+                          w_scale);
+                      param.output = &output_fp32;
+                      param.input_scale = in_scale[0];
+                      param.output_scale = 1;
+                      param.weight_scale = w_scale;
+                      std::unique_ptr<KernelContext> ctx_fp32(new KernelContext);
+                      lite::arm::math::DepthwiseConvInt8<PRECISION(kFloat)>
+                          int8depthwise_fp32;
+                      int8depthwise_fp32.init(param, &ctx_fp32->As<ARMContext>());
+                      int8depthwise_fp32.create(param, &ctx_fp32->As<ARMContext>());
+                      int8depthwise_fp32.run(param);
+                      float* output_fp32_data = output_fp32.mutable_data<float>();
+                      for (int i = 0; i < output_fp32.dims().production(); i++) {
+                        EXPECT_NEAR(output_fp32_data[i],
+                                    output_fp32_ref_data[i], 1e-3);
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
 TEST(conv_arm, compute) {
   DeviceInfo::Init();
 #if 1