diff --git a/paddle/fluid/lite/core/lite_tensor.h b/paddle/fluid/lite/core/lite_tensor.h index ecc8b0629c070588a8c1bf0c3f30dd34c3a957a7..abdc876e1e0238cd49bd9d67a9cdafd340a46b98 100644 --- a/paddle/fluid/lite/core/lite_tensor.h +++ b/paddle/fluid/lite/core/lite_tensor.h @@ -47,7 +47,7 @@ class DDimLite : public DDimBase { std::multiplies()); } const std::vector &data() const { return data_; } - value_type count(int start, int end) { + value_type count(int start, int end) const { if (start < 0) { start = 0; } diff --git a/paddle/fluid/lite/kernels/arm/conv_compute_test.cc b/paddle/fluid/lite/kernels/arm/conv_compute_test.cc index 6fc05dbe18b99f2b13c4c114ac962163d16ca05b..b6da4bbe850fef721547935143e50219d62f0282 100644 --- a/paddle/fluid/lite/kernels/arm/conv_compute_test.cc +++ b/paddle/fluid/lite/kernels/arm/conv_compute_test.cc @@ -26,27 +26,10 @@ namespace lite { namespace kernels { namespace arm { -static float compute_max_kernel(const float* din, int64_t size) { - float max_value = -std::numeric_limits::max(); - for (int64_t i = 0; i < size; i++) { - max_value = max_value > din[0] ? max_value : din[0]; - } - LOG(INFO) << "[max_value]: " << max_value; - return max_value; -} - -static std::vector get_tensor_scale_n(const float* in_data, - int axis_size, int64_t inner_size, - float scale_factor) { - std::vector scale_out(axis_size); - for (int c = 0; c < axis_size; ++c) { // num - const float* ptr_in = in_data + c * inner_size; // channel*width*height - scale_out[c] = compute_max_kernel(ptr_in, inner_size) / scale_factor; - } - for (auto s : scale_out) { - LOG(INFO) << "[Scale out]: " << s; - } - return scale_out; +static int get_rand(int start, int end) { + int i = rand(); // NOLINT + i = (i % (end - start)) + start; + return i; } template @@ -184,16 +167,16 @@ TEST(conv_arm_int8, init) { ASSERT_EQ(float_out.target(), TARGET(kARM)); } -TEST(conv_arm_int8, compute) { +TEST(conv_arm_int8, int8_int32) { DeviceInfo::Init(); for (auto n : {2}) { for (auto ic : {6}) { for (auto oc : {6}) { for (auto ih : {9}) { for (auto iw : {9}) { - for (auto flag_bias : {false, /*true*/}) { - for (auto flag_relu : {false, /*true*/}) { - for (auto depthwise : {false, /*true*/}) { + for (auto flag_bias : {false, true}) { + for (auto flag_relu : {false, true}) { + for (auto depthwise : {false, true}) { for (auto dilation : {1}) { for (auto stride : {1}) { for (auto padding : {0}) { @@ -226,11 +209,11 @@ TEST(conv_arm_int8, compute) { filter_int8.mutable_data(); for (int i = 0; i < input_int8.dims().production(); i++) { - input_int8_data[i] = 1.f; + input_int8_data[i] = get_rand(-128, 127); } for (int i = 0; i < filter_int8.dims().production(); i++) { - filter_int8_data[i] = 1.f; + filter_int8_data[i] = get_rand(-128, 127); } operators::ConvParam param; @@ -278,6 +261,214 @@ TEST(conv_arm_int8, compute) { } } +TEST(conv_arm_int8, int8_fp32) { + DeviceInfo::Init(); + for (auto n : {2}) { + for (auto ic : {6}) { + for (auto oc : {6}) { + for (auto ih : {9}) { + for (auto iw : {9}) { + for (auto flag_bias : {false, true}) { + for (auto flag_relu : {false, true}) { + for (auto depthwise : {false, true}) { + for (auto dilation : {1}) { + for (auto stride : {1}) { + for (auto padding : {0}) { + for (auto ks : {1}) { + int group = 1; + if (depthwise) { // depthwise convolution ? + group = oc = ic; + } + + const int dks = dilation * (ks - 1) + 1; + int oh = (ih + 2 * padding - dks) / stride + 1; + int ow = (iw + 2 * padding - dks) / stride + 1; + std::vector input_shape = {n, ic, ih, iw}; + std::vector filter_shape = {oc, ic / group, + ks, ks}; + std::vector bias_shape({1, oc, 1, 1}); + std::vector output_shape({n, oc, oh, ow}); + + Tensor input_fp32, input_int8; + Tensor filter_fp32, filter_int8; + Tensor bias_fp32, bias_int8; + Tensor output_int32_ref, output_int32; + Tensor output_fp32_ref, output_fp32; + Tensor output_int8_ref, output_int8; + + input_fp32.Resize(input_shape); + input_int8.Resize(input_shape); + filter_fp32.Resize(filter_shape); + filter_int8.Resize(filter_shape); + bias_fp32.Resize(bias_shape); + bias_int8.Resize(bias_shape); + output_int32.Resize(output_shape); + output_int32_ref.Resize(output_shape); + output_fp32_ref.Resize(output_shape); + output_fp32.Resize(output_shape); + output_int8_ref.Resize(output_shape); + output_int8.Resize(output_shape); + + float* input_fp32_data = + input_fp32.mutable_data(); + int8_t* input_int8_data = + input_int8.mutable_data(); + + float* filter_fp32_data = + filter_fp32.mutable_data(); + int8_t* filter_int8_data = + filter_int8.mutable_data(); + + float* bias_fp32_data = + bias_fp32.mutable_data(); + int8_t* bias_int8_data = + bias_int8.mutable_data(); + + for (int i = 0; i < input_fp32.dims().production(); + i++) { + input_fp32_data[i] = get_rand(-100, 100) / 100.f; + } + for (int i = 0; i < filter_fp32.dims().production(); + i++) { + filter_fp32_data[i] = get_rand(-100, 100) / 100.f; + } + for (int i = 0; i < bias_fp32.dims().production(); + i++) { + bias_fp32_data[i] = get_rand(-100, 100) / 100.f; + } + + std::vector in_scale; + lite::arm::math::get_tensor_scale( + input_fp32, &in_scale, -1, 127.f); + lite::arm::math::trans_tensor_fp32_to_int8( + &input_fp32, &input_int8, in_scale[0]); + + std::vector w_scale; + lite::arm::math::get_tensor_scale( + filter_fp32, &w_scale, -1, 127.f); + int axis_size = oc; + int inner_size = ic / group * ks * ks; + w_scale = lite::arm::math::get_tensor_scale_n( + filter_fp32_data, axis_size, inner_size, 127.f); + lite::arm::math::fp32_to_int8( + filter_fp32_data, filter_int8_data, + w_scale.data(), axis_size, 1, inner_size); + + operators::ConvParam param; + param.x = &input_int8; + param.filter = &filter_int8; + param.bias = &bias_int8; + param.fuse_relu = false; + param.paddings = std::vector({padding, padding}); + param.strides = std::vector({stride, stride}); + param.dilations = + std::vector({dilation, dilation}); + param.groups = group; + param.output = &output_int32_ref; + conv_compute_ref(param); + + int32_t* output_int32_ref_data = + output_int32_ref.mutable_data(); + + // ============ int8gemm_int32 ============ + param.output = &output_int32; + std::unique_ptr ctx_int32( + new KernelContext); + lite::arm::math::GemmLikeConvInt8 + int8gemm_int32; + int8gemm_int32.init(param, + &ctx_int32->As()); + int8gemm_int32.create(param, + &ctx_int32->As()); + int8gemm_int32.run(param); + int32_t* output_int32_data = + output_int32.mutable_data(); + for (int i = 0; i < output_int32.dims().production(); + i++) { + EXPECT_NEAR(output_int32_data[i], + output_int32_ref_data[i], 1e-3); + } + + // ============ int8gemm_int8 ============ + int8_t* output_int8_ref_data = + output_int8_ref.mutable_data(); + lite::arm::math::trans_tensor_int32_to_int8( + &output_int32_ref, &output_int8_ref, in_scale[0], + 1, w_scale); + param.output = &output_int8; + param.input_scale = in_scale[0]; + param.output_scale = 1; + std::vector w_scale_for_int8; + for (auto ws : w_scale) { + ws *= param.input_scale; + ws /= param.output_scale; + w_scale_for_int8.push_back(ws); + } + param.weight_scale = w_scale_for_int8; + + std::unique_ptr ctx_int8( + new KernelContext); + lite::arm::math::GemmLikeConvInt8 + int8gemm_int8; + int8gemm_int8.init(param, + &ctx_int8->As()); + int8gemm_int8.create(param, + &ctx_int8->As()); + int8gemm_int8.run(param); + int8_t* output_int8_data = + output_int8.mutable_data(); + for (int i = 0; i < output_int8.dims().production(); + i++) { + EXPECT_NEAR(output_int8_data[i], + output_int8_ref_data[i], 1e-3); + } + + // ============ int8gemm_float32 ============ + float* output_fp32_ref_data = + output_fp32_ref.mutable_data(); + lite::arm::math::trans_tensor_int32_to_fp32( + &output_int32_ref, &output_fp32_ref, in_scale[0], + w_scale); + param.output = &output_fp32; + param.input_scale = in_scale[0]; + param.output_scale = 1; + std::vector w_scale_for_fp32; + for (auto ws : w_scale) { + ws *= param.input_scale; + w_scale_for_fp32.push_back(ws); + } + param.weight_scale = w_scale_for_fp32; + + std::unique_ptr ctx_fp32( + new KernelContext); + lite::arm::math::GemmLikeConvInt8 + int8gemm_fp32; + int8gemm_fp32.init(param, + &ctx_fp32->As()); + int8gemm_fp32.create(param, + &ctx_fp32->As()); + int8gemm_fp32.run(param); + float* output_fp32_data = + output_fp32.mutable_data(); + for (int i = 0; i < output_fp32.dims().production(); + i++) { + EXPECT_NEAR(output_fp32_data[i], + output_fp32_ref_data[i], 1e-3); + } + } + } + } + } + } + } + } + } + } + } + } + } +} + TEST(conv_arm, compute) { DeviceInfo::Init(); #if 1