// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #include "lite/core/context.h" #include "lite/core/profile/timer.h" #include "lite/operators/op_params.h" #include "lite/tests/utils/naive_math_impl.h" #include "lite/tests/utils/tensor_utils.h" #ifdef LITE_WITH_ARM #include "lite/kernels/arm/conv_transpose_compute.h" #endif // LITE_WITH_ARM DEFINE_int32(power_mode, 3, "power mode: " "0 for POWER_HIGH;" "1 for POWER_LOW;" "2 for POWER_FULL;" "3 for NO_BIND"); DEFINE_int32(threads, 1, "threads num"); DEFINE_int32(warmup, 0, "warmup times"); DEFINE_int32(repeats, 1, "repeats times"); DEFINE_bool(basic_test, false, "do all tests"); DEFINE_bool(check_result, true, "check the result"); DEFINE_int32(batch, 1, "batch size"); DEFINE_int32(in_channel, 32, "input channel"); DEFINE_int32(in_height, 32, "input height"); DEFINE_int32(in_width, 32, "input width"); DEFINE_int32(out_channel, 64, "output channel"); DEFINE_int32(group, 1, "group"); DEFINE_int32(kernel_h, 2, "kernel height"); DEFINE_int32(kernel_w, 2, "kernel width"); DEFINE_int32(pad_h, 0, "pad height"); DEFINE_int32(pad_w, 0, "pad width"); DEFINE_int32(stride_h, 2, "stride height"); DEFINE_int32(stride_w, 2, "stride width"); DEFINE_int32(dila_h, 1, "dilation height"); DEFINE_int32(dila_w, 1, "dilation width"); DEFINE_bool(flag_relu, false, "do relu"); DEFINE_bool(flag_bias, false, "with bias"); typedef paddle::lite::DDim DDim; typedef paddle::lite::Tensor Tensor; typedef paddle::lite::operators::ConvParam ConvParam; using paddle::lite::profile::Timer; DDim compute_out_dim(const DDim& dim_in, const paddle::lite::operators::ConvParam& param) { auto filter_dims = param.filter->dims(); DDim output_shape = dim_in; output_shape[1] = filter_dims[1] * param.groups; auto paddings = *param.paddings; auto dilations = *param.dilations; for (int i = 0; i < 2; i++) { int kernel_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1; int output_len = (dim_in[i + 2] - 1) * param.strides[i] + kernel_extent - (paddings[2 * i] + paddings[2 * i + 1]); output_shape[i + 2] = output_len; } return output_shape; } #ifdef LITE_WITH_ARM void test_conv_transpose_fp32(const std::vector& input_dims, const DDim& weight_dim, int group, const std::vector& strides, const std::vector& pads, const std::vector& dilas, bool flag_bias, bool flag_relu, const std::vector& thread_num, const std::vector& power_mode) { #ifdef LITE_WITH_ARM paddle::lite::DeviceInfo::Init(); #endif ConvParam param; param.x = new Tensor; param.x->set_precision(PRECISION(kFloat)); param.filter = new Tensor; param.filter->Resize(weight_dim); param.filter->set_precision(PRECISION(kFloat)); if (flag_bias) { param.bias = new Tensor; param.bias->Resize({weight_dim[0]}); param.bias->set_precision(PRECISION(kFloat)); } param.strides = strides; param.paddings = std::make_shared>(pads); param.dilations = std::make_shared>(dilas); param.fuse_relu = flag_relu; param.groups = group; param.output = new Tensor; param.output->set_precision(PRECISION(kFloat)); paddle::lite::fill_tensor_rand(*param.filter, -1.f, 1.f); // paddle::lite::fill_tensor_const(*param.filter, 1.f); if (flag_bias) { paddle::lite::fill_tensor_rand(*param.bias, -1.f, 1.f); // paddle::lite::fill_tensor_const(*param.bias, 1.f); } Tensor tmp_weights; tmp_weights.Resize(weight_dim); tmp_weights.CopyDataFrom(*param.filter); auto wptr = tmp_weights.data(); auto bias_ptr = flag_bias ? param.bias->data() : nullptr; for (auto& cls : power_mode) { for (auto& th : thread_num) { paddle::lite::kernels::arm::Conv2DTransposeCompute conv_t; std::unique_ptr ctx1( new paddle::lite::KernelContext); auto& ctx = ctx1->As(); ctx.SetRunMode(static_cast(cls), th); conv_t.SetParam(param); conv_t.SetContext(std::move(ctx1)); for (auto& dim_in : input_dims) { CHECK_EQ(weight_dim[0], dim_in[1]) << "input channel must equal to weights channel"; DDim dim_out = compute_out_dim(dim_in, param); if (dim_out[2] < 1 || dim_out[3] < 1) { continue; } param.x->Resize(dim_in); param.output->Resize(dim_out); param.filter->CopyDataFrom(tmp_weights); // prepare for run conv_t.PrepareForRun(); paddle::lite::fill_tensor_rand(*param.x, -1.f, 1.f); // paddle::lite::fill_tensor_const(*param.x, 1.f); auto din = param.x->data(); Tensor tout_basic; if (FLAGS_check_result) { tout_basic.set_precision(PRECISION(kFloat)); tout_basic.Resize(dim_out); fill_tensor_const(tout_basic, 0.f); auto dout_basic = tout_basic.mutable_data(); deconv_basic(din, dout_basic, dim_in[0], dim_out[1], dim_out[2], dim_out[3], dim_in[1], dim_in[2], dim_in[3], wptr, bias_ptr, group, weight_dim[3], weight_dim[2], strides[1], strides[0], dilas[1], dilas[0], pads[2], pads[3], pads[0], pads[1], flag_bias, flag_relu); } /// warm up for (int i = 0; i < FLAGS_warmup; ++i) { conv_t.Launch(); } /// compute Timer t0; for (int i = 0; i < FLAGS_repeats; ++i) { t0.Start(); conv_t.Launch(); t0.Stop(); } float gops = 2.f * tmp_weights.numel() * dim_in[0] * dim_in[2] * dim_in[3]; LOG(INFO) << "conv fp32: input shape: " << dim_in << ", output shape" << dim_out << ",running time, avg: " << t0.LapTimes().Avg() << ", min time: " << t0.LapTimes().Min() << ", total GOPS: " << 1e-9 * gops << " GOPS, avg GOPs: " << 1e-6 * gops / t0.LapTimes().Avg() << " GOPs, max GOPs: " << 1e-6 * gops / t0.LapTimes().Min(); if (FLAGS_check_result) { double max_ratio = 0; double max_diff = 0; tensor_cmp_host(tout_basic, *param.output, max_ratio, max_diff); LOG(INFO) << "compare result, max diff: " << max_diff << ", max ratio: " << max_ratio; if (std::abs(max_ratio) > 1e-3f) { if (max_diff > 5e-4f) { LOG(WARNING) << "basic result"; print_tensor(tout_basic); LOG(WARNING) << "lite result"; print_tensor(*param.output); Tensor tdiff; tdiff.Resize(tout_basic.dims()); tdiff.set_precision(PRECISION(kFloat)); tensor_diff(tout_basic, *param.output, tdiff); print_tensor(tdiff); LOG(FATAL) << "test fp32 conv: input: " << dim_in << ", output: " << dim_out << ", weight dim: " << weight_dim << ", pad: " << pads[0] << ", " << pads[1] << ", " << pads[2] << ", " << pads[3] << ", stride: " << strides[0] << ", " << strides[1] << ", dila_: " << dilas[0] << ", " << dilas[1] << ", bias: " << (flag_bias ? "true" : "false") << ", relu: " << (flag_relu ? "true" : "false") << ", threads: " << th << ", power_mode: " << cls << " failed!!\n"; } } } LOG(INFO) << "test fp32 conv: input: " << dim_in << ", output: " << dim_out << ", weight dim: " << weight_dim << ", pad: " << pads[0] << ", " << pads[1] << ", " << pads[2] << ", " << pads[3] << ", stride: " << strides[0] << ", " << strides[1] << ", dila_: " << dilas[0] << ", " << dilas[1] << ", bias: " << (flag_bias ? "true" : "false") << ", relu: " << (flag_relu ? "true" : "false") << ", threads: " << th << ", power_mode: " << cls << " successed!!\n"; } } } delete param.x; delete param.filter; delete param.output; delete param.bias; } #else void test_conv_transpose_fp32(const std::vector& input_dims, const DDim& weight_dim, int group, const std::vector& strides, const std::vector& pads, const std::vector& dilas, bool flag_bias, bool flag_relu, const std::vector& thread_num, const std::vector& power_mode) {} #endif // LITE_WITH_ARM #if 1 /// random param conv TEST(TestConvRand, test_conv_transpose_rand) { if (FLAGS_basic_test) { for (auto& cin : {1, 3, 8, 16}) { for (auto& cout : {1, 5, 8, 16}) { for (auto& g : {1, 2}) { for (auto& kw : {1, 2, 3}) { for (auto& kh : {1, 2, 3}) { for (auto& stride : {1, 2}) { for (auto& pad_h0 : {0, 1, 2}) { for (auto& pad_h1 : {0, 1, 2}) { for (auto& pad_w0 : {0, 1, 2}) { for (auto& pad_w1 : {0, 1, 2}) { for (auto& dila : {1, 2}) { for (auto& flag_bias : {false, true}) { for (auto& flag_relu : {false, true}) { if (cin % g != 0 || cout % g != 0) { continue; } std::vector dims; DDim weights_dim({cin, cout / g, kh, kw}); for (auto& batch : {1, 2}) { for (auto& h : {1, 3, 19, 32, 28}) { dims.push_back(DDim({batch, cin, h, h})); } } test_conv_transpose_fp32( dims, weights_dim, g, {stride, stride}, {pad_h0, pad_h1, pad_w0, pad_w1}, {dila, dila}, flag_bias, flag_relu, {1, 4}, {FLAGS_power_mode}); } } } } } } } } } } } } } } } #endif /// random param conv #if 1 /// custom TEST(TestConvCustom, test_conv_transpose_fp32_custom_size) { CHECK_EQ(FLAGS_in_channel % FLAGS_group, 0) << "input channel must be divided by group"; CHECK_EQ(FLAGS_out_channel % FLAGS_group, 0) << "num_output must be divided by group"; test_conv_transpose_fp32( {DDim({FLAGS_batch, FLAGS_in_channel, FLAGS_in_height, FLAGS_in_width})}, DDim({FLAGS_in_channel, FLAGS_out_channel / FLAGS_group, FLAGS_kernel_h, FLAGS_kernel_w}), FLAGS_group, {FLAGS_stride_h, FLAGS_stride_w}, {FLAGS_pad_h, FLAGS_pad_h, FLAGS_pad_w, FLAGS_pad_w}, {FLAGS_dila_h, FLAGS_dila_w}, FLAGS_flag_bias, FLAGS_flag_relu, {FLAGS_threads}, {FLAGS_power_mode}); } #endif // custom