Commit f8ff5aa4, authored by Xiaoyang LI, committed by GitHub

remove log in reshape, fix conv error when padding size=4 (#2199)

* remove log in reshape, fix conv error when padding size=4, test=develop

* fix style, test=develop

* remove useless code, test=develop

* remove redundant model test file, test=develop

* change cluster to power_mode, test=develop

* fix build error, test=develop

* change cluster to power_mode, test=develop

* change opt_nb to use_optimize_nb, test=develop

* null, test=develop
Parent e82ebee8
@@ -21,13 +21,20 @@
 #include "lite/api/paddle_use_passes.h"
 #include "lite/api/test_helper.h"
 #include "lite/core/device_info.h"
+#include "lite/tests/utils/timer.h"
 #include "lite/utils/cp_logging.h"
 #include "lite/utils/string.h"
+using paddle::lite::Timer;
 DEFINE_string(input_shape,
               "1,3,224,224",
               "input shapes, separated by colon and comma");
+DEFINE_bool(use_optimize_nb,
+            false,
+            "optimized & naive buffer model for mobile devices");
 namespace paddle {
 namespace lite_api {
@@ -58,15 +65,14 @@ void OutputOptModel(const std::string& load_model_dir,
 #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 void Run(const std::vector<std::vector<int64_t>>& input_shapes,
          const std::string& model_dir,
-         const int repeat,
+         const PowerMode power_mode,
          const int thread_num,
+         const int repeat,
          const int warmup_times = 0) {
-#ifdef LITE_WITH_ARM
-  lite::DeviceInfo::Init();
-  lite::DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, thread_num);
-#endif
   lite_api::MobileConfig config;
   config.set_model_dir(model_dir);
+  config.set_power_mode(power_mode);
+  config.set_threads(thread_num);
   auto predictor = lite_api::CreatePaddlePredictor(config);
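For orientation, the reworked Run() boils down to the MobileConfig calls shown above; below is a minimal standalone sketch of that path, with a placeholder model directory and LITE_POWER_NO_BIND assumed to be the enum behind --power_mode=3 (only LITE_POWER_HIGH is named in this diff).

// Minimal sketch of the new configuration path (not part of the diff).
// Assumptions: placeholder model path; LITE_POWER_NO_BIND corresponds to
// the default --power_mode=3 ("no bind").
#include "lite/api/paddle_api.h"  // MobileConfig, CreatePaddlePredictor

void RunOnce() {
  paddle::lite_api::MobileConfig config;
  config.set_model_dir("./mobilenet_v1opt2");  // placeholder path
  config.set_power_mode(paddle::lite_api::LITE_POWER_NO_BIND);  // assumed enum
  config.set_threads(1);
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
  predictor->Run();
}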
@@ -87,17 +93,22 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
     predictor->Run();
   }
-  auto start = lite::GetCurrentUS();
-  for (int i = 0; i < repeat; ++i) {
+  Timer ti;
+  for (int j = 0; j < repeat; ++j) {
+    ti.start();
     predictor->Run();
+    ti.end();
+    LOG(INFO) << "iter: " << j << ", time: " << ti.latest_time() << " ms";
   }
-  auto end = lite::GetCurrentUS();
   LOG(INFO) << "================== Speed Report ===================";
-  LOG(INFO) << "Model: " << model_dir << ", threads num " << thread_num
-            << ", warmup: " << warmup_times << ", repeats: " << repeat
-            << ", spend " << (end - start) / repeat / 1000.0
-            << " ms in average.";
+  LOG(INFO) << "Model: " << model_dir
+            << ", power_mode: " << static_cast<int>(power_mode)
+            << ", threads num " << thread_num << ", warmup: " << warmup_times
+            << ", repeats: " << repeat << ", avg time: " << ti.get_average_ms()
+            << " ms"
+            << ", min time: " << ti.get_min_time() << " ms"
+            << ", max time: " << ti.get_max_time() << " ms.";
   auto output = predictor->GetOutput(0);
   auto out = output->data<float>();
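The timing change replaces the single GetCurrentUS() span with per-iteration Timer samples, so min/max/avg can all be reported. A self-contained sketch of that warmup-then-timed-repeats pattern, with work() standing in as a placeholder for predictor->Run():

// Illustrative benchmark helper (not part of the diff); `work` is a stand-in
// for predictor->Run(). Uses only the Timer calls shown in this commit.
#include "lite/tests/utils/timer.h"
#include "lite/utils/cp_logging.h"

template <typename Fn>
float Benchmark(Fn&& work, int warmup, int repeat) {
  for (int i = 0; i < warmup; ++i) {
    work();  // untimed warmup iterations
  }
  paddle::lite::Timer ti;
  for (int j = 0; j < repeat; ++j) {
    ti.start();
    work();
    ti.end();  // records the sample and updates latest_time()
    LOG(INFO) << "iter: " << j << ", time: " << ti.latest_time() << " ms";
  }
  return ti.get_average_ms();  // mean over the timed repeats only
}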
@@ -122,7 +133,12 @@ int main(int argc, char** argv) {
                << "--model_dir /path/to/your/model";
     exit(0);
   }
-  std::string save_optimized_model_dir = FLAGS_model_dir + "opt2";
+  std::string save_optimized_model_dir = "";
+  if (FLAGS_use_optimize_nb) {
+    save_optimized_model_dir = FLAGS_model_dir;
+  } else {
+    save_optimized_model_dir = FLAGS_model_dir + "opt2";
+  }
   auto split_string =
       [](const std::string& str_in) -> std::vector<std::string> {
@@ -164,17 +180,21 @@ int main(int argc, char** argv) {
     input_shapes.push_back(get_shape(str_input_shapes[i]));
   }
-  // Output optimized model
-  paddle::lite_api::OutputOptModel(
-      FLAGS_model_dir, save_optimized_model_dir, input_shapes);
+  if (!FLAGS_use_optimize_nb) {
+    // Output optimized model
+    paddle::lite_api::OutputOptModel(
+        FLAGS_model_dir, save_optimized_model_dir, input_shapes);
+  }
 #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
   // Run inference using optimized model
-  paddle::lite_api::Run(input_shapes,
-                        save_optimized_model_dir,
-                        FLAGS_repeats,
-                        FLAGS_threads,
-                        FLAGS_warmup);
+  paddle::lite_api::Run(
+      input_shapes,
+      save_optimized_model_dir,
+      static_cast<paddle::lite_api::PowerMode>(FLAGS_power_mode),
+      FLAGS_threads,
+      FLAGS_repeats,
+      FLAGS_warmup);
 #endif
   return 0;
 }
@@ -22,6 +22,13 @@
 DEFINE_string(model_dir, "", "model dir");
 DEFINE_int32(warmup, 0, "warmup times");
 DEFINE_int32(repeats, 1, "repeats times");
+DEFINE_int32(power_mode,
+             3,
+             "arm power mode: "
+             "0 for big cluster, "
+             "1 for little cluster, "
+             "2 for all cores, "
+             "3 for no bind");
 DEFINE_int32(threads, 1, "threads num");
 DEFINE_int32(im_width, 224, "image width");
 DEFINE_int32(im_height, 224, "image height");
......
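The new --power_mode flag is consumed in main() via a plain static_cast to PowerMode. As an illustration only (the enum names other than LITE_POWER_HIGH are inferred from the flag help text, not shown in this diff), the mapping looks roughly like this:

// Hypothetical helper: map the integer flag to the PowerMode enum.
// Assumption: PowerMode and the LITE_POWER_* values are declared in paddle_api.h.
#include "lite/api/paddle_api.h"

paddle::lite_api::PowerMode ToPowerMode(int flag) {
  switch (flag) {
    case 0: return paddle::lite_api::LITE_POWER_HIGH;      // big cluster
    case 1: return paddle::lite_api::LITE_POWER_LOW;       // little cluster
    case 2: return paddle::lite_api::LITE_POWER_FULL;      // all cores
    default: return paddle::lite_api::LITE_POWER_NO_BIND;  // no binding
  }
}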
@@ -34,10 +34,6 @@ bool ConvOpLite::CheckShape() const {
   CHECK_EQ_OR_FALSE(in_dims.size(), filter_dims.size());
   CHECK_OR_FALSE(in_dims.size() - param_.strides.size() == 2U);
-  // CHECK_EQ_OR_FALSE(param_.paddings.size(), param_.strides.size());
-  // CHECK_EQ_OR_FALSE(in_dims[1], filter_dims[1] * param_.groups);
-  // CHECK_EQ_OR_FALSE(filter_dims[0] % param_.groups, 0);
   CHECK_EQ_OR_FALSE(filter_dims.size(), 4UL);
   return true;
......
@@ -48,7 +48,6 @@ bool ReshapeOp::InferShape() const {
   auto x_dims = param_.x->dims();
   auto output_dims = ValidateShape(final_shape, x_dims);
-  LOG(INFO) << "output_dims:" << output_dims;
   param_.output->Resize(output_dims);
   auto out_lod = param_.output->mutable_lod();
   *out_lod = param_.x->lod();
......
@@ -24,7 +24,13 @@
 #include "lite/kernels/arm/conv_compute.h"
 #endif  // LITE_WITH_ARM
-DEFINE_int32(cluster, 3, "cluster id");
+DEFINE_int32(power_mode,
+             3,
+             "power mode: "
+             "0 for POWER_HIGH;"
+             "1 for POWER_LOW;"
+             "2 for POWER_FULL;"
+             "3 for NO_BIND");
 DEFINE_int32(threads, 1, "threads num");
 DEFINE_int32(warmup, 0, "warmup times");
 DEFINE_int32(repeats, 1, "repeats times");
@@ -53,6 +59,7 @@ DEFINE_bool(flag_bias, true, "with bias");
 typedef paddle::lite::DDim DDim;
 typedef paddle::lite::Tensor Tensor;
 typedef paddle::lite::operators::ConvParam ConvParam;
+using paddle::lite::Timer;
 DDim compute_out_dim(const DDim& dim_in,
                      const paddle::lite::operators::ConvParam& param) {
@@ -87,7 +94,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
                     bool flag_bias,
                     bool flag_relu,
                     const std::vector<int>& thread_num,
-                    const std::vector<int>& cluster_id) {
+                    const std::vector<int>& power_mode) {
 #ifdef LITE_WITH_ARM
   paddle::lite::DeviceInfo::Init();
 #endif
@@ -120,7 +127,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
   auto wptr = param.filter->data<float>();
   auto bias_ptr = flag_bias ? param.bias->data<float>() : nullptr;
-  for (auto& cls : cluster_id) {
+  for (auto& cls : power_mode) {
     for (auto& th : thread_num) {
       paddle::lite::kernels::arm::ConvCompute<PRECISION(kFloat),
                                               PRECISION(kFloat)>
@@ -192,7 +199,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
         conv.Launch();
       }
       /// compute
-      lite::test::Timer t0;
+      Timer t0;
       for (int i = 0; i < FLAGS_repeats; ++i) {
         t0.start();
         conv.Launch();
@@ -233,7 +240,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
                   << ", dila_: " << dilas[0] << ", " << dilas[1]
                   << ", bias: " << (flag_bias ? "true" : "false")
                   << ", relu: " << (flag_relu ? "true" : "false")
-                  << ", threads: " << th << ", cluster: " << cls
+                  << ", threads: " << th << ", power_mode: " << cls
                   << " failed!!\n";
       }
     }
@@ -245,7 +252,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
                 << ", dila_: " << dilas[0] << ", " << dilas[1]
                 << ", bias: " << (flag_bias ? "true" : "false")
                 << ", relu: " << (flag_relu ? "true" : "false")
-                << ", threads: " << th << ", cluster: " << cls
+                << ", threads: " << th << ", power_mode: " << cls
                 << " successed!!\n";
     }
   }
@@ -266,7 +273,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
                     bool flag_bias,
                     bool flag_relu,
                     const std::vector<int>& thread_num,
-                    const std::vector<int>& cluster_id) {}
+                    const std::vector<int>& power_mode) {}
 #endif  // LITE_WITH_ARM
 #if 1  /// 3x3dw
@@ -293,7 +300,7 @@ TEST(TestConv3x3DW, test_conv3x3_depthwise) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
         }
       }
     }
@@ -327,7 +334,7 @@ TEST(TestConv5x5DW, test_conv5x5_depthwise) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
         }
       }
     }
@@ -364,7 +371,7 @@ TEST(TestConv1x1s1, test_conv1x1s1) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
         }
       }
     }
@@ -398,7 +405,7 @@ TEST(TestConv3x3s1, test_conv_3x3s1) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
         }
       }
     }
@@ -432,7 +439,7 @@ TEST(TestConv3x3s2, test_conv_3x3s2) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
         }
       }
     }
@@ -474,7 +481,7 @@ TEST(TestConvRand, test_conv_rand) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
         }
       }
     }
@@ -508,6 +515,6 @@ TEST(TestConvCustom, test_conv_fp32_custom_size) {
       FLAGS_flag_bias,
       FLAGS_flag_relu,
       {FLAGS_threads},
-      {FLAGS_cluster});
+      {FLAGS_power_mode});
 }
 #endif  // custom
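compute_out_dim(), collapsed in this view, derives the expected output size for each generated case. As a reference sketch only (assuming the standard convolution arithmetic with symmetric padding, not copied from the file), the per-dimension calculation is:

// Standard conv output-size arithmetic (assumption: this is roughly what
// compute_out_dim evaluates for the height and width dims).
int ConvOutSize(int in, int kernel, int stride, int pad, int dilation) {
  int kernel_ext = dilation * (kernel - 1) + 1;      // effective kernel size
  return (in + 2 * pad - kernel_ext) / stride + 1;   // symmetric padding assumed
}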
@@ -24,7 +24,13 @@
 #include "lite/kernels/arm/conv_compute.h"
 #endif  // LITE_WITH_ARM
-DEFINE_int32(cluster, 3, "cluster id");
+DEFINE_int32(power_mode,
+             3,
+             "power mode: "
+             "0 for POWER_HIGH;"
+             "1 for POWER_LOW;"
+             "2 for POWER_FULL;"
+             "3 for NO_BIND");
 DEFINE_int32(threads, 1, "threads num");
 DEFINE_int32(warmup, 0, "warmup times");
 DEFINE_int32(repeats, 1, "repeats times");
@@ -53,6 +59,7 @@ DEFINE_bool(flag_bias, true, "with bias");
 typedef paddle::lite::DDim DDim;
 typedef paddle::lite::Tensor Tensor;
 typedef paddle::lite::operators::ConvParam ConvParam;
+using paddle::lite::Timer;
 DDim compute_out_dim(const DDim& dim_in,
                      const paddle::lite::operators::ConvParam& param) {
@@ -124,7 +131,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
                     bool flag_bias,
                     bool flag_relu,
                     const std::vector<int>& thread_num,
-                    const std::vector<int>& cluster_id) {
+                    const std::vector<int>& power_mode) {
   paddle::lite::DeviceInfo::Init();
   ConvParam param_int8_out;
   ConvParam param_fp32_out;
@@ -181,7 +188,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
                   1,
                   weight_dim.count(1, 4));
-  for (auto& cls : cluster_id) {
+  for (auto& cls : power_mode) {
     for (auto& th : thread_num) {
       std::unique_ptr<paddle::lite::KernelContext> ctx1(
           new paddle::lite::KernelContext);
@@ -300,7 +307,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
         conv_int8_int8.Launch();
       }
       /// compute fp32 output
-      lite::test::Timer t0;
+      Timer t0;
       for (int i = 0; i < FLAGS_repeats; ++i) {
         t0.start();
         conv_int8_fp32.Launch();
@@ -356,7 +363,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
                   << ", dila_: " << dilas[0] << ", " << dilas[1]
                   << ", bias: " << (flag_bias ? "true" : "false")
                   << ", relu: " << (flag_relu ? "true" : "false")
-                  << ", threads: " << th << ", cluster: " << cls
+                  << ", threads: " << th << ", power_mode: " << cls
                   << " failed!!\n";
       }
     }
@@ -414,7 +421,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
                   << ", dila_: " << dilas[0] << ", " << dilas[1]
                   << ", bias: " << (flag_bias ? "true" : "false")
                   << ", relu: " << (flag_relu ? "true" : "false")
-                  << ", threads: " << th << ", cluster: " << cls
+                  << ", threads: " << th << ", power_mode: " << cls
                   << " failed!!\n";
       }
     }
@@ -426,7 +433,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
                 << ", dila_: " << dilas[0] << ", " << dilas[1]
                 << ", bias: " << (flag_bias ? "true" : "false")
                 << ", relu: " << (flag_relu ? "true" : "false")
-                << ", threads: " << th << ", cluster: " << cls
+                << ", threads: " << th << ", power_mode: " << cls
                 << " successed!!\n";
     }
   }
@@ -444,7 +451,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
                     bool flag_bias,
                     bool flag_relu,
                     const std::vector<int>& thread_num,
-                    const std::vector<int>& cluster_id) {}
+                    const std::vector<int>& power_mode) {}
 #endif  // LITE_WITH_ARM
 #if 1  /// 3x3dw
@@ -471,7 +478,7 @@ TEST(TestConv3x3DWInt8, test_conv3x3_depthwise) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
         }
       }
     }
@@ -505,7 +512,7 @@ TEST(TestConv5x5DWInt8, test_conv5x5_depthwise) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
         }
       }
     }
@@ -542,7 +549,7 @@ TEST(TestConv1x1s1Int8, test_conv1x1s1) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
         }
       }
     }
@@ -576,7 +583,7 @@ TEST(TestConv3x3s1Int8, test_conv_3x3s1) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
         }
       }
     }
@@ -610,7 +617,7 @@ TEST(TestConv3x3s2Int8, test_conv_3x3s2) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
         }
       }
     }
@@ -652,7 +659,7 @@ TEST(TestConvRandInt8, test_conv_rand) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
         }
       }
     }
@@ -686,6 +693,6 @@ TEST(TestConvCustomInt8, test_conv_custom_size) {
       FLAGS_flag_bias,
       FLAGS_flag_relu,
       {FLAGS_threads},
-      {FLAGS_cluster});
+      {FLAGS_power_mode});
 }
 #endif  // custom
@@ -24,7 +24,13 @@
 #include "lite/kernels/arm/conv_transpose_compute.h"
 #endif  // LITE_WITH_ARM
-DEFINE_int32(cluster, 3, "cluster id");
+DEFINE_int32(power_mode,
+             3,
+             "power mode: "
+             "0 for POWER_HIGH;"
+             "1 for POWER_LOW;"
+             "2 for POWER_FULL;"
+             "3 for NO_BIND");
 DEFINE_int32(threads, 1, "threads num");
 DEFINE_int32(warmup, 0, "warmup times");
 DEFINE_int32(repeats, 1, "repeats times");
@@ -53,6 +59,7 @@ DEFINE_bool(flag_bias, false, "with bias");
 typedef paddle::lite::DDim DDim;
 typedef paddle::lite::Tensor Tensor;
 typedef paddle::lite::operators::ConvParam ConvParam;
+using paddle::lite::Timer;
 DDim compute_out_dim(const DDim& dim_in,
                      const paddle::lite::operators::ConvParam& param) {
@@ -78,7 +85,7 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims,
                               bool flag_bias,
                               bool flag_relu,
                               const std::vector<int>& thread_num,
-                              const std::vector<int>& cluster_id) {
+                              const std::vector<int>& power_mode) {
 #ifdef LITE_WITH_ARM
   paddle::lite::DeviceInfo::Init();
 #endif
@@ -114,7 +121,7 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims,
   auto wptr = tmp_weights.data<float>();
   auto bias_ptr = flag_bias ? param.bias->data<float>() : nullptr;
-  for (auto& cls : cluster_id) {
+  for (auto& cls : power_mode) {
     for (auto& th : thread_num) {
       paddle::lite::kernels::arm::Conv2DTransposeCompute conv_t;
       std::unique_ptr<paddle::lite::KernelContext> ctx1(
@@ -185,7 +192,7 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims,
         conv_t.Launch();
       }
       /// compute
-      lite::test::Timer t0;
+      Timer t0;
       for (int i = 0; i < FLAGS_repeats; ++i) {
         t0.start();
         conv_t.Launch();
@@ -226,7 +233,7 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims,
                   << ", dila_: " << dilas[0] << ", " << dilas[1]
                   << ", bias: " << (flag_bias ? "true" : "false")
                   << ", relu: " << (flag_relu ? "true" : "false")
-                  << ", threads: " << th << ", cluster: " << cls
+                  << ", threads: " << th << ", power_mode: " << cls
                   << " failed!!\n";
       }
     }
@@ -238,7 +245,7 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims,
                 << ", dila_: " << dilas[0] << ", " << dilas[1]
                 << ", bias: " << (flag_bias ? "true" : "false")
                 << ", relu: " << (flag_relu ? "true" : "false")
-                << ", threads: " << th << ", cluster: " << cls
+                << ", threads: " << th << ", power_mode: " << cls
                 << " successed!!\n";
     }
   }
@@ -259,7 +266,7 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims,
                               bool flag_bias,
                               bool flag_relu,
                               const std::vector<int>& thread_num,
-                              const std::vector<int>& cluster_id) {}
+                              const std::vector<int>& power_mode) {}
 #endif  // LITE_WITH_ARM
 #if 1  /// random param conv
@@ -294,7 +301,7 @@ TEST(TestConvRand, test_conv_transpose_rand) {
                                   flag_bias,
                                   flag_relu,
                                   {1, 2, 4},
-                                  {FLAGS_cluster});
+                                  {FLAGS_power_mode});
         }
       }
     }
@@ -328,6 +335,6 @@ TEST(TestConvCustom, test_conv_transpose_fp32_custom_size) {
       FLAGS_flag_bias,
      FLAGS_flag_relu,
      {FLAGS_threads},
-      {FLAGS_cluster});
+      {FLAGS_power_mode});
 }
 #endif  // custom
@@ -25,8 +25,15 @@
 #include "lite/tests/utils/timer.h"
 typedef paddle::lite::Tensor Tensor;
+using paddle::lite::Timer;
-DEFINE_int32(cluster, 3, "cluster id");
+DEFINE_int32(power_mode,
+             3,
+             "power mode: "
+             "0 for POWER_HIGH;"
+             "1 for POWER_LOW;"
+             "2 for POWER_FULL;"
+             "3 for NO_BIND");
 DEFINE_int32(threads, 1, "threads num");
 DEFINE_int32(warmup, 0, "warmup times");
 DEFINE_int32(repeats, 1, "repeats times");
@@ -146,7 +153,7 @@ bool test_gemm_int8(bool tra,
                  1,
                  tc_basic_fp32.numel());
   }
-  lite::test::Timer t0;
+  Timer t0;
   //! compute
   double ops = 2.0 * m * n * k;
   std::unique_ptr<paddle::lite::KernelContext> ctx1(
@@ -202,7 +209,7 @@ bool test_gemm_int8(bool tra,
     t0.end();
   }
   LOG(INFO) << "gemm_int8_int8 output: M: " << m << ", N: " << n << ", K: " << k
-            << ", cluster: " << cls << ", threads: " << ths
+            << ", power_mode: " << cls << ", threads: " << ths
             << ", GOPS: " << ops * 1e-9f
             << " GOPS, avg time: " << t0.get_average_ms()
             << " ms, min time: " << t0.get_min_time()
@@ -229,7 +236,7 @@ bool test_gemm_int8(bool tra,
     t0.end();
   }
   LOG(INFO) << "gemm_int8_fp32 output: M: " << m << ", N: " << n << ", K: " << k
-            << ", cluster: " << cls << ", threads: " << ths
+            << ", power_mode: " << cls << ", threads: " << ths
             << ", GOPS: " << ops * 1e-9f
             << " GOPS, avg time: " << t0.get_average_ms()
             << " ms, min time: " << t0.get_min_time()
@@ -323,7 +330,7 @@ TEST(TestLiteGemmInt8, gemm_prepacked_int8) {
                               k,
                               has_bias,
                               has_relu,
-                              FLAGS_cluster,
+                              FLAGS_power_mode,
                               th);
           if (flag) {
             LOG(INFO) << "test m = " << m << ", n=" << n
@@ -364,7 +371,7 @@ TEST(TestGemmInt8Custom, gemm_prepacked_int8_custom) {
                             FLAGS_K,
                             FLAGS_flag_bias,
                             FLAGS_flag_relu,
-                            FLAGS_cluster,
+                            FLAGS_power_mode,
                             FLAGS_threads);
   if (!flag) {
     LOG(FATAL) << "test m = " << FLAGS_M << ", n=" << FLAGS_N
......
@@ -25,8 +25,15 @@
 #include "lite/tests/utils/timer.h"
 typedef paddle::lite::Tensor Tensor;
+using paddle::lite::Timer;
-DEFINE_int32(cluster, 3, "cluster id");
+DEFINE_int32(power_mode,
+             3,
+             "power mode: "
+             "0 for POWER_HIGH;"
+             "1 for POWER_LOW;"
+             "2 for POWER_FULL;"
+             "3 for NO_BIND");
 DEFINE_int32(threads, 1, "threads num");
 DEFINE_int32(warmup, 0, "warmup times");
 DEFINE_int32(repeats, 1, "repeats times");
@@ -128,7 +135,7 @@ bool test_sgemm(bool tra,
                has_bias,
                has_relu);
   }
-  lite::test::Timer t0;
+  Timer t0;
 #ifdef LITE_WITH_ARM
   //! compute
   double ops = 2.0 * m * n * k;
@@ -182,7 +189,7 @@ bool test_sgemm(bool tra,
     t0.end();
   }
   LOG(INFO) << "M: " << m << ", N: " << n << ", K: " << k
-            << ", cluster: " << cls << ", threads: " << ths
+            << ", power_mode: " << cls << ", threads: " << ths
             << ", GOPS: " << ops * 1e-9f
             << " GOPS, avg time: " << t0.get_average_ms()
             << " ms, min time: " << t0.get_min_time()
@@ -258,7 +265,7 @@ TEST(TestSgemm, test_func_sgemm_prepacked) {
                   beta,
                   has_bias,
                   has_relu,
-                  FLAGS_cluster,
+                  FLAGS_power_mode,
                   th);
               if (flag) {
                 LOG(INFO)
@@ -318,7 +325,7 @@ TEST(TestSgemmCustom, test_func_sgemm_prepacked_custom) {
                         FLAGS_beta,
                         FLAGS_flag_bias,
                         FLAGS_flag_relu,
-                        FLAGS_cluster,
+                        FLAGS_power_mode,
                         FLAGS_threads);
   if (!flag) {
     LOG(FATAL) << "test m = " << FLAGS_M << ", n=" << FLAGS_N
......
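The speed reports above print the raw work per call (ops * 1e-9f) next to the average latency. Below is a small illustrative helper, not part of the diff, showing how those two numbers combine into an achieved-throughput figure, assuming get_average_ms() returns milliseconds per call:

// Hypothetical helper: convert the reported work and average latency into
// throughput. `2.0 * m * n * k` mirrors the tests' ops count for one GEMM.
double AchievedGops(double m, double n, double k, double avg_ms) {
  double ops = 2.0 * m * n * k;          // multiply-adds per GEMM call
  return ops * 1e-9 / (avg_ms * 1e-3);   // giga-ops per second
}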
@@ -17,8 +17,8 @@
 #include <chrono>  // NOLINT
 #include <list>
+namespace paddle {
 namespace lite {
-namespace test {
 class Timer final {
  public:
@@ -34,12 +34,14 @@ class Timer final {
     tend_ = std::chrono::system_clock::now();
     auto ts =
         std::chrono::duration_cast<std::chrono::microseconds>(tend_ - tstart_);
-    float elapse_ms = 1000.f * static_cast<float>(ts.count()) *
+    latest_time_ = 1000.f * static_cast<float>(ts.count()) *
                       std::chrono::microseconds::period::num /
                       std::chrono::microseconds::period::den;
-    ms_time_.push_back(elapse_ms);
+    ms_time_.push_back(latest_time_);
   }
+  float latest_time() const { return latest_time_; }
   float get_average_ms() {
     if (ms_time_.size() == 0) {
       return 0.f;
@@ -96,7 +98,8 @@ class Timer final {
   std::chrono::time_point<std::chrono::system_clock> tstart_;
   std::chrono::time_point<std::chrono::system_clock> tend_;
   std::list<float> ms_time_;
+  float latest_time_;
 };
-}  // namespace test
 }  // namespace lite
+}  // namespace paddle
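With Timer now living in paddle::lite rather than lite::test, call sites reduce to the pattern below; a minimal sketch using only the interface shown above, with the measured workload left as a placeholder:

// Minimal usage sketch of the relocated Timer (placeholder workload).
#include "lite/tests/utils/timer.h"

void TimeSomething() {
  paddle::lite::Timer t;
  for (int i = 0; i < 10; ++i) {
    t.start();
    // ... workload under test goes here ...
    t.end();  // records one sample and updates latest_time()
  }
  float last_ms = t.latest_time();    // most recent iteration, in ms
  float avg_ms = t.get_average_ms();  // mean over all recorded iterations
  (void)last_ms;
  (void)avg_ms;
}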