From 85487210f980e174ea2004e873dc3e26348c2fec Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Thu, 13 Jun 2019 12:14:55 +0000 Subject: [PATCH] refine and pass mul test --- paddle/fluid/lite/core/cpu_info.cc | 10 +- paddle/fluid/lite/kernels/arm/fc_compute.cc | 1 - paddle/fluid/lite/kernels/arm/mul_compute.cc | 18 ++- .../lite/kernels/arm/mul_compute_test.cc | 5 +- .../lite/kernels/arm/pool_compute_test.cc | 2 +- .../lite/kernels/arm/scale_compute_test.cc | 11 ++ paddle/fluid/lite/tools/build.sh | 120 +++++++++++------- 7 files changed, 106 insertions(+), 61 deletions(-) diff --git a/paddle/fluid/lite/core/cpu_info.cc b/paddle/fluid/lite/core/cpu_info.cc index df80f1c857..ab19682958 100644 --- a/paddle/fluid/lite/core/cpu_info.cc +++ b/paddle/fluid/lite/core/cpu_info.cc @@ -54,15 +54,15 @@ void DeviceInfo::InitInternal(DeviceInfo* dev) { << ", cluster ID: " << dev->cluster_ids_[dev->core_ids_[i]] << ", CPU ARCH: A" << dev->archs_[i]; } - LOG(INFO) << "L1 DataCache size is: "; + VLOG(1) << "L1 DataCache size is: "; for (int i = 0; i < dev->compute_core_num_; ++i) { - LOG(INFO) << dev->L1_cache_[i] / 1024 << " KB"; + VLOG(1) << dev->L1_cache_[i] / 1024 << " KB"; } - LOG(INFO) << "L2 Cache size is: "; + VLOG(1) << "L2 Cache size is: "; for (int i = 0; i < dev->compute_core_num_; ++i) { - LOG(INFO) << dev->L2_cache_[i] / 1024 << " KB"; + VLOG(1) << dev->L2_cache_[i] / 1024 << " KB"; } - LOG(INFO) << "Total memory: " << dev->max_memory_ << "KB"; + VLOG(1) << "Total memory: " << dev->max_memory_ << "KB"; dev->max_freq_ = max_freq[0]; for (int j = 1; j < dev->compute_core_num_; ++j) { diff --git a/paddle/fluid/lite/kernels/arm/fc_compute.cc b/paddle/fluid/lite/kernels/arm/fc_compute.cc index 5bf9faab9f..efd98008e7 100644 --- a/paddle/fluid/lite/kernels/arm/fc_compute.cc +++ b/paddle/fluid/lite/kernels/arm/fc_compute.cc @@ -52,7 +52,6 @@ void FcCompute::Run() { &ctx); lite::arm::math::sgemm_prepack(packed_in, w_data, b_data, o_data, x_h, n, x_w, false, false, false, &ctx); - if (param.bias) { CHECK_EQ(param.bias->numel(), n); lite::arm::math::fill_bias_fc(o_data, b_data, x_h, n); diff --git a/paddle/fluid/lite/kernels/arm/mul_compute.cc b/paddle/fluid/lite/kernels/arm/mul_compute.cc index 4ca2c455e4..269e484225 100644 --- a/paddle/fluid/lite/kernels/arm/mul_compute.cc +++ b/paddle/fluid/lite/kernels/arm/mul_compute.cc @@ -33,7 +33,7 @@ void MulCompute::Run() { const auto* y_data = param.y->data(); auto* o_data = param.output->mutable_data(); - int x_h = static_cast( + int m = static_cast( param.x->dims().Slice(0, param.x_num_col_dims).production()); int x_w = static_cast(param.x->dims() @@ -41,22 +41,26 @@ void MulCompute::Run() { .production()); int y_h = static_cast( param.y->dims().Slice(0, param.y_num_col_dims).production()); - int y_w = + int n = static_cast(param.y->dims() .Slice(param.y_num_col_dims, param.y->dims().size()) .production()); CHECK_EQ(x_w, y_h) << "x_w must be equal with y_h"; - if (y_w == 1 || x_h == 1) { - lite::arm::math::sgemv(x_data, y_data, o_data, false, x_h, x_w, false, - nullptr, false); + auto k = x_w; + if (n == 1) { + lite::arm::math::sgemv(x_data, y_data, o_data, false, m, k, false, nullptr, + false); } else { constexpr bool is_tranposed_y = false; auto& ctx = this->ctx_->template As(); - lite::arm::math::sgemm_prepack(x_data, y_data, nullptr, o_data, x_h, y_w, - x_w, false, false, is_tranposed_y, &ctx); + float* packed_x = static_cast(ctx.workspace_data()) + + ctx.l2_cache_size() / sizeof(float); + lite::arm::math::prepackA(packed_x, x_data, k, 0, m, 0, k, false, &ctx); + lite::arm::math::sgemm_prepack(packed_x, y_data, nullptr, o_data, m, n, k, + false, false, is_tranposed_y, &ctx); } } diff --git a/paddle/fluid/lite/kernels/arm/mul_compute_test.cc b/paddle/fluid/lite/kernels/arm/mul_compute_test.cc index 22cbfed5fd..9d40180848 100644 --- a/paddle/fluid/lite/kernels/arm/mul_compute_test.cc +++ b/paddle/fluid/lite/kernels/arm/mul_compute_test.cc @@ -58,6 +58,7 @@ TEST(mul_arm, compare_test) { for (int m : {1, 2, 3, 4}) { for (int n : {1, 2, 3, 4}) { for (int k : {1, 2, 3, 4}) { + VLOG(3) << "m: " << m << ", n: " << n << ", k: " << k; lite::Tensor x, y, out, ref; x.Resize({m, k}); y.Resize({k, n}); @@ -71,8 +72,8 @@ TEST(mul_arm, compare_test) { FillData(x_data, x.dims().production()); FillData(y_data, y.dims().production()); - FillData(out_data, out.dims().production()); - FillData(ref_data, out.dims().production()); + FillData(out_data, out.dims().production(), 0, 0); + FillData(ref_data, out.dims().production(), 0, 0); MulCompute mul; operators::MulParam param; diff --git a/paddle/fluid/lite/kernels/arm/pool_compute_test.cc b/paddle/fluid/lite/kernels/arm/pool_compute_test.cc index 35873a9d2c..b024ccef9d 100644 --- a/paddle/fluid/lite/kernels/arm/pool_compute_test.cc +++ b/paddle/fluid/lite/kernels/arm/pool_compute_test.cc @@ -182,7 +182,7 @@ TEST(pool_arm, compute) { for (auto stride : {2}) { for (auto pad : {0}) { for (auto n : {1, 3, 4, 11}) { - for (auto c : {1, 3, 11, 4, 1024}) { + for (auto c : {1, 3, 11 /* ,1024 */}) { // speedup for ci for (auto h : {3, 1, 11, 4, 1}) { for (auto w : {1, 3, 4, 12, 1}) { VLOG(3) << "n:" << n << " c:" << c << " h:" << h << " w:" << w diff --git a/paddle/fluid/lite/kernels/arm/scale_compute_test.cc b/paddle/fluid/lite/kernels/arm/scale_compute_test.cc index fee47d7eb7..b127779228 100644 --- a/paddle/fluid/lite/kernels/arm/scale_compute_test.cc +++ b/paddle/fluid/lite/kernels/arm/scale_compute_test.cc @@ -54,6 +54,15 @@ TEST(scale_arm, compute) { lite::Tensor output; lite::Tensor output_ref; +#if 1 // for ci speedup + for (auto n : {1, 3}) { + for (auto c : {1, 3}) { + for (auto h : {3, 4}) { + for (auto w : {4, 3}) { + for (auto bias_after_scale : {true, false}) { + for (auto s : {-1.0f, 0.13f}) { + for (auto b : {-15.f, 0.11234f}) { +#else for (auto n : {1, 3, 4, 11}) { for (auto c : {1, 3, 11, 4}) { for (auto h : {3, 1, 11, 4}) { @@ -61,6 +70,8 @@ TEST(scale_arm, compute) { for (auto bias_after_scale : {true, false}) { for (auto s : {-100.25f, -1.0f, 0.13f, 3840.975f}) { for (auto b : {-3075.495f, -15.f, 0.11234f, 128.15f}) { +#endif + x.Resize(DDim(std::vector({n, c, h, w}))); output.Resize(DDim(std::vector({n, c, h, w}))); output_ref.Resize(DDim(std::vector({n, c, h, w}))); diff --git a/paddle/fluid/lite/tools/build.sh b/paddle/fluid/lite/tools/build.sh index c73f7bf952..a5be467ff3 100755 --- a/paddle/fluid/lite/tools/build.sh +++ b/paddle/fluid/lite/tools/build.sh @@ -43,10 +43,14 @@ function cmake_arm { -DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2 } +function build_single { + make $1 -j$(expr $(nproc) - 2) +} + function build { file=$1 for _test in $(cat $file); do - make $_test -j$(expr $(nproc) - 2) + build_single $_test done } @@ -63,39 +67,6 @@ function test_lite { done } -port_armv8=5554 -port_armv7=5556 - -# Run test on android -function test_lite_android { - local file=$1 - local adb_abi=$2 - local port= - if [[ ${adb_abi} == "armeabi-v7a" ]]; then - port=${port_armv7} - fi - - if [[ ${adb_abi} == "arm64-v8a" ]]; then - port=${port_armv8} - fi - if [[ "${port}x" == "x" ]]; then - echo "Port can not be empty" - exit 1 - fi - - echo "file: ${file}" - # push all to adb and test - adb_work_dir="/data/local/tmp" - skip_list="test_model_parser_lite" - for _test in $(cat $file); do - [[ $skip_list =~ (^|[[:space:]])$_test($|[[:space:]]) ]] && continue || echo 'skip $_test' - testpath=$(find ./paddle/fluid -name ${_test}) - adb -s emulator-${port} push ${testpath} ${adb_work_dir} - adb -s emulator-${port} shell chmod +x "${adb_work_dir}/${_test}" - adb -s emulator-${port} shell "./${adb_work_dir}/${_test}" - done -} - # Build the code and run lite server tests. This is executed in the CI system. function build_test_server { mkdir -p ./build @@ -108,8 +79,34 @@ function build_test_server { build $LIBS_FILE } -# Build the code and run lite server tests. This is executed in the CI system. +# test_arm_android +function test_arm_android { + test_name=$1 + port=$2 + if [[ "${test_name}x" == "x" ]]; then + echo "test_name can not be empty" + exit 1 + fi + if [[ "${port}x" == "x" ]]; then + echo "Port can not be empty" + exit 1 + fi + + echo "test name: ${test_name}" + adb_work_dir="/data/local/tmp" + skip_list="test_model_parser_lite" # add more with space + [[ $skip_list =~ (^|[[:space:]])$test_name($|[[:space:]]) ]] && continue || echo 'skip $test_name' + testpath=$(find ./paddle/fluid -name ${test_name}) + adb -s emulator-${port} push ${testpath} ${adb_work_dir} + adb -s emulator-${port} shell chmod +x "${adb_work_dir}/${test_name}" + adb -s emulator-${port} shell "./${adb_work_dir}/${test_name}" +} + +# Build the code and run lite arm tests. This is executed in the CI system. function build_test_arm { + port_armv8=5554 + port_armv7=5556 + adb kill-server adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done # start android arm64-v8a armeabi-v7a emulators first @@ -122,6 +119,7 @@ function build_test_arm { for os in "android" "armlinux" ; do for abi in "arm64-v8a" "armeabi-v7a" "armeabi-v7a-hf" ; do + # TODO(TJ): enable compile on v7-hf on andorid and all v7 on armlinux if [[ ${abi} == "armeabi-v7a-hf" ]]; then echo "armeabi-v7a-hf is not supported on both android and armlinux" continue @@ -138,17 +136,30 @@ function build_test_arm { cmake_arm ${os} ${abi} build $TESTS_FILE + # armlinux need in another docker + # TODO(TJ): enable test with armlinux if [[ ${os} == "android" ]]; then adb_abi=${abi} if [[ ${adb_abi} == "armeabi-v7a-hf" ]]; then adb_abi="armeabi-v7a" fi if [[ ${adb_abi} == "armeabi-v7a" ]]; then - # skip v7 tests + # skip all armv7 tests + # TODO(TJ): enable test with armv7 continue fi - test_lite_android $TESTS_FILE ${adb_abi} - # armlinux need in another docker + local port= + if [[ ${adb_abi} == "armeabi-v7a" ]]; then + port=${port_armv7} + fi + + if [[ ${adb_abi} == "arm64-v8a" ]]; then + port=${port_armv8} + fi + echo "test file: ${TESTS_FILE}" + for _test in $(cat $TESTS_FILE); do + test_arm_android $_test $port + done fi cd - done @@ -164,12 +175,13 @@ function print_usage { echo "----------------------------------------" echo -e "cmake_x86: run cmake with X86 mode" echo -e "cmake_cuda: run cmake with CUDA mode" - echo -e "cmake_arm: run cmake with ARM mode" + echo -e "--arm_os= --arm_abi= cmake_arm: run cmake with ARM mode" echo echo -e "build: compile the tests" + echo -e "--test_name= build_single: compile single test" echo echo -e "test_server: run server tests" - echo -e "test_mobile: run mobile tests" + echo -e "--test_name= --adb_port_number= test_arm_android: run arm test" echo "----------------------------------------" echo } @@ -182,11 +194,31 @@ function main { TESTS_FILE="${i#*=}" shift ;; + --test_name=*) + TEST_NAME="${i#*=}" + shift + ;; + --arm_os=*) + ARM_OS="${i#*=}" + shift + ;; + --arm_abi=*) + ARM_ABI="${i#*=}" + shift + ;; + --arm_port=*) + ARM_PORT="${i#*=}" + shift + ;; build) build $TESTS_FILE build $LIBS_FILE shift ;; + build_single) + build_single $TEST_NAME + shift + ;; cmake_x86) cmake_x86 shift @@ -196,15 +228,15 @@ function main { shift ;; cmake_arm) - cmake_arm $2 $3 + cmake_arm $ARM_OS $ARM_ABI shift ;; test_server) test_lite $TESTS_FILE shift ;; - test_mobile) - test_lite $TESTS_FILE + test_arm_android) + test_arm_android $TEST_NAME $ARM_PORT shift ;; build_test_server) @@ -224,7 +256,5 @@ function main { done } -print_usage - main $@ -- GitLab