From edb4ea9a789f0ff59889d5abbe379e5da2546f81 Mon Sep 17 00:00:00 2001
From: liu zhengxi <380185688@qq.com>
Date: Thu, 24 Oct 2019 18:35:07 +0800
Subject: [PATCH] Make inceptionv4, resnet50 and googlenet run on the x86
 platform (#2250)

* make inceptionv4, resnet50 and googlenet run on the x86 platform, and fix
  the result comparison in the x86 unit tests, test=develop

* fix googlenet tests for benchmark record, test=develop

* [framework][profile] fix profile dump bug when op is feed or fetch,
  test=develop (sangoly)
---
 lite/api/CMakeLists.txt                     |  5 ++
 lite/api/test_googlenet_lite.cc             | 34 ++++----
 lite/api/test_resnet50_lite_x86.cc          | 91 +++++++++++++++++++++
 lite/core/kernel.h                          |  5 +-
 lite/core/program.cc                        |  4 +-
 lite/core/program.h                         | 22 ++---
 lite/kernels/x86/batch_norm_compute.h       |  1 +
 lite/kernels/x86/batch_norm_compute_test.cc |  2 +-
 lite/kernels/x86/conv_compute_test.cc       |  3 +-
 lite/kernels/x86/pool_compute_test.cc       |  3 +
 lite/tools/ci_build.sh                      |  4 +-
 11 files changed, 139 insertions(+), 35 deletions(-)
 create mode 100644 lite/api/test_resnet50_lite_x86.cc

diff --git a/lite/api/CMakeLists.txt b/lite/api/CMakeLists.txt
index 5e0fa705a9..fc6d2dd66b 100644
--- a/lite/api/CMakeLists.txt
+++ b/lite/api/CMakeLists.txt
@@ -119,6 +119,11 @@ if(WITH_TESTING)
                    ${ops} ${host_kernels} ${x86_kernels}
           ARGS --model_dir=${LITE_MODEL_DIR}/inception_v4_simple)
       add_dependencies(test_inceptionv4_lite_x86 extern_lite_download_inception_v4_simple_tar_gz)
+      lite_cc_test(test_resnet50_lite_x86 SRCS test_resnet50_lite_x86.cc
+         DEPS cxx_api mir_passes lite_api_test_helper
+              ${ops} ${host_kernels} ${x86_kernels}
+         ARGS --model_dir=${LITE_MODEL_DIR}/resnet50)
+      add_dependencies(test_resnet50_lite_x86 extern_lite_download_resnet50_tar_gz)
   endif()
 endif()

diff --git a/lite/api/test_googlenet_lite.cc b/lite/api/test_googlenet_lite.cc
index 952892f7c4..2b32f7d558 100644
--- a/lite/api/test_googlenet_lite.cc
+++ b/lite/api/test_googlenet_lite.cc
@@ -12,20 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
 #include <gtest/gtest.h>
 #include <string>
 #include <vector>
@@ -34,12 +20,10 @@
 #include "lite/api/paddle_use_kernels.h"
 #include "lite/api/paddle_use_ops.h"
 #include "lite/api/paddle_use_passes.h"
+#include "lite/api/test_helper.h"
 #include "lite/core/op_registry.h"
 #include "lite/core/tensor.h"

-// for googlenet
-DEFINE_string(model_dir, "", "");
-
 namespace paddle {
 namespace lite {
 #ifdef LITE_WITH_X86
@@ -57,7 +41,21 @@ TEST(CXXApi, test_lite_googlenet) {
   for (int i = 0; i < input_tensor->dims().production(); i++) {
     data[i] = 1;
   }
-  predictor.Run();
+
+  for (int i = 0; i < FLAGS_warmup; ++i) {
+    predictor.Run();
+  }
+
+  auto start = GetCurrentUS();
+  for (int i = 0; i < FLAGS_repeats; ++i) {
+    predictor.Run();
+  }
+
+  LOG(INFO) << "================== Speed Report ===================";
+  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
+            << ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
+            << ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
+            << " ms in average.";

   auto* out = predictor.GetOutput(0);
   std::vector<float> results(
diff --git a/lite/api/test_resnet50_lite_x86.cc b/lite/api/test_resnet50_lite_x86.cc
new file mode 100644
index 0000000000..05012cc852
--- /dev/null
+++ b/lite/api/test_resnet50_lite_x86.cc
@@ -0,0 +1,91 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+#include "lite/api/cxx_api.h"
+#include "lite/api/paddle_use_kernels.h"
+#include "lite/api/paddle_use_ops.h"
+#include "lite/api/paddle_use_passes.h"
+#include "lite/api/test_helper.h"
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+
+TEST(Resnet50, test_resnet50_lite_x86) {
+  // DeviceInfo::Init();
+  // DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
+  lite::Predictor predictor;
+  std::vector<Place> valid_places({Place{TARGET(kX86), PRECISION(kFloat)},
+                                   Place{TARGET(kHost), PRECISION(kFloat)}});
+
+  std::string model_dir = FLAGS_model_dir;
+  std::vector<std::string> passes({"static_kernel_pick_pass",
+                                   "variable_place_inference_pass",
+                                   "type_target_cast_pass",
+                                   "variable_place_inference_pass",
+                                   "io_copy_kernel_pick_pass",
+                                   "variable_place_inference_pass",
+                                   "runtime_context_assign_pass"});
+  predictor.Build(model_dir, "", "", valid_places, passes);
+  auto* input_tensor = predictor.GetInput(0);
+  input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
+  auto* data = input_tensor->mutable_data<float>();
+  auto item_size = input_tensor->dims().production();
+  for (int i = 0; i < item_size; i++) {
+    data[i] = 1;
+  }
+
+  for (int i = 0; i < FLAGS_warmup; ++i) {
+    predictor.Run();
+  }
+
+  auto start = GetCurrentUS();
+  for (int i = 0; i < FLAGS_repeats; ++i) {
+    predictor.Run();
+  }
+
+  LOG(INFO) << "================== Speed Report ===================";
+  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
+            << ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
+            << ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
+            << " ms in average.";
+
+  std::vector<std::vector<float>> results;
+  // i = 1
+  results.emplace_back(std::vector<float>(
+      {0.00024139918, 0.00020566184, 0.00022418296, 0.00041731037,
+       0.0005366107,  0.00016948722, 0.00028638865, 0.0009257241,
+       0.00072681636, 8.531815e-05,  0.0002129998,  0.0021168243,
+       0.006387163,   0.0037145028,  0.0012812682,  0.00045948103,
+       0.00013535398, 0.0002483765,  0.00076759676, 0.0002773295}));
+  auto* out = predictor.GetOutput(0);
+  ASSERT_EQ(out->dims().size(), 2);
+  ASSERT_EQ(out->dims()[0], 1);
+  ASSERT_EQ(out->dims()[1], 1000);
+
+  int step = 50;
+  for (int i = 0; i < results.size(); ++i) {
+    for (int j = 0; j < results[i].size(); ++j) {
+      EXPECT_NEAR(out->data<float>()[j * step + (out->dims()[1] * i)],
+                  results[i][j],
+                  1e-6);
+    }
+  }
+}
+
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/core/kernel.h b/lite/core/kernel.h
index 113ce967d5..176f6c69ac 100644
--- a/lite/core/kernel.h
+++ b/lite/core/kernel.h
@@ -82,8 +82,9 @@ class KernelBase {
 #endif

 #ifdef LITE_WITH_PROFILE
-    CHECK_GE(profile_id_, 0) << "Must set profile id first";
-    profile::ProfileBlock x(profile_id_, "kernel");
+    if (profile_id_ >= 0) {
+      profile::ProfileBlock x(profile_id_, "kernel");
+    }
 #endif
     Run();
   }
diff --git a/lite/core/program.cc b/lite/core/program.cc
index f5238f25ed..014cfb1d24 100644
--- a/lite/core/program.cc
+++ b/lite/core/program.cc
@@ -184,7 +184,9 @@ void Instruction::Run() {
   CHECK(op_) << "op null";
   CHECK(kernel_) << "kernel null";
 #ifdef LITE_WITH_PROFILE
-  profile::ProfileBlock x(profile_id_, "instruction");
+  if (profile_id_ >= 0) {
+    profile::ProfileBlock x(profile_id_, "instruction");
+  }
 #endif  // LITE_WITH_PROFILE
   if (first_epoch_) {
     first_epoch_ = false;
diff --git a/lite/core/program.h b/lite/core/program.h
index f0f5304c08..7a6700da61 100644
--- a/lite/core/program.h
+++ b/lite/core/program.h
@@ -89,16 +89,18 @@ struct Instruction {
               std::unique_ptr<KernelBase>&& kernel)
       : op_(op), kernel_(std::move(kernel)) {
 #ifdef LITE_WITH_PROFILE
-    profile_id_ = profile::BasicProfiler<profile::BasicTimer>::Global()
-                      .NewRcd(kernel_->SerializedKernelType())
-                      .id();
-    kernel_->SetProfileID(profile_id_);
-    // Set profile custom info
-    auto& profiler =
-        *profile::BasicProfiler<profile::BasicTimer>::Global().mutable_record(
-            profile_id_);
-    profiler.SetCustomInfo("op_type", op_->Type());
-    profiler.SetCustomInfo("op_info", op_->SerializedOpInfo());
+    if (op_->Type() != "feed" && op_->Type() != "fetch") {
+      profile_id_ = profile::BasicProfiler<profile::BasicTimer>::Global()
+                        .NewRcd(kernel_->SerializedKernelType())
+                        .id();
+      kernel_->SetProfileID(profile_id_);
+      // Set profile custom info
+      auto& profiler =
+          *profile::BasicProfiler<profile::BasicTimer>::Global()
+               .mutable_record(profile_id_);
+      profiler.SetCustomInfo("op_type", op_->Type());
+      profiler.SetCustomInfo("op_info", op_->SerializedOpInfo());
+    }
 #endif  // LITE_WITH_PROFILE
   }

diff --git a/lite/kernels/x86/batch_norm_compute.h b/lite/kernels/x86/batch_norm_compute.h
index 9190a407df..092280752c 100644
--- a/lite/kernels/x86/batch_norm_compute.h
+++ b/lite/kernels/x86/batch_norm_compute.h
@@ -46,6 +46,7 @@ class BatchNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
   void Run() override {
     // auto &context = ctx_->As<X86Context>();
     auto &param = *param_.get_mutable<param_t>();
+    param.is_test = true;
     bool global_stats = param.is_test || param.use_global_stats;

     const auto *x = param.x;
diff --git a/lite/kernels/x86/batch_norm_compute_test.cc b/lite/kernels/x86/batch_norm_compute_test.cc
index e4c3268519..5ec2cdcdda 100644
--- a/lite/kernels/x86/batch_norm_compute_test.cc
+++ b/lite/kernels/x86/batch_norm_compute_test.cc
@@ -104,7 +104,7 @@ TEST(batch_norm_x86, run_test) {
   operators::BatchNormParam param;

   param.x = &x;
-  param.is_test = false;
+  param.is_test = true;
   param.scale = &scale;
   param.bias = &bias;
   param.mean = &mean;
diff --git a/lite/kernels/x86/conv_compute_test.cc b/lite/kernels/x86/conv_compute_test.cc
index d784018446..f2dde962b9 100644
--- a/lite/kernels/x86/conv_compute_test.cc
+++ b/lite/kernels/x86/conv_compute_test.cc
@@ -84,8 +84,9 @@ TEST(conv2d_x86, run_test) {
   conv2d.Run();

   LOG(INFO) << "output: ";
+  float ref_result[1] = {27.};
   for (int i = 0; i < out.dims().production(); i++) {
-    LOG(INFO) << out_data[i] << " ";
+    EXPECT_NEAR(out_data[i], ref_result[i], 1e-5);
   }
 }

diff --git a/lite/kernels/x86/pool_compute_test.cc b/lite/kernels/x86/pool_compute_test.cc
index 821ae2e12c..87b75a0760 100644
--- a/lite/kernels/x86/pool_compute_test.cc
+++ b/lite/kernels/x86/pool_compute_test.cc
@@ -70,8 +70,11 @@ TEST(pool2d_x86, run_test) {
   pool2d.Run();

   LOG(INFO) << "output: ";
+  float ref_result[12] = {
+      5., 7., 13., 15., 21., 23., 29., 31., 37., 39., 45., 47.};
   for (int i = 0; i < out.dims().production(); i++) {
     LOG(INFO) << out_data[i];
+    EXPECT_NEAR(out_data[i], ref_result[i], 1e-5);
   }
 }

diff --git a/lite/tools/ci_build.sh b/lite/tools/ci_build.sh
index 17ec0fdccd..0e8f75f10a 100755
--- a/lite/tools/ci_build.sh
+++ b/lite/tools/ci_build.sh
@@ -194,9 +194,9 @@ function build {
 function test_server {
   # Due to the missing of x86 kernels, we skip the following tests temporarily.
   # TODO(xxx) clear the skip list latter
-  local skip_list=("test_paddle_api" "test_cxx_api" "test_googlenet"
+  local skip_list=("test_paddle_api" "test_cxx_api"
                    "test_mobilenetv1_lite_x86" "test_mobilenetv2_lite_x86"
-                   "test_inceptionv4_lite_x86" "test_light_api"
+                   "test_light_api"
                    "test_apis" "test_model_bin"
                   )
   local to_skip=0
-- 
GitLab
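
Note on the profiling changes above: an Instruction whose op is "feed" or "fetch" now keeps the default profile_id_ of -1, so it is never registered with the profiler, and both Instruction::Run() and the kernel timing path check that sentinel before opening a ProfileBlock. The sketch below is a minimal, self-contained illustration of this sentinel-id pattern under simplified assumptions; it is not Paddle Lite code, and MiniProfiler, ScopedTimer, and Instr are hypothetical stand-ins for BasicProfiler, ProfileBlock, and Instruction. One deliberate detail: the RAII timer is scoped so that it encloses the work it measures.

#include <chrono>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for BasicProfiler: a global table of named records.
class MiniProfiler {
 public:
  static MiniProfiler& Global() {
    static MiniProfiler instance;
    return instance;
  }
  // Creates a new record and returns its id (analogous to NewRcd(...).id()).
  int NewRecord(const std::string& name) {
    names_.push_back(name);
    totals_us_.push_back(0.0);
    return static_cast<int>(names_.size()) - 1;
  }
  void Add(int id, double us) { totals_us_[static_cast<size_t>(id)] += us; }
  void Dump() const {
    for (size_t i = 0; i < names_.size(); ++i) {
      std::cout << names_[i] << ": " << totals_us_[i] << " us\n";
    }
  }

 private:
  std::vector<std::string> names_;
  std::vector<double> totals_us_;
};

// Hypothetical stand-in for ProfileBlock: an RAII timer that adds the
// elapsed time of its enclosing scope to a record when destroyed.
class ScopedTimer {
 public:
  explicit ScopedTimer(int id)
      : id_(id), start_(std::chrono::steady_clock::now()) {}
  ~ScopedTimer() {
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                  std::chrono::steady_clock::now() - start_)
                  .count();
    MiniProfiler::Global().Add(id_, static_cast<double>(us));
  }

 private:
  int id_;
  std::chrono::steady_clock::time_point start_;
};

// Hypothetical stand-in for Instruction.
struct Instr {
  explicit Instr(const std::string& op_type) : op_type_(op_type) {
    // Mirror the patch: feed/fetch ops never get a profile record, so
    // profile_id_ stays at the -1 sentinel.
    if (op_type_ != "feed" && op_type_ != "fetch") {
      profile_id_ = MiniProfiler::Global().NewRecord(op_type_);
    }
  }
  void Run() {
    if (profile_id_ >= 0) {
      // Timer scope encloses the work so the record covers DoWork().
      ScopedTimer t(profile_id_);
      DoWork();
    } else {
      DoWork();  // feed/fetch: run unprofiled
    }
  }
  void DoWork() { /* the real kernel would execute here */ }

  std::string op_type_;
  int profile_id_{-1};  // -1 means "not profiled"
};

int main() {
  Instr feed("feed"), conv("conv2d"), fetch("fetch");
  feed.Run();
  conv.Run();
  fetch.Run();
  MiniProfiler::Global().Dump();  // only conv2d appears in the dump
  return 0;
}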