Make inceptionv4, resnet50, and googlenet runnable on the x86 platform (#2250)

* make inceptionv4, resnet50, and googlenet runnable on the x86 platform and fix the comparison part in the x86 unit tests, test=develop * fix googlenet tests for benchmark record, test=develop * [framework][profile] fix profile dump bug when op is feed and fetch test=develop (sangoly)

Make inceptionv4, resnet50, and googlenet runnable on the x86 platform (#2250)
* make inceptionv4, resnet50, and googlenet runnable on the x86 platform and fix the comparison part in the x86 unit tests, test=develop * fix googlenet tests for benchmark record, test=develop * [framework][profile] fix profile dump bug when op is feed and fetch test=develop (sangoly)
edb4ea9a · liu zhengxi · GitHub · f0f3e90b · edb4ea9a · edb4ea9a
11 changed files
--- a/lite/api/CMakeLists.txt
+++ b/lite/api/CMakeLists.txt
@@ -119,6 +119,11 @@ if(WITH_TESTING)
           ${ops} ${host_kernels} ${x86_kernels}
           ARGS --model_dir=${LITE_MODEL_DIR}/inception_v4_simple)
        add_dependencies(test_inceptionv4_lite_x86 extern_lite_download_inception_v4_simple_tar_gz)
+        lite_cc_test(test_resnet50_lite_x86 SRCS test_resnet50_lite_x86.cc
+           DEPS cxx_api mir_passes lite_api_test_helper
+           ${ops} ${host_kernels} ${x86_kernels}
+           ARGS --model_dir=${LITE_MODEL_DIR}/resnet50)
+        add_dependencies(test_resnet50_lite_x86 extern_lite_download_resnet50_tar_gz)
    endif()
 endif()


--- a/lite/api/test_googlenet_lite.cc
+++ b/lite/api/test_googlenet_lite.cc
@@ -12,20 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
 #include <gflags/gflags.h>
 #include <gtest/gtest.h>
 #include <vector>
@@ -34,12 +20,10 @@
 #include "lite/api/paddle_use_kernels.h"
 #include "lite/api/paddle_use_ops.h"
 #include "lite/api/paddle_use_passes.h"
+#include "lite/api/test_helper.h"
 #include "lite/core/op_registry.h"
 #include "lite/core/tensor.h"

-// for googlenet
-DEFINE_string(model_dir, "", "");
-
 namespace paddle {
 namespace lite {
 #ifdef LITE_WITH_X86
@@ -57,7 +41,21 @@ TEST(CXXApi, test_lite_googlenet) {
  for (int i = 0; i < input_tensor->dims().production(); i++) {
    data[i] = 1;
  }
+
+  for (int i = 0; i < FLAGS_warmup; ++i) {
+    predictor.Run();
+  }
+
+  auto start = GetCurrentUS();
+  for (int i = 0; i < FLAGS_repeats; ++i) {
    predictor.Run();
+  }
+
+  LOG(INFO) << "================== Speed Report ===================";
+  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
+            << ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
+            << ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
+            << " ms in average.";

  auto* out = predictor.GetOutput(0);
  std::vector<float> results(

--- a/lite/api/test_resnet50_lite_x86.cc
+++ b/lite/api/test_resnet50_lite_x86.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gflags/gflags.h>
+#include <gtest/gtest.h>
+#include <vector>
+#include "lite/api/cxx_api.h"
+#include "lite/api/paddle_use_kernels.h"
+#include "lite/api/paddle_use_ops.h"
+#include "lite/api/paddle_use_passes.h"
+#include "lite/api/test_helper.h"
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+
+// End-to-end check of the ResNet50 model on the x86 target. The test builds a
+// predictor from --model_dir, feeds an all-ones 1x3x224x224 input, runs
+// FLAGS_warmup untimed iterations followed by FLAGS_repeats timed iterations,
+// logs the average latency, and finally compares every 50th value of the
+// 1x1000 output against recorded reference values.
+TEST(Resnet50, test_resnet50_lite_x86) {
+  // DeviceInfo::Init();
+  // DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
+  lite::Predictor predictor;
+  std::vector<Place> valid_places({Place{TARGET(kX86), PRECISION(kFloat)},
+                                   Place{TARGET(kHost), PRECISION(kFloat)}});
+
+  std::string model_dir = FLAGS_model_dir;
+  // Explicit pass list handed to Build(); the order is kept exactly as given.
+  std::vector<std::string> passes({"static_kernel_pick_pass",
+                                   "variable_place_inference_pass",
+                                   "type_target_cast_pass",
+                                   "variable_place_inference_pass",
+                                   "io_copy_kernel_pick_pass",
+                                   "variable_place_inference_pass",
+                                   "runtime_context_assign_pass"});
+  predictor.Build(model_dir, "", "", valid_places, passes);
+
+  // Fill the single input tensor with ones.
+  auto* input_tensor = predictor.GetInput(0);
+  input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
+  auto* data = input_tensor->mutable_data<float>();
+  const auto item_size = input_tensor->dims().production();
+  // Index with the same type as the element count to avoid a narrowing or
+  // mixed-width comparison.
+  for (decltype(input_tensor->dims().production()) i = 0; i < item_size; ++i) {
+    data[i] = 1;
+  }
+
+  for (int i = 0; i < FLAGS_warmup; ++i) {
+    predictor.Run();
+  }
+
+  auto start = GetCurrentUS();
+  for (int i = 0; i < FLAGS_repeats; ++i) {
+    predictor.Run();
+  }
+  // Take the elapsed time immediately so it covers only the timed runs and
+  // not the log output emitted below.
+  const auto elapsed_us = GetCurrentUS() - start;
+
+  LOG(INFO) << "================== Speed Report ===================";
+  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
+            << ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
+            << ", spend " << elapsed_us / FLAGS_repeats / 1000.0
+            << " ms in average.";
+
+  // Reference outputs, one row per batch item; each row holds the values
+  // expected at output positions 0, step, 2 * step, ...
+  std::vector<std::vector<float>> results;
+  results.emplace_back(std::vector<float>(
+      {0.00024139918, 0.00020566184, 0.00022418296, 0.00041731037,
+       0.0005366107,  0.00016948722, 0.00028638865, 0.0009257241,
+       0.00072681636, 8.531815e-05,  0.0002129998,  0.0021168243,
+       0.006387163,   0.0037145028,  0.0012812682,  0.00045948103,
+       0.00013535398, 0.0002483765,  0.00076759676, 0.0002773295}));
+  auto* out = predictor.GetOutput(0);
+  // Abort early if the output shape is not 1x1000; the indexing below relies
+  // on it.
+  ASSERT_EQ(out->dims().size(), 2);
+  ASSERT_EQ(out->dims()[0], 1);
+  ASSERT_EQ(out->dims()[1], 1000);
+
+  // Hoist the loop invariants and use unsigned indices so the comparisons
+  // against std::vector::size() are not signed/unsigned mismatches.
+  const size_t step = 50;
+  const size_t row_len = static_cast<size_t>(out->dims()[1]);
+  const auto* out_data = out->data<float>();
+  for (size_t i = 0; i < results.size(); ++i) {
+    for (size_t j = 0; j < results[i].size(); ++j) {
+      EXPECT_NEAR(out_data[j * step + row_len * i], results[i][j], 1e-6);
+    }
+  }
+}
+
+}  // namespace lite
+}  // namespace paddle
--- a/lite/core/kernel.h
+++ b/lite/core/kernel.h
@@ -82,8 +82,9 @@ class KernelBase {
 #endif

 #ifdef LITE_WITH_PROFILE
-    CHECK_GE(profile_id_, 0) << "Must set profile id first";
+    if (profile_id_ >= 0) {
      profile::ProfileBlock x(profile_id_, "kernel");
+    }
 #endif
    Run();
  }

--- a/lite/core/program.cc
+++ b/lite/core/program.cc
@@ -184,7 +184,9 @@ void Instruction::Run() {
  CHECK(op_) << "op null";
  CHECK(kernel_) << "kernel null";
 #ifdef LITE_WITH_PROFILE
+  if (profile_id_ >= 0) {
    profile::ProfileBlock x(profile_id_, "instruction");
+  }
 #endif  // LITE_WITH_PROFILE
  if (first_epoch_) {
    first_epoch_ = false;

--- a/lite/core/program.h
+++ b/lite/core/program.h
@@ -89,6 +89,7 @@ struct Instruction {
              std::unique_ptr<KernelBase>&& kernel)
      : op_(op), kernel_(std::move(kernel)) {
 #ifdef LITE_WITH_PROFILE
+    if (op_->Type() != "feed" && op_->Type() != "fetch") {
      profile_id_ = profile::BasicProfiler<profile::BasicTimer>::Global()
                        .NewRcd(kernel_->SerializedKernelType())
                        .id();
@@ -99,6 +100,7 @@ struct Instruction {
              profile_id_);
      profiler.SetCustomInfo("op_type", op_->Type());
      profiler.SetCustomInfo("op_info", op_->SerializedOpInfo());
+    }
 #endif  // LITE_WITH_PROFILE
  }


--- a/lite/kernels/x86/batch_norm_compute.h
+++ b/lite/kernels/x86/batch_norm_compute.h
@@ -46,6 +46,7 @@ class BatchNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
  void Run() override {
    // auto &context = ctx_->As<X86Context>();
    auto &param = *param_.get_mutable<operators::BatchNormParam>();
+    param.is_test = true;
    bool global_stats = param.is_test || param.use_global_stats;

    const auto *x = param.x;

--- a/lite/kernels/x86/batch_norm_compute_test.cc
+++ b/lite/kernels/x86/batch_norm_compute_test.cc
@@ -104,7 +104,7 @@ TEST(batch_norm_x86, run_test) {
  operators::BatchNormParam param;

  param.x = &x;
-  param.is_test = false;
+  param.is_test = true;
  param.scale = &scale;
  param.bias = &bias;
  param.mean = &mean;

--- a/lite/kernels/x86/conv_compute_test.cc
+++ b/lite/kernels/x86/conv_compute_test.cc
@@ -84,8 +84,9 @@ TEST(conv2d_x86, run_test) {
  conv2d.Run();

  LOG(INFO) << "output: ";
+  float ref_result[1] = {27.};
  for (int i = 0; i < out.dims().production(); i++) {
-    LOG(INFO) << out_data[i] << " ";
+    EXPECT_NEAR(out_data[i], ref_result[i], 1e-5);
  }
 }


--- a/lite/kernels/x86/pool_compute_test.cc
+++ b/lite/kernels/x86/pool_compute_test.cc
@@ -70,8 +70,11 @@ TEST(pool2d_x86, run_test) {
  pool2d.Run();

  LOG(INFO) << "output: ";
+  float ref_result[12] = {
+      5., 7., 13., 15., 21., 23., 29., 31., 37., 39., 45., 47.};
  for (int i = 0; i < out.dims().production(); i++) {
    LOG(INFO) << out_data[i];
+    EXPECT_NEAR(out_data[i], ref_result[i], 1e-5);
  }
 }


--- a/lite/tools/ci_build.sh
+++ b/lite/tools/ci_build.sh
@@ -194,9 +194,9 @@ function build {
 function test_server {
    # Due to the missing of x86 kernels, we skip the following tests temporarily.
    # TODO(xxx) clear the skip list latter
-    local skip_list=("test_paddle_api" "test_cxx_api" "test_googlenet"
+    local skip_list=("test_paddle_api" "test_cxx_api"
                     "test_mobilenetv1_lite_x86" "test_mobilenetv2_lite_x86"
-                     "test_inceptionv4_lite_x86" "test_light_api"
+                     "test_light_api"
                     "test_apis" "test_model_bin"
                    )
    local to_skip=0