[LITE][PROFILE] Fix Precision Profiler issue caused by X86 header (#3506)

* [LITE] Set high perf mode as default in mobile_light demo; Fix Precision Profiler issue caused by X86 header; Enhance profiler for OpenCL. test=develop

[LITE][PROFILE] Fix Precision Profiler issue caused by X86 header (#3506)
* [LITE] Set high perf mode as default in mobile_light demo; Fix Precision Profiler issue caused by X86 header; Enhance profiler for OpenCL. test=develop
522a9ee7 · Yuan Shuai · GitHub · 595e0d50 · 522a9ee7 · 522a9ee7
3 changed files
--- a/lite/core/kernel.h
+++ b/lite/core/kernel.h
@@ -90,6 +90,9 @@ class KernelBase {
    profiler_->StopTiming(profile::Type::kCreate, profile_id_, ctx_.get());
    profiler_->StartTiming(profile::Type::kDispatch, profile_id_, ctx_.get());
    Run();
+#ifdef LITE_WITH_OPENCL
+    CLRuntime::Global()->command_queue().finish();
+#endif
    profiler_->StopTiming(profile::Type::kDispatch, profile_id_, ctx_.get());
 #else
    Run();

--- a/lite/core/profile/precision_profiler.h
+++ b/lite/core/profile/precision_profiler.h
@@ -22,7 +22,9 @@
 #include <string>
 #include <vector>
 #include "lite/core/program.h"
+#ifdef LITE_WITH_X86
 #include "lite/fluid/float16.h"
+#endif
 #ifdef LITE_WITH_OPENCL
 #include "lite/backends/opencl/cl_image_converter.h"

--- a/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc
+++ b/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc
@@ -71,15 +71,17 @@ inline double GetCurrentUS() {
 void RunModel(std::string model_dir,
              const shape_t& input_shape,
-              int repeats,
+              size_t repeats,
-              int warmup,
+              size_t warmup,
-              int print_output_elem) {
+              size_t print_output_elem,
+              size_t power_mode) {
  // 1. Set MobileConfig
  MobileConfig config;
  config.set_model_from_file(model_dir);
  // NOTE: To load model transformed by model_optimize_tool before
  // release/v2.3.0, plese use `set_model_dir` API as listed below.
  // config.set_model_dir(model_dir);
+  config.set_power_mode(static_cast<paddle::lite_api::PowerMode>(power_mode));
  // 2. Create PaddlePredictor by MobileConfig
  std::shared_ptr<PaddlePredictor> predictor =
@@ -187,8 +189,15 @@ int main(int argc, char** argv) {
    warmup = atoi(argv[7]);
    print_output_elem = atoi(argv[8]);
  }
+  // set arm power mode:
-  RunModel(model_dir, input_shape, repeats, warmup, print_output_elem);
+  // 0 for big cluster, high performance
+  // 1 for little cluster
+  // 2 for all cores
+  // 3 for no bind
+  size_t power_mode = 0;
+  RunModel(
+      model_dir, input_shape, repeats, warmup, print_output_elem, power_mode);
  return 0;
 }