diff --git a/lite/core/kernel.h b/lite/core/kernel.h
index ff848dae9e4ad6e8aaef70432301033406633db6..777d6665e134aef6549b0770d14640d894c02fd7 100644
--- a/lite/core/kernel.h
+++ b/lite/core/kernel.h
@@ -90,6 +90,9 @@ class KernelBase {
     profiler_->StopTiming(profile::Type::kCreate, profile_id_, ctx_.get());
     profiler_->StartTiming(profile::Type::kDispatch, profile_id_, ctx_.get());
     Run();
+#ifdef LITE_WITH_OPENCL
+    CLRuntime::Global()->command_queue().finish();
+#endif
     profiler_->StopTiming(profile::Type::kDispatch, profile_id_, ctx_.get());
 #else
     Run();
diff --git a/lite/core/profile/precision_profiler.h b/lite/core/profile/precision_profiler.h
index 0eebf6a61016a3b399b7a7d4de26a4303f741440..1176608b4c4121e9e03b2b0168e80e2a0d6bc98c 100644
--- a/lite/core/profile/precision_profiler.h
+++ b/lite/core/profile/precision_profiler.h
@@ -22,7 +22,9 @@
 #include <string>
 #include <vector>
 #include "lite/core/program.h"
+#ifdef LITE_WITH_X86
 #include "lite/fluid/float16.h"
+#endif
 
 #ifdef LITE_WITH_OPENCL
 #include "lite/backends/opencl/cl_image_converter.h"
diff --git a/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc b/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc
index 150bcd231c27c25d8510fc8dfa3281a8351514dd..3d09c071aa7ecbe51f1723cad314f2aedcdb2bd7 100644
--- a/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc
+++ b/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc
@@ -71,15 +71,17 @@ inline double GetCurrentUS() {
 
 void RunModel(std::string model_dir,
               const shape_t& input_shape,
-              int repeats,
-              int warmup,
-              int print_output_elem) {
+              size_t repeats,
+              size_t warmup,
+              size_t print_output_elem,
+              size_t power_mode) {
   // 1. Set MobileConfig
   MobileConfig config;
   config.set_model_from_file(model_dir);
   // NOTE: To load model transformed by model_optimize_tool before
   // release/v2.3.0, plese use `set_model_dir` API as listed below.
   // config.set_model_dir(model_dir);
+  config.set_power_mode(static_cast<paddle::lite_api::PowerMode>(power_mode));
 
   // 2. Create PaddlePredictor by MobileConfig
   std::shared_ptr<PaddlePredictor> predictor =
@@ -187,8 +189,15 @@ int main(int argc, char** argv) {
     warmup = atoi(argv[7]);
     print_output_elem = atoi(argv[8]);
   }
-
-  RunModel(model_dir, input_shape, repeats, warmup, print_output_elem);
+  // ARM power mode, fixed here rather than read from argv:
+  //   0 - big cluster (high performance)
+  //   1 - little cluster
+  //   2 - all cores
+  //   3 - no core binding
+  size_t power_mode = 0;
+
+  RunModel(
+      model_dir, input_shape, repeats, warmup, print_output_elem, power_mode);
 
   return 0;
 }