diff --git a/lite/core/kernel.h b/lite/core/kernel.h index ff848dae9e4ad6e8aaef70432301033406633db6..777d6665e134aef6549b0770d14640d894c02fd7 100644 --- a/lite/core/kernel.h +++ b/lite/core/kernel.h @@ -90,6 +90,9 @@ class KernelBase { profiler_->StopTiming(profile::Type::kCreate, profile_id_, ctx_.get()); profiler_->StartTiming(profile::Type::kDispatch, profile_id_, ctx_.get()); Run(); +#ifdef LITE_WITH_OPENCL + CLRuntime::Global()->command_queue().finish(); +#endif profiler_->StopTiming(profile::Type::kDispatch, profile_id_, ctx_.get()); #else Run(); diff --git a/lite/core/profile/precision_profiler.h b/lite/core/profile/precision_profiler.h index 0eebf6a61016a3b399b7a7d4de26a4303f741440..1176608b4c4121e9e03b2b0168e80e2a0d6bc98c 100644 --- a/lite/core/profile/precision_profiler.h +++ b/lite/core/profile/precision_profiler.h @@ -22,7 +22,9 @@ #include #include #include "lite/core/program.h" +#ifdef LITE_WITH_X86 #include "lite/fluid/float16.h" +#endif #ifdef LITE_WITH_OPENCL #include "lite/backends/opencl/cl_image_converter.h" diff --git a/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc b/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc index 150bcd231c27c25d8510fc8dfa3281a8351514dd..3d09c071aa7ecbe51f1723cad314f2aedcdb2bd7 100644 --- a/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc +++ b/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc @@ -71,15 +71,17 @@ inline double GetCurrentUS() { void RunModel(std::string model_dir, const shape_t& input_shape, - int repeats, - int warmup, - int print_output_elem) { + size_t repeats, + size_t warmup, + size_t print_output_elem, + size_t power_mode) { // 1. Set MobileConfig MobileConfig config; config.set_model_from_file(model_dir); // NOTE: To load model transformed by model_optimize_tool before // release/v2.3.0, plese use `set_model_dir` API as listed below. // config.set_model_dir(model_dir); + config.set_power_mode(static_cast(power_mode)); // 2. Create PaddlePredictor by MobileConfig std::shared_ptr predictor = @@ -187,8 +189,15 @@ int main(int argc, char** argv) { warmup = atoi(argv[7]); print_output_elem = atoi(argv[8]); } - - RunModel(model_dir, input_shape, repeats, warmup, print_output_elem); + // set arm power mode: + // 0 for big cluster, high performance + // 1 for little cluster + // 2 for all cores + // 3 for no bind + size_t power_mode = 0; + + RunModel( + model_dir, input_shape, repeats, warmup, print_output_elem, power_mode); return 0; }