From 17833acb8d3a1e78b0a58bb842bf2378d585976b Mon Sep 17 00:00:00 2001 From: sangoly Date: Wed, 16 Oct 2019 16:06:21 +0800 Subject: [PATCH] [framework][place] remove prefered_place and kHost in valid_places (#2192) * [framework][place] remove prefered_place, use place order in valid_place array instead test=develop * remove kHost from valid_places test=develop --- .../api/android/jni/native/convert_util_jni.h | 9 -- .../src/com/baidu/paddle/lite/CxxConfig.java | 9 -- lite/api/apis_test.cc | 14 +-- lite/api/benchmark.cc | 2 - lite/api/cxx_api.cc | 16 ++-- lite/api/cxx_api.h | 10 +- lite/api/cxx_api_bin.cc | 6 +- lite/api/cxx_api_impl.cc | 1 - lite/api/cxx_api_test.cc | 28 ++---- lite/api/detection_model_test.cc | 1 - lite/api/efficientnet_b0_test.cc | 13 +-- lite/api/inceptionv4_test.cc | 9 +- lite/api/lite_api_test_helper.cc | 12 +-- lite/api/mobilenetv1_int8_test.cc | 10 +- lite/api/mobilenetv1_ssd_test.cc | 6 +- lite/api/mobilenetv1_test.cc | 22 ++--- lite/api/mobilenetv1_yolov3_test.cc | 8 +- lite/api/mobilenetv2_test.cc | 22 ++--- lite/api/model_optimize_tool.cc | 5 +- lite/api/model_run_test_image.cc | 6 +- lite/api/model_test.cc | 2 - lite/api/ocr_attention_test.cc | 9 +- lite/api/paddle_api.h | 3 - lite/api/paddle_api_test.cc | 1 - lite/api/resnet18_test.cc | 9 +- lite/api/resnet50_test.cc | 13 +-- lite/api/resnet50_test_fpga.cc | 3 +- lite/api/shufflenetv2_test.cc | 8 +- lite/api/test_googlenet_lite.cc | 6 +- lite/api/test_inceptionv4_lite_x86.cc | 10 +- lite/api/test_mobilenetv1_lite_x86.cc | 10 +- lite/api/test_mobilenetv2_lite_x86.cc | 11 +-- lite/api/unet_test.cc | 9 +- lite/core/mir/fusion/fc_fuse_pass_test.cc | 9 +- lite/core/mir/static_kernel_pick_pass.cc | 10 +- lite/core/mir/static_kernel_pick_pass.h | 91 ++++++++++++------- .../generate_npu_program_pass_test.cc | 10 +- .../mir/variable_place_inference_pass_test.cc | 12 --- lite/core/optimizer.h | 7 -- .../cxx/mobile_full/mobilenetv1_full_api.cc | 6 +- lite/gen_code/gen_code.h | 2 +- lite/tools/debug/model_debug_tool.cc | 19 +--- 42 files changed, 150 insertions(+), 319 deletions(-) diff --git a/lite/api/android/jni/native/convert_util_jni.h b/lite/api/android/jni/native/convert_util_jni.h index c9e8e2e4a6..5e5d3723e4 100644 --- a/lite/api/android/jni/native/convert_util_jni.h +++ b/lite/api/android/jni/native/convert_util_jni.h @@ -145,8 +145,6 @@ inline CxxConfig jcxxconfig_to_cpp_cxxconfig(JNIEnv *env, jobject jcxxconfig) { jmethodID model_dir_method = env->GetMethodID(cxxconfig_jclazz, "getModelDir", "()Ljava/lang/String;"); - jmethodID preferred_place_method = env->GetMethodID( - cxxconfig_jclazz, "getPreferredPlace", "()Lcom/baidu/paddle/lite/Place;"); jmethodID valid_places_method = env->GetMethodID( cxxconfig_jclazz, "getValidPlaces", "()[Lcom/baidu/paddle/lite/Place;"); @@ -159,13 +157,6 @@ inline CxxConfig jcxxconfig_to_cpp_cxxconfig(JNIEnv *env, jobject jcxxconfig) { config.set_model_dir(cpp_model_dir); } - jobject java_preferred_place = - env->CallObjectMethod(jcxxconfig, preferred_place_method); - if (java_preferred_place != nullptr) { - Place cpp_preferred_place = jplace_to_cpp_place(env, java_preferred_place); - config.set_preferred_place(cpp_preferred_place); - } - jobject object_valid_places = env->CallObjectMethod(jcxxconfig, valid_places_method); jobjectArray *java_valid_places = diff --git a/lite/api/android/jni/src/com/baidu/paddle/lite/CxxConfig.java b/lite/api/android/jni/src/com/baidu/paddle/lite/CxxConfig.java index 906293c92f..3f68ef8922 100644 --- 
a/lite/api/android/jni/src/com/baidu/paddle/lite/CxxConfig.java +++ b/lite/api/android/jni/src/com/baidu/paddle/lite/CxxConfig.java @@ -18,17 +18,8 @@ package com.baidu.paddle.lite; */ public class CxxConfig extends ConfigBase { - protected Place preferredPlace; protected Place[] validPlaces; - public Place getPreferredPlace() { - return preferredPlace; - } - - public void setPreferredPlace(Place preferredPlace) { - this.preferredPlace = preferredPlace; - } - public Place[] getValidPlaces() { return validPlaces; } diff --git a/lite/api/apis_test.cc b/lite/api/apis_test.cc index 3dc0224084..ac2c385d53 100644 --- a/lite/api/apis_test.cc +++ b/lite/api/apis_test.cc @@ -51,17 +51,12 @@ bool CompareTensors(const std::string& name, TEST(CXXApi_LightApi, optim_model) { lite::Predictor cxx_api; std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kX86), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, // Both works on X86 and ARM }); // On ARM devices, the preferred X86 target not works, but it can still // select ARM kernels. - cxx_api.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kX86), PRECISION(kFloat)}, - valid_places); + cxx_api.Build(FLAGS_model_dir, "", "", valid_places); cxx_api.SaveModel(FLAGS_optimized_model); } @@ -72,17 +67,12 @@ TEST(CXXApi_LightApi, save_and_load_model) { // CXXAPi { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kX86), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, // Both works on X86 and ARM }); // On ARM devices, the preferred X86 target not works, but it can still // select ARM kernels. - cxx_api.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kX86), PRECISION(kFloat)}, - valid_places); + cxx_api.Build(FLAGS_model_dir, "", "", valid_places); auto* x = cxx_api.GetInput(0); SetConstInput(x); diff --git a/lite/api/benchmark.cc b/lite/api/benchmark.cc index 02313554d9..a423cd07a2 100644 --- a/lite/api/benchmark.cc +++ b/lite/api/benchmark.cc @@ -47,9 +47,7 @@ void OutputOptModel(const std::string& load_model_dir, Place{TARGET(kARM), PRECISION(kInt8)}, Place{TARGET(kX86), PRECISION(kFloat)}, Place{TARGET(kOpenCL), PRECISION(kFloat)}, - Place{TARGET(kHost), PRECISION(kFloat)}, }); - config.set_preferred_place(Place{TARGET(kARM), PRECISION(kFloat)}); auto predictor = lite_api::CreatePaddlePredictor(config); int ret = system( diff --git a/lite/api/cxx_api.cc b/lite/api/cxx_api.cc index 93568882bc..490a184c2d 100644 --- a/lite/api/cxx_api.cc +++ b/lite/api/cxx_api.cc @@ -110,14 +110,12 @@ void Predictor::Build(const lite_api::CxxConfig &config, const std::string &model_path = config.model_dir(); const std::string &model_file = config.model_file(); const std::string ¶m_file = config.param_file(); - const Place prefer_place = config.preferred_place(); const bool model_from_memory = config.model_from_memory(); LOG(INFO) << "load from memory " << model_from_memory; Build(model_path, model_file, param_file, - prefer_place, valid_places, passes, model_type, @@ -126,7 +124,6 @@ void Predictor::Build(const lite_api::CxxConfig &config, void Predictor::Build(const std::string &model_path, const std::string &model_file, const std::string ¶m_file, - const Place &prefer_place, const std::vector &valid_places, const std::vector &passes, lite_api::LiteModelType model_type, @@ -153,21 +150,24 @@ void Predictor::Build(const std::string &model_path, default: LOG(FATAL) << "Unknown model type"; } - Build(program_desc_, prefer_place, valid_places, passes); + Build(program_desc_, valid_places, 
      passes);
}

void Predictor::Build(const cpp::ProgramDesc &desc,
-                      const Place &prefer_place,
                       const std::vector<Place> &valid_places,
                       const std::vector<std::string> &passes) {
   program_desc_ = desc;
-  Program program(desc, scope_, valid_places);
-  optimizer_.KernelPickPreferPlace(prefer_place);
+  std::vector<Place> inner_places = valid_places;
+  inner_places.emplace_back(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny));
+  inner_places.emplace_back(
+      TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW));
+  Program program(desc, scope_, inner_places);
+  /// The first place in valid_places has the highest priority.
   core::KernelPickFactor factor;
   factor.ConsiderTarget();
   factor.ConsiderPrecision();
   factor.ConsiderDataLayout();
-  optimizer_.Run(std::move(program), valid_places, factor, passes);
+  optimizer_.Run(std::move(program), inner_places, factor, passes);
   exec_scope_ = optimizer_.exec_scope();
 }
diff --git a/lite/api/cxx_api.h b/lite/api/cxx_api.h
index 1fe7c985af..7f5490fa9f 100644
--- a/lite/api/cxx_api.h
+++ b/lite/api/cxx_api.h
@@ -50,14 +50,12 @@ class LITE_API Predictor {
       const std::string& model_path,
       const std::string& model_file_path,
       const std::string& param_file_path,
-      const Place& prefer_place,
       const std::vector<Place>& valid_places,
       const std::vector<std::string>& passes = {},
       lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf,
       bool memory_from_memory = false);
 
   void Build(const cpp::ProgramDesc& desc,
-             const Place& prefer_place,
              const std::vector<Place>& valid_places,
              const std::vector<std::string>& passes = {});
@@ -132,10 +130,8 @@ class LITE_API Predictor {
 class LITE_API CXXTrainer {
  public:
   CXXTrainer(const std::shared_ptr<lite::Scope>& root_scope,
-             const Place& preferred_place,
              const std::vector<Place>& valid_places)
       : scope_(root_scope),
-        preferred_place_(preferred_place),
         valid_places_(valid_places),
         main_program_executor_(Predictor(scope_)) {}
 
@@ -144,7 +140,7 @@ class LITE_API CXXTrainer {
   // NOTE Just support to execute the 0-th block currently.
   Predictor& BuildMainProgramExecutor(const framework::proto::ProgramDesc& desc,
                                       int block_id = 0) {
-    main_program_executor_.Build(desc, preferred_place_, valid_places_);
+    main_program_executor_.Build(desc, valid_places_);
     return main_program_executor_;
   }
 
@@ -162,14 +158,12 @@ class LITE_API CXXTrainer {
   void RunStartupProgram(const framework::proto::ProgramDesc& desc,
                          int block_id = 0) {
     Predictor exe(scope_);
-    exe.Build(desc, preferred_place_, valid_places_);
+    exe.Build(desc, valid_places_);
     exe.Run();
   }
 
  private:
   std::shared_ptr<lite::Scope> scope_;
-
-  Place preferred_place_;
   std::vector<Place> valid_places_;
 
   // The training program.
diff --git a/lite/api/cxx_api_bin.cc b/lite/api/cxx_api_bin.cc index 000e94307c..8c929e9c87 100644 --- a/lite/api/cxx_api_bin.cc +++ b/lite/api/cxx_api_bin.cc @@ -35,13 +35,11 @@ void Run(const char* model_dir, int repeat) { #endif lite::Predictor predictor; std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kInt8)}, + Place{TARGET(kARM), PRECISION(kFloat)}, }); - predictor.Build( - model_dir, "", "", Place{TARGET(kARM), PRECISION(kInt8)}, valid_places); + predictor.Build(model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); diff --git a/lite/api/cxx_api_impl.cc b/lite/api/cxx_api_impl.cc index 8091e2ffd5..a92ef0be88 100644 --- a/lite/api/cxx_api_impl.cc +++ b/lite/api/cxx_api_impl.cc @@ -62,7 +62,6 @@ void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) { Env::Init(); #endif auto places = config.valid_places(); - places.emplace_back(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)); raw_predictor_.Build(config, places); raw_predictor_.PrepareFeedFetch(); } diff --git a/lite/api/cxx_api_test.cc b/lite/api/cxx_api_test.cc index c562b9f080..4d711302cb 100644 --- a/lite/api/cxx_api_test.cc +++ b/lite/api/cxx_api_test.cc @@ -43,13 +43,8 @@ TEST(CXXApi, test) { TEST(CXXApi, save_model) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); - predictor.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kCUDA), PRECISION(kFloat)}, - valid_places); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}}); + predictor.Build(FLAGS_model_dir, "", "", valid_places); LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model; predictor.SaveModel(FLAGS_optimized_model, @@ -59,11 +54,11 @@ TEST(CXXApi, save_model) { } /*TEST(CXXTrainer, train) { - Place prefer_place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}); - std::vector valid_places({prefer_place}); + Place place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}); + std::vector valid_places({place}); auto scope = std::make_shared(); - CXXTrainer trainer(scope, prefer_place, valid_places); + CXXTrainer trainer(scope, valid_places); std::string main_program_pb, startup_program_pb; ReadBinaryFile(FLAGS_main_program_path, &main_program_pb); @@ -94,13 +89,8 @@ TEST(CXXApi, save_model) { #ifdef LITE_WITH_ARM TEST(CXXApi, save_model) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}}); - predictor.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kARM), PRECISION(kFloat)}, - valid_places); + std::vector valid_places({Place{TARGET(kARM), PRECISION(kFloat)}}); + predictor.Build(FLAGS_model_dir, "", "", valid_places); LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model; predictor.SaveModel(FLAGS_optimized_model); @@ -110,12 +100,10 @@ TEST(CXXApi, save_model) { TEST(CXXApi, load_model_naive) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kARM), PRECISION(kFloat)}}); predictor.Build(FLAGS_optimized_model + ".naive", "", "", - Place{TARGET(kARM), PRECISION(kFloat)}, valid_places, {}, lite_api::LiteModelType::kNaiveBuffer); diff --git a/lite/api/detection_model_test.cc b/lite/api/detection_model_test.cc index 2d79653baa..c14acbac41 
100644 --- a/lite/api/detection_model_test.cc +++ b/lite/api/detection_model_test.cc @@ -34,7 +34,6 @@ void OutputOptModel(const std::string& load_model_dir, const std::string& save_optimized_model_dir) { lite_api::CxxConfig config; config.set_model_dir(load_model_dir); - config.set_preferred_place(Place{TARGET(kX86), PRECISION(kFloat)}); config.set_valid_places({ Place{TARGET(kX86), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, diff --git a/lite/api/efficientnet_b0_test.cc b/lite/api/efficientnet_b0_test.cc index fa16a6be81..61d74eb354 100644 --- a/lite/api/efficientnet_b0_test.cc +++ b/lite/api/efficientnet_b0_test.cc @@ -25,13 +25,12 @@ namespace paddle { namespace lite { -void TestModel(const std::vector &valid_places, - const Place &preferred_place) { +void TestModel(const std::vector &valid_places) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads); lite::Predictor predictor; - predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto *input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); @@ -80,22 +79,20 @@ void TestModel(const std::vector &valid_places, TEST(EfficientNetB0, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, // Place{TARGET(kOpenCL), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)})); + TestModel(valid_places); } TEST(EfficientNetB0, test_opencl) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kOpenCL), PRECISION(kFloat)}, + Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)})); + TestModel(valid_places); } } // namespace lite diff --git a/lite/api/inceptionv4_test.cc b/lite/api/inceptionv4_test.cc index ae772dbba5..95ad5121ca 100644 --- a/lite/api/inceptionv4_test.cc +++ b/lite/api/inceptionv4_test.cc @@ -30,14 +30,9 @@ TEST(InceptionV4, test) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads); lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kARM), PRECISION(kFloat)}}); - predictor.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kARM), PRECISION(kFloat)}, - valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); diff --git a/lite/api/lite_api_test_helper.cc b/lite/api/lite_api_test_helper.cc index cd576998d3..802f6d4b52 100644 --- a/lite/api/lite_api_test_helper.cc +++ b/lite/api/lite_api_test_helper.cc @@ -24,24 +24,16 @@ namespace lite { const lite::Tensor* RunHvyModel() { lite::Predictor predictor; #ifndef LITE_WITH_CUDA - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}}); #else std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}, Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)}, Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)}, - Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)}, Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)}, - 
Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)}, }); #endif - predictor.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kX86), PRECISION(kFloat)}, // origin cuda - valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({100, 100}))); diff --git a/lite/api/mobilenetv1_int8_test.cc b/lite/api/mobilenetv1_int8_test.cc index d3ac115fa2..2a54042f43 100644 --- a/lite/api/mobilenetv1_int8_test.cc +++ b/lite/api/mobilenetv1_int8_test.cc @@ -25,13 +25,12 @@ namespace paddle { namespace lite { -void TestModel(const std::vector& valid_places, - const Place& preferred_place) { +void TestModel(const std::vector& valid_places) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads); lite::Predictor predictor; - predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); @@ -93,12 +92,11 @@ void TestModel(const std::vector& valid_places, TEST(MobileNetV1, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kInt8)}, + Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kInt8)})); + TestModel(valid_places); } } // namespace lite diff --git a/lite/api/mobilenetv1_ssd_test.cc b/lite/api/mobilenetv1_ssd_test.cc index c93da43c11..8eacbe2619 100644 --- a/lite/api/mobilenetv1_ssd_test.cc +++ b/lite/api/mobilenetv1_ssd_test.cc @@ -26,13 +26,12 @@ namespace paddle { namespace lite { #ifdef LITE_WITH_ARM -void TestModel(const std::vector& valid_places, - const Place& preferred_place) { +void TestModel(const std::vector& valid_places) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads); lite::Predictor predictor; - predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 300, 300}))); @@ -99,7 +98,6 @@ void TestModel(const std::vector& valid_places, TEST(MobileNetV1_SSD, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, }); diff --git a/lite/api/mobilenetv1_test.cc b/lite/api/mobilenetv1_test.cc index f4bb7318df..63a401745b 100644 --- a/lite/api/mobilenetv1_test.cc +++ b/lite/api/mobilenetv1_test.cc @@ -28,14 +28,13 @@ namespace paddle { namespace lite { void TestModel(const std::vector& valid_places, - const Place& preferred_place, const std::string& model_dir = FLAGS_model_dir, bool save_model = false) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads); lite::Predictor predictor; - predictor.Build(model_dir, "", "", preferred_place, valid_places); + predictor.Build(model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); @@ -103,41 +102,32 @@ void TestModel(const std::vector& valid_places, #ifdef LITE_WITH_NPU TEST(MobileNetV1, test_npu) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kNPU), PRECISION(kFloat)}, }); - TestModel(valid_places, - Place({TARGET(kARM), 
PRECISION(kFloat)}), - FLAGS_model_dir, - true /* save_model*/); + TestModel(valid_places, FLAGS_model_dir, true /* save_model*/); - TestModel(valid_places, - Place({TARGET(kARM), PRECISION(kFloat)}), - FLAGS_optimized_model, - false /* save model */); + TestModel(valid_places, FLAGS_optimized_model, false /* save model */); } #endif // LITE_WITH_NPU TEST(MobileNetV1, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)})); + TestModel(valid_places); } #ifdef LITE_WITH_OPENCL TEST(MobileNetV1, test_opencl) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kOpenCL), PRECISION(kFloat)}, + Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)})); + TestModel(valid_places); } #endif // LITE_WITH_OPENCL diff --git a/lite/api/mobilenetv1_yolov3_test.cc b/lite/api/mobilenetv1_yolov3_test.cc index 7ea33528ca..09f9b6d11a 100644 --- a/lite/api/mobilenetv1_yolov3_test.cc +++ b/lite/api/mobilenetv1_yolov3_test.cc @@ -26,13 +26,12 @@ namespace paddle { namespace lite { #ifdef LITE_WITH_ARM -void TestModel(const std::vector& valid_places, - const Place& preferred_place) { +void TestModel(const std::vector& valid_places) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads); lite::Predictor predictor; - predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 608, 608}))); @@ -106,11 +105,10 @@ void TestModel(const std::vector& valid_places, TEST(MobileNetV1_YoloV3, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)})); + TestModel(valid_places); } #endif // LITE_WITH_ARM diff --git a/lite/api/mobilenetv2_test.cc b/lite/api/mobilenetv2_test.cc index 09dbd0f8bf..84bd27e352 100644 --- a/lite/api/mobilenetv2_test.cc +++ b/lite/api/mobilenetv2_test.cc @@ -29,14 +29,13 @@ namespace lite { #ifdef LITE_WITH_ARM void TestModel(const std::vector& valid_places, - const Place& preferred_place, const std::string& model_dir = FLAGS_model_dir, bool save_model = false) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads); lite::Predictor predictor; - predictor.Build(model_dir, "", "", preferred_place, valid_places); + predictor.Build(model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); @@ -103,41 +102,32 @@ void TestModel(const std::vector& valid_places, #ifdef LITE_WITH_NPU TEST(MobileNetV2, test_npu) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kNPU), PRECISION(kFloat)}, }); - TestModel(valid_places, - Place({TARGET(kARM), PRECISION(kFloat)}), - FLAGS_model_dir, - true /* save_model*/); + TestModel(valid_places, FLAGS_model_dir, true /* save_model*/); - TestModel(valid_places, - Place({TARGET(kARM), PRECISION(kFloat)}), - FLAGS_optimized_model, - false /* save model */); + TestModel(valid_places, FLAGS_optimized_model, false /* save model */); } #endif // LITE_WITH_NPU TEST(MobileNetV2, test_arm) { 
std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)})); + TestModel(valid_places); } #ifdef LITE_WITH_OPENCL TEST(MobileNetV2, test_opencl) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kOpenCL), PRECISION(kFloat)}, + Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)})); + TestModel(valid_places); } #endif // LITE_WITH_OPENCL diff --git a/lite/api/model_optimize_tool.cc b/lite/api/model_optimize_tool.cc index 37c09b3446..7dd2077008 100644 --- a/lite/api/model_optimize_tool.cc +++ b/lite/api/model_optimize_tool.cc @@ -82,7 +82,6 @@ void Main() { target_repr.c_str()); } } - valid_places.emplace_back(TARGET(kHost)); CHECK(!valid_places.empty()) << "At least one target should be set, should set the " @@ -90,8 +89,8 @@ void Main() { if (FLAGS_prefer_int8_kernel) { LOG(WARNING) << "Int8 mode is only support by ARM target"; - valid_places.push_back(Place{TARGET(kARM), PRECISION(kInt8)}); - config.set_preferred_place(Place{TARGET(kARM), PRECISION(kInt8)}); + valid_places.insert(valid_places.begin(), + Place{TARGET(kARM), PRECISION(kInt8)}); } config.set_valid_places(valid_places); diff --git a/lite/api/model_run_test_image.cc b/lite/api/model_run_test_image.cc index 099a74ed7f..f3cd35c524 100644 --- a/lite/api/model_run_test_image.cc +++ b/lite/api/model_run_test_image.cc @@ -30,16 +30,14 @@ TEST(model, test) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads); lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}, + std::vector valid_places({Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kInt8)}}); auto precision = PRECISION(kFloat); if (FLAGS_int8) { precision = PRECISION(kInt8); } - predictor.Build( - FLAGS_model_dir, "", "", Place{TARGET(kARM), precision}, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); int im_width = FLAGS_im_width; int im_height = FLAGS_im_height; auto* input_tensor = predictor.GetInput(0); diff --git a/lite/api/model_test.cc b/lite/api/model_test.cc index 114d1acdbe..e027a55413 100644 --- a/lite/api/model_test.cc +++ b/lite/api/model_test.cc @@ -36,11 +36,9 @@ void OutputOptModel(const std::string& load_model_dir, const std::vector>& input_shapes) { lite_api::CxxConfig config; config.set_model_dir(load_model_dir); - config.set_preferred_place(Place{TARGET(kX86), PRECISION(kFloat)}); config.set_valid_places({ Place{TARGET(kX86), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, - Place{TARGET(kHost), PRECISION(kFloat)}, }); auto predictor = lite_api::CreatePaddlePredictor(config); diff --git a/lite/api/ocr_attention_test.cc b/lite/api/ocr_attention_test.cc index 89cf6a3e8d..5e39c5437c 100644 --- a/lite/api/ocr_attention_test.cc +++ b/lite/api/ocr_attention_test.cc @@ -25,14 +25,12 @@ namespace paddle { namespace lite { -void TestModel(const std::vector& valid_places, - const Place& preferred_place, - bool use_npu = false) { +void TestModel(const std::vector& valid_places, bool use_npu = false) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads); lite::Predictor predictor; - predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places); + predictor.Build(FLAGS_model_dir, "", 
"", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 1, 48, 512}))); @@ -104,11 +102,10 @@ void TestModel(const std::vector& valid_places, TEST(OcrAttention, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)})); + TestModel(valid_places); } } // namespace lite diff --git a/lite/api/paddle_api.h b/lite/api/paddle_api.h index 97b3b31bc7..17417aa729 100644 --- a/lite/api/paddle_api.h +++ b/lite/api/paddle_api.h @@ -116,14 +116,12 @@ class LITE_API ConfigBase { /// CxxConfig is the config for the Full feature predictor. class LITE_API CxxConfig : public ConfigBase { - Place preferred_place_; std::vector valid_places_; std::string model_file_; std::string param_file_; bool model_from_memory_{false}; public: - void set_preferred_place(const Place& x) { preferred_place_ = x; } void set_valid_places(const std::vector& x) { valid_places_ = x; } void set_model_file(const std::string& path) { model_file_ = path; } void set_param_file(const std::string& path) { param_file_ = path; } @@ -136,7 +134,6 @@ class LITE_API CxxConfig : public ConfigBase { model_from_memory_ = true; } - const Place& preferred_place() const { return preferred_place_; } const std::vector& valid_places() const { return valid_places_; } std::string model_file() const { return model_file_; } std::string param_file() const { return param_file_; } diff --git a/lite/api/paddle_api_test.cc b/lite/api/paddle_api_test.cc index 9946580377..63142d4981 100644 --- a/lite/api/paddle_api_test.cc +++ b/lite/api/paddle_api_test.cc @@ -28,7 +28,6 @@ namespace lite_api { TEST(CxxApi, run) { lite_api::CxxConfig config; config.set_model_dir(FLAGS_model_dir); - config.set_preferred_place(Place{TARGET(kX86), PRECISION(kFloat)}); config.set_valid_places({ Place{TARGET(kX86), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, diff --git a/lite/api/resnet18_test.cc b/lite/api/resnet18_test.cc index c003dc1dba..5a50367006 100644 --- a/lite/api/resnet18_test.cc +++ b/lite/api/resnet18_test.cc @@ -28,14 +28,9 @@ namespace lite { #ifdef LITE_WITH_ARM TEST(ResNet18, test) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kARM), PRECISION(kFloat)}}); - predictor.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kARM), PRECISION(kFloat)}, - valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); diff --git a/lite/api/resnet50_test.cc b/lite/api/resnet50_test.cc index 6e78d12be0..3e5a725b90 100644 --- a/lite/api/resnet50_test.cc +++ b/lite/api/resnet50_test.cc @@ -26,13 +26,12 @@ namespace paddle { namespace lite { #ifdef LITE_WITH_ARM -void TestModel(const std::vector& valid_places, - const Place& preferred_place) { +void TestModel(const std::vector& valid_places) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads); lite::Predictor predictor; - predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); @@ -82,22 +81,20 @@ void TestModel(const std::vector& valid_places, TEST(ResNet50, test_arm) { 
std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)})); + TestModel(valid_places); } #ifdef LITE_WITH_OPENCL TEST(ResNet50, test_opencl) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kOpenCL), PRECISION(kFloat)}, + Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)})); + TestModel(valid_places); } #endif // LITE_WITH_OPENCL diff --git a/lite/api/resnet50_test_fpga.cc b/lite/api/resnet50_test_fpga.cc index 7ea81cc746..ab647f9699 100644 --- a/lite/api/resnet50_test_fpga.cc +++ b/lite/api/resnet50_test_fpga.cc @@ -29,8 +29,7 @@ namespace lite { TEST(ResNet50, test) { lite::Predictor predictor; std::vector valid_places( - {Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)}, - Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNHWC)}}); + {Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)}}); predictor.Build(FLAGS_model_dir, "", diff --git a/lite/api/shufflenetv2_test.cc b/lite/api/shufflenetv2_test.cc index f67bc8c6cf..2c1247997c 100644 --- a/lite/api/shufflenetv2_test.cc +++ b/lite/api/shufflenetv2_test.cc @@ -25,13 +25,12 @@ namespace paddle { namespace lite { -void TestModel(const std::vector& valid_places, - const Place& preferred_place) { +void TestModel(const std::vector& valid_places) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads); lite::Predictor predictor; - predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim((std::vector({1, 3, 224, 224})))); @@ -80,12 +79,11 @@ void TestModel(const std::vector& valid_places, TEST(ShuffleNetV2, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, // Place{TARGET(kOpenCL), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)})); + TestModel(valid_places); } } // namespace lite diff --git a/lite/api/test_googlenet_lite.cc b/lite/api/test_googlenet_lite.cc index 4c9ecd90c6..952892f7c4 100644 --- a/lite/api/test_googlenet_lite.cc +++ b/lite/api/test_googlenet_lite.cc @@ -45,13 +45,11 @@ namespace lite { #ifdef LITE_WITH_X86 TEST(CXXApi, test_lite_googlenet) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}}); // LOG(INFO)<<"FLAGS_eval_googlenet_dir:"<Resize(DDim(std::vector({1, 3, 224, 224}))); diff --git a/lite/api/test_inceptionv4_lite_x86.cc b/lite/api/test_inceptionv4_lite_x86.cc index 5d1dbbe144..c1a58b8014 100644 --- a/lite/api/test_inceptionv4_lite_x86.cc +++ b/lite/api/test_inceptionv4_lite_x86.cc @@ -43,8 +43,7 @@ namespace lite { TEST(InceptionV4, test_inceptionv4_lite_x86) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}}); // LOG(INFO)<<"FLAGS_eval_googlenet_dir:"<Resize(DDim(std::vector({1, 3, 224, 224}))); diff --git a/lite/api/test_mobilenetv1_lite_x86.cc b/lite/api/test_mobilenetv1_lite_x86.cc index d755410b6a..fc02c1dc78 100644 --- 
a/lite/api/test_mobilenetv1_lite_x86.cc +++ b/lite/api/test_mobilenetv1_lite_x86.cc @@ -43,8 +43,7 @@ namespace lite { TEST(Mobilenet_v1, test_mobilenetv1_lite_x86) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}}); std::string model_dir = FLAGS_model_dir; std::vector passes({"static_kernel_pick_pass", @@ -54,12 +53,7 @@ TEST(Mobilenet_v1, test_mobilenetv1_lite_x86) { "io_copy_kernel_pick_pass", "variable_place_inference_pass", "runtime_context_assign_pass"}); - predictor.Build(model_dir, - "", - "", - Place{TARGET(kX86), PRECISION(kFloat)}, - valid_places, - passes); + predictor.Build(model_dir, "", "", valid_places, passes); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); auto* data = input_tensor->mutable_data(); diff --git a/lite/api/test_mobilenetv2_lite_x86.cc b/lite/api/test_mobilenetv2_lite_x86.cc index b1090cc6f2..47ee32321d 100644 --- a/lite/api/test_mobilenetv2_lite_x86.cc +++ b/lite/api/test_mobilenetv2_lite_x86.cc @@ -44,8 +44,8 @@ namespace lite { TEST(Mobilenet_v2, test_mobilenetv2_lite_x86) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}, + Place{TARGET(kHost), PRECISION(kFloat)}}); // LOG(INFO)<<"FLAGS_eval_googlenet_dir:"<Resize(DDim(std::vector({1, 3, 224, 224}))); diff --git a/lite/api/unet_test.cc b/lite/api/unet_test.cc index aae5f493eb..697280f288 100644 --- a/lite/api/unet_test.cc +++ b/lite/api/unet_test.cc @@ -30,14 +30,9 @@ TEST(unet, test) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads); lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kARM), PRECISION(kFloat)}}); - predictor.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kARM), PRECISION(kFloat)}, - valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 512, 512}))); diff --git a/lite/core/mir/fusion/fc_fuse_pass_test.cc b/lite/core/mir/fusion/fc_fuse_pass_test.cc index cbf77084dd..f7aa4bb5ad 100644 --- a/lite/core/mir/fusion/fc_fuse_pass_test.cc +++ b/lite/core/mir/fusion/fc_fuse_pass_test.cc @@ -30,16 +30,12 @@ namespace mir { TEST(fc_fuse_pass, fuse_test) { lite::Predictor predictor; #ifndef LITE_WITH_CUDA - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}}); #else std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}, Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)}, Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)}, - Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)}, Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)}, - Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)}, }); #endif @@ -72,8 +68,7 @@ TEST(fc_fuse_pass, fuse_test) { #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK TEST(fc_fuse_pass, save_model_test) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); + 
std::vector<Place> valid_places({Place{TARGET(kX86), PRECISION(kFloat)}});
   predictor.Build(FLAGS_model_dir,
                   "",
                   "",
diff --git a/lite/core/mir/static_kernel_pick_pass.cc b/lite/core/mir/static_kernel_pick_pass.cc
index adadbd6d98..90aca56aec 100644
--- a/lite/core/mir/static_kernel_pick_pass.cc
+++ b/lite/core/mir/static_kernel_pick_pass.cc
@@ -24,8 +24,8 @@ namespace paddle {
 namespace lite {
 namespace mir {
 
-bool KernelScoreCmp(const std::pair<size_t, std::unique_ptr<lite::KernelBase>>& a,
-                    const std::pair<size_t, std::unique_ptr<lite::KernelBase>>& b) {
+bool KernelScoreCmp(const std::pair<float, std::unique_ptr<lite::KernelBase>>& a,
+                    const std::pair<float, std::unique_ptr<lite::KernelBase>>& b) {
   return a.first > b.first;
 }
 
@@ -44,12 +44,12 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
     auto& instruct = node.AsStmt();
 
     // Get candidate kernels
-    std::vector<std::pair<size_t, std::unique_ptr<lite::KernelBase>>> scored;
+    std::vector<std::pair<float, std::unique_ptr<lite::KernelBase>>> scored;
     CHECK(!instruct.kernels().empty()) << "No kernels found for "
                                        << instruct.op_type();
     VLOG(4) << "instruct.kernels().size():" << instruct.kernels().size();
     for (auto&& kernel : instruct.kernels()) {
-      size_t score = KernelGrade(*kernel);
+      float score = KernelGrade(*kernel, graph->valid_places());
       VLOG(4) << "kernel->summary():" << kernel->summary()
               << " score:" << score;
       scored.emplace_back(score, std::move(kernel));
@@ -99,7 +99,7 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
       instruct.ResetOp(update_desc, graph->valid_places());
       scored.clear();
       for (auto&& kernel : instruct.kernels()) {
-        size_t score = KernelGrade(*kernel);
+        float score = KernelGrade(*kernel, graph->valid_places());
         scored.emplace_back(score, std::move(kernel));
       }
       std::sort(scored.begin(), scored.end(), KernelScoreCmp);
diff --git a/lite/core/mir/static_kernel_pick_pass.h b/lite/core/mir/static_kernel_pick_pass.h
index 4e8707aa49..7187ddcef6 100644
--- a/lite/core/mir/static_kernel_pick_pass.h
+++ b/lite/core/mir/static_kernel_pick_pass.h
@@ -16,6 +16,7 @@
 #include <limits>
 #include <memory>
+#include <vector>
 #include "lite/core/mir/pass.h"
 #include "lite/core/types.h"
 
@@ -38,8 +39,6 @@ class StaticKernelPickPass : public mir::StmtPass {
  public:
   void Apply(const std::unique_ptr<SSAGraph>& graph) override;
 
-  void SetPreferPlace(const Place& place) { place_ = place; }
-  const Place& place() const { return place_; }
   const core::KernelPickFactor& kernel_pick_factors() const {
     return kernel_pick_factors_;
   }
@@ -49,39 +48,63 @@ class StaticKernelPickPass : public mir::StmtPass {
 
  private:
   // Score the kernel.
-  size_t KernelGrade(const lite::KernelBase& kernel) {
-    size_t score{};
+  float KernelGrade(const lite::KernelBase& kernel,
+                    const std::vector<Place>& places) {
+    CHECK_GT(places.size(), 0) << "valid_places is empty.";
+    float final_score{-1.};
+    Place winner_place{places[0]};
     const int kMax = std::numeric_limits<int>::max();
-    VLOG(4) << "[score s1]:" << score;
-    // The more important factor comes first
-    if (kernel_pick_factors_.IsTargetConsidered() &&
-        (place().target == kernel.target() || kernel.target() == TARGET(kAny) ||
-         place().target == TARGET(kAny))) {
-      score +=
-          kMax / static_cast<int>(core::KernelPickFactor::Factor::TargetFirst);
-    }
-    VLOG(4) << "[score s2]:" << score;
-    if (kernel_pick_factors_.IsPrecisionConsidered() &&
-        (place().precision == kernel.precision() ||
-         kernel.precision() == PRECISION(kAny) ||
-         place().precision == PRECISION(kAny))) {
-      score += kMax /
-               static_cast<int>(core::KernelPickFactor::Factor::PrecisionFirst);
-    }
-    VLOG(4) << "[score s3]:" << score;
-    if (kernel_pick_factors_.IsDataLayoutConsidered() &&
-        (place().layout == kernel.layout() ||
-         kernel.layout() == DATALAYOUT(kAny) ||
-         place().layout == DATALAYOUT(kAny))) {
-      score += kMax / static_cast<int>(
-                          core::KernelPickFactor::Factor::DataLayoutFirst);
+    size_t place_size = places.size();
+
+    // NOTE: We compare the kernel's place with each place in valid_places to
+    // select the best match. The order of the places in the valid_places
+    // array decides the user's preference:
+    //   final_score = weight * score
+    //   weight: computed as (valid_places.size() - i) / valid_places.size()
+    //           by default, where i is the place's index in the valid_places
+    //           array.
+    //   score:  the weighted sum of the target, precision, and layout matches.
+    for (size_t i = 0; i < place_size; ++i) {
+      const auto& place = places[i];
+      float weight = static_cast<float>(place_size - i) / place_size;
+      size_t score{};
+      // The more important factor comes first
+      if (kernel_pick_factors_.IsTargetConsidered() &&
+          (place.target == kernel.target() || kernel.target() == TARGET(kAny) ||
+           place.target == TARGET(kAny))) {
+        score += kMax /
+                 static_cast<int>(core::KernelPickFactor::Factor::TargetFirst);
+      }
+      VLOG(4) << "[score s1]:" << score;
+      if (kernel_pick_factors_.IsPrecisionConsidered() &&
+          (place.precision == kernel.precision() ||
+           kernel.precision() == PRECISION(kAny) ||
+           place.precision == PRECISION(kAny))) {
+        score += kMax / static_cast<int>(
+                            core::KernelPickFactor::Factor::PrecisionFirst);
+      }
+      VLOG(4) << "[score s2]:" << score;
+      if (kernel_pick_factors_.IsDataLayoutConsidered() &&
+          (place.layout == kernel.layout() ||
+           kernel.layout() == DATALAYOUT(kAny) ||
+           place.layout == DATALAYOUT(kAny))) {
+        score += kMax / static_cast<int>(
+                            core::KernelPickFactor::Factor::DataLayoutFirst);
+      }
+      VLOG(4) << "[score s3]:" << score;
+      if (weight * score > final_score) {
+        final_score = weight * score;
+        winner_place = place;
+      }
     }
-    VLOG(4) << "[score s4(final)]:" << score;
+
+    VLOG(4) << "[score(final)]:" << final_score;
     VLOG(4) << "-------- pick summary --------";
-    VLOG(4) << " ===> place():" << PrecisionToStr(place().precision) << " "
-            << DataLayoutToStr(place().layout) << " "
-            << TargetToStr(place().target);
+    VLOG(4) << " ===> place():" << PrecisionToStr(winner_place.precision) << " "
+            << DataLayoutToStr(winner_place.layout) << " "
+            << TargetToStr(winner_place.target);
     VLOG(4) << " ===> kernel.place():"
             << PrecisionToStr(kernel.place().precision) << " "
             << DataLayoutToStr(kernel.place().layout) << " "
             << TargetToStr(kernel.place().target);
@@ -89,20 +112,18 @@ class StaticKernelPickPass : public mir::StmtPass {
   VLOG(4) << "kernel.op_type():" << kernel.op_type();
   VLOG(4) << "picker tactic " << kernel_pick_factors_;
   VLOG(4) << "kernel place " << kernel.place().DebugString();
-  VLOG(4) << "picker place " << place().DebugString();
-  VLOG(4) << "score " << score;
+  VLOG(4) << "picker place " << winner_place.DebugString();
   VLOG(4) << "------------------------------";
 
   // The data layout is not considered, for the input and output arguments
   // might have different data layout.
   // TODO(Superjomn) reconsider the idea of taking the data layout as a kernel
   // specification.
-    return score;
+    return final_score;
   }
 
  private:
   core::KernelPickFactor kernel_pick_factors_;
-  Place place_;
 };
 
 }  // namespace mir
diff --git a/lite/core/mir/subgraph/generate_npu_program_pass_test.cc b/lite/core/mir/subgraph/generate_npu_program_pass_test.cc
index 25b1482b4b..88095df502 100644
--- a/lite/core/mir/subgraph/generate_npu_program_pass_test.cc
+++ b/lite/core/mir/subgraph/generate_npu_program_pass_test.cc
@@ -106,7 +106,6 @@ std::shared_ptr<lite_api::PaddlePredictor> TestModel(
     const std::string& model_dir,
     const std::string& model_file,
     const std::string& params_file,
-    const lite_api::Place& preferred_place,
     const std::vector<lite_api::Place>& valid_places,
     const std::vector<std::vector<int64_t>>& input_tensor_shape,
     const std::string& optimized_model_dir) {
@@ -115,7 +114,6 @@ std::shared_ptr<lite_api::PaddlePredictor> TestModel(
   cxx_config.set_model_dir(model_dir);
   cxx_config.set_model_file(model_file);
   cxx_config.set_param_file(params_file);
-  cxx_config.set_preferred_place(preferred_place);
   cxx_config.set_valid_places(valid_places);
   auto predictor = lite_api::CreatePaddlePredictor(cxx_config);
   FillInputTensor(predictor, input_tensor_shape, 1);
@@ -151,9 +149,7 @@ TEST(NPUSubgraph, compare) {
   TestModel(FLAGS_model_dir,
             FLAGS_model_file,
             FLAGS_params_file,
-            lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
-            {lite_api::Place{TARGET(kHost), PRECISION(kFloat)},
-             lite_api::Place{TARGET(kARM), PRECISION(kFloat)}},
+            {lite_api::Place{TARGET(kARM), PRECISION(kFloat)}},
             input_tensor_shape,
             FLAGS_optimized_model_dir + "/CPU");
   // generate and run optimized NPU model
@@ -162,9 +158,7 @@ TEST(NPUSubgraph, compare) {
   TestModel(FLAGS_model_dir,
             FLAGS_model_file,
             FLAGS_params_file,
-            lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
-            {lite_api::Place{TARGET(kHost), PRECISION(kFloat)},
-             lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
+            {lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
              lite_api::Place{TARGET(kNPU), PRECISION(kFloat)}},
             input_tensor_shape,
             FLAGS_optimized_model_dir + "/NPU");
diff --git a/lite/core/mir/variable_place_inference_pass_test.cc b/lite/core/mir/variable_place_inference_pass_test.cc
index cf86afd590..dec37078fa 100644
--- a/lite/core/mir/variable_place_inference_pass_test.cc
+++ b/lite/core/mir/variable_place_inference_pass_test.cc
@@ -63,18 +63,6 @@ TEST(variable_place_inference_pass, test) {
       "type_target_cast_pass",  //
   });
 
-  Place prefered_place{
-#ifdef PADDLE_WITH_CUDA
-      TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW),
-#else
-#ifdef PADDLE_WITH_ARM
-      TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW),
-#else   // X86
-      TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW),
-#endif  // ARM
-#endif
-  };
-  optimizer.KernelPickPreferPlace(prefered_place);
   optimizer.Run(std::move(program), places, factor, passes);
 }
diff --git a/lite/core/optimizer.h b/lite/core/optimizer.h
index 6f91fee991..37deddb346 100644
--- a/lite/core/optimizer.h
+++ b/lite/core/optimizer.h
@@ -117,13 +117,6 @@ class Optimizer {
     exec_scope_ = program.exec_scope();
   }
 
-  void KernelPickPreferPlace(const Place& place) {
-    auto* pass = mir::PassManager::Global().LookUp<mir::StaticKernelPickPass>(
-        "static_kernel_pick_pass");
-    CHECK(pass);
-    pass->SetPreferPlace(place);
-  }
-
   const lite::Scope* exec_scope() const { return exec_scope_; }
 
   // Generate a new program based on the mir graph.
diff --git a/lite/demo/cxx/mobile_full/mobilenetv1_full_api.cc b/lite/demo/cxx/mobile_full/mobilenetv1_full_api.cc
index 18167e3ca1..5ac041b2cc 100644
--- a/lite/demo/cxx/mobile_full/mobilenetv1_full_api.cc
+++ b/lite/demo/cxx/mobile_full/mobilenetv1_full_api.cc
@@ -38,10 +38,8 @@ void RunModel() {
   config.set_model_dir(FLAGS_model_dir);
   std::vector<Place> valid_places{Place{TARGET(kARM), PRECISION(kFloat)}};
   if (FLAGS_prefer_int8_kernel) {
-    valid_places.push_back(Place{TARGET(kARM), PRECISION(kInt8)});
-    config.set_preferred_place(Place{TARGET(kARM), PRECISION(kInt8)});
-  } else {
-    config.set_preferred_place(Place{TARGET(kARM), PRECISION(kFloat)});
+    valid_places.insert(valid_places.begin(),
+                        Place{TARGET(kARM), PRECISION(kInt8)});
   }
   config.set_valid_places(valid_places);
 
diff --git a/lite/gen_code/gen_code.h b/lite/gen_code/gen_code.h
index 7dea36636a..58a7959f4e 100644
--- a/lite/gen_code/gen_code.h
+++ b/lite/gen_code/gen_code.h
@@ -102,7 +102,7 @@ class Module {
 
   void AddValidPlaceDecl() {
     // clang-format off
-    Line("std::vector<lite::Place> valid_places({lite::Place({TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW)}), lite::Place({TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)})});");  // NOLINT
+    Line("std::vector<lite::Place> valid_places({lite::Place({TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW)})});");  // NOLINT
     // clang-format on
   }
 
diff --git a/lite/tools/debug/model_debug_tool.cc b/lite/tools/debug/model_debug_tool.cc
index 30f35ca7fc..4b27db7a8d 100644
--- a/lite/tools/debug/model_debug_tool.cc
+++ b/lite/tools/debug/model_debug_tool.cc
@@ -35,7 +35,6 @@ void Run(DebugConfig* conf) {
 #endif
   lite::Predictor predictor;
   std::vector<Place> valid_places({
-      Place{TARGET(kHost), PRECISION(kFloat)},
 #ifdef LITE_WITH_ARM
       Place{TARGET(kARM), PRECISION(kFloat)},
 #endif
@@ -60,23 +59,7 @@ void Run(DebugConfig* conf) {
       "runtime_context_assign_pass",
   }};
 
-  predictor.Build(conf->model_dir,
-                  "",
-                  "",
-#ifdef LITE_WITH_ARM
-                  Place{TARGET(kARM), PRECISION(kFloat)},
-#endif
-#ifdef LITE_WITH_X86
-                  Place{TARGET(kX86), PRECISION(kFloat)},
-#endif
-#ifdef LITE_WITH_FPGA
-                  Place{TARGET(kFPGA), PRECISION(kFloat)},
-#endif
-#ifdef LITE_WITH_CUDA
-                  Place{TARGET(kCUDA), PRECISION(kFloat)},
-#endif
-                  valid_places,
-                  passes);
+  predictor.Build(conf->model_dir, "", "", valid_places, passes);
   predictor.GenRuntimeProgram();
 
   auto& instructions = predictor.runtime_program().instructions();
-- 
GitLab
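
Usage sketch (not part of the patch): after this change, kernel preference is
expressed purely by the order of valid_places (the first entry gets the
largest weight in StaticKernelPickPass::KernelGrade), and kHost places no
longer need to be listed, since Predictor::Build appends them internally. The
minimal full-API flow below mirrors the updated
lite/demo/cxx/mobile_full/mobilenetv1_full_api.cc; the model directory and the
input/output handling are illustrative assumptions, not code from this patch.

    #include <iostream>
    #include <memory>
    #include "paddle_api.h"  // Paddle-Lite full-API header used by the demos

    using namespace paddle::lite_api;  // NOLINT

    int main() {
      CxxConfig config;
      config.set_model_dir("./mobilenet_v1");  // hypothetical model path
      // Order encodes preference: try ARM int8 kernels first, fall back to
      // ARM float. No set_preferred_place(), no explicit kHost entry.
      config.set_valid_places({Place{TARGET(kARM), PRECISION(kInt8)},
                               Place{TARGET(kARM), PRECISION(kFloat)}});
      std::shared_ptr<PaddlePredictor> predictor =
          CreatePaddlePredictor<CxxConfig>(config);

      // Feed a constant 1x3x224x224 input and run once.
      std::unique_ptr<Tensor> input(predictor->GetInput(0));
      input->Resize({1, 3, 224, 224});
      auto* in_data = input->mutable_data<float>();
      for (int i = 0; i < 3 * 224 * 224; ++i) in_data[i] = 1.f;
      predictor->Run();

      std::unique_ptr<const Tensor> output(predictor->GetOutput(0));
      std::cout << "out[0] = " << output->data<float>()[0] << std::endl;
      return 0;
    }

The same ordering rule is what model_optimize_tool and the mobile_full demo
rely on when --prefer_int8_kernel inserts the int8 place at the front of the
valid_places list.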