diff --git a/lite/api/android/jni/native/convert_util_jni.h b/lite/api/android/jni/native/convert_util_jni.h index c9e8e2e4a67f36d495cec532ba5be41811d3f5a7..5e5d3723e43eb311f64b85f7507a12497d724109 100644 --- a/lite/api/android/jni/native/convert_util_jni.h +++ b/lite/api/android/jni/native/convert_util_jni.h @@ -145,8 +145,6 @@ inline CxxConfig jcxxconfig_to_cpp_cxxconfig(JNIEnv *env, jobject jcxxconfig) { jmethodID model_dir_method = env->GetMethodID(cxxconfig_jclazz, "getModelDir", "()Ljava/lang/String;"); - jmethodID preferred_place_method = env->GetMethodID( - cxxconfig_jclazz, "getPreferredPlace", "()Lcom/baidu/paddle/lite/Place;"); jmethodID valid_places_method = env->GetMethodID( cxxconfig_jclazz, "getValidPlaces", "()[Lcom/baidu/paddle/lite/Place;"); @@ -159,13 +157,6 @@ inline CxxConfig jcxxconfig_to_cpp_cxxconfig(JNIEnv *env, jobject jcxxconfig) { config.set_model_dir(cpp_model_dir); } - jobject java_preferred_place = - env->CallObjectMethod(jcxxconfig, preferred_place_method); - if (java_preferred_place != nullptr) { - Place cpp_preferred_place = jplace_to_cpp_place(env, java_preferred_place); - config.set_preferred_place(cpp_preferred_place); - } - jobject object_valid_places = env->CallObjectMethod(jcxxconfig, valid_places_method); jobjectArray *java_valid_places = diff --git a/lite/api/android/jni/src/com/baidu/paddle/lite/CxxConfig.java b/lite/api/android/jni/src/com/baidu/paddle/lite/CxxConfig.java index 906293c92fe379caf7e05c805cbbf9a55f0896bd..3f68ef89228d44e41f8d1d5a0ba65791484bb0aa 100644 --- a/lite/api/android/jni/src/com/baidu/paddle/lite/CxxConfig.java +++ b/lite/api/android/jni/src/com/baidu/paddle/lite/CxxConfig.java @@ -18,17 +18,8 @@ package com.baidu.paddle.lite; */ public class CxxConfig extends ConfigBase { - protected Place preferredPlace; protected Place[] validPlaces; - public Place getPreferredPlace() { - return preferredPlace; - } - - public void setPreferredPlace(Place preferredPlace) { - this.preferredPlace = preferredPlace; - } - public Place[] getValidPlaces() { return validPlaces; } diff --git a/lite/api/apis_test.cc b/lite/api/apis_test.cc index 3dc02240846ed4fc6dc310e3a27725792463da6e..ac2c385d53ea0a1785393cd488d115d20c4264f1 100644 --- a/lite/api/apis_test.cc +++ b/lite/api/apis_test.cc @@ -51,17 +51,12 @@ bool CompareTensors(const std::string& name, TEST(CXXApi_LightApi, optim_model) { lite::Predictor cxx_api; std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kX86), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, // Both works on X86 and ARM }); // On ARM devices, the preferred X86 target not works, but it can still // select ARM kernels. - cxx_api.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kX86), PRECISION(kFloat)}, - valid_places); + cxx_api.Build(FLAGS_model_dir, "", "", valid_places); cxx_api.SaveModel(FLAGS_optimized_model); } @@ -72,17 +67,12 @@ TEST(CXXApi_LightApi, save_and_load_model) { // CXXAPi { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kX86), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, // Both works on X86 and ARM }); // On ARM devices, the preferred X86 target not works, but it can still // select ARM kernels. - cxx_api.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kX86), PRECISION(kFloat)}, - valid_places); + cxx_api.Build(FLAGS_model_dir, "", "", valid_places); auto* x = cxx_api.GetInput(0); SetConstInput(x); diff --git a/lite/api/benchmark.cc b/lite/api/benchmark.cc index 02313554d9bd36d4818a53213cd1fd1fe74c8e56..a423cd07a2e53d9983706864fd86d7f3d1918ec0 100644 --- a/lite/api/benchmark.cc +++ b/lite/api/benchmark.cc @@ -47,9 +47,7 @@ void OutputOptModel(const std::string& load_model_dir, Place{TARGET(kARM), PRECISION(kInt8)}, Place{TARGET(kX86), PRECISION(kFloat)}, Place{TARGET(kOpenCL), PRECISION(kFloat)}, - Place{TARGET(kHost), PRECISION(kFloat)}, }); - config.set_preferred_place(Place{TARGET(kARM), PRECISION(kFloat)}); auto predictor = lite_api::CreatePaddlePredictor(config); int ret = system( diff --git a/lite/api/cxx_api.cc b/lite/api/cxx_api.cc index 93568882bc1b89ec2ece618c581f07b1671dd54a..490a184c2d74277521eb62a35e626a40872d08b3 100644 --- a/lite/api/cxx_api.cc +++ b/lite/api/cxx_api.cc @@ -110,14 +110,12 @@ void Predictor::Build(const lite_api::CxxConfig &config, const std::string &model_path = config.model_dir(); const std::string &model_file = config.model_file(); const std::string ¶m_file = config.param_file(); - const Place prefer_place = config.preferred_place(); const bool model_from_memory = config.model_from_memory(); LOG(INFO) << "load from memory " << model_from_memory; Build(model_path, model_file, param_file, - prefer_place, valid_places, passes, model_type, @@ -126,7 +124,6 @@ void Predictor::Build(const lite_api::CxxConfig &config, void Predictor::Build(const std::string &model_path, const std::string &model_file, const std::string ¶m_file, - const Place &prefer_place, const std::vector &valid_places, const std::vector &passes, lite_api::LiteModelType model_type, @@ -153,21 +150,24 @@ void Predictor::Build(const std::string &model_path, default: LOG(FATAL) << "Unknown model type"; } - Build(program_desc_, prefer_place, valid_places, passes); + Build(program_desc_, valid_places, passes); } void Predictor::Build(const cpp::ProgramDesc &desc, - const Place &prefer_place, const std::vector &valid_places, const std::vector &passes) { program_desc_ = desc; - Program program(desc, scope_, valid_places); - optimizer_.KernelPickPreferPlace(prefer_place); + std::vector inner_places = valid_places; + inner_places.emplace_back(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)); + inner_places.emplace_back( + TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)); + Program program(desc, scope_, inner_places); + /// The first place in valid_places is core::KernelPickFactor factor; factor.ConsiderTarget(); factor.ConsiderPrecision(); factor.ConsiderDataLayout(); - optimizer_.Run(std::move(program), valid_places, factor, passes); + optimizer_.Run(std::move(program), inner_places, factor, passes); exec_scope_ = optimizer_.exec_scope(); } diff --git a/lite/api/cxx_api.h b/lite/api/cxx_api.h index 1fe7c985afdc50f0e17a2d581876ed7d6947d2ad..7f5490fa9f82d882fc2353c515c0542a365d5d32 100644 --- a/lite/api/cxx_api.h +++ b/lite/api/cxx_api.h @@ -50,14 +50,12 @@ class LITE_API Predictor { const std::string& model_path, const std::string& model_file_path, const std::string& param_file_path, - const Place& prefer_place, const std::vector& valid_places, const std::vector& passes = {}, lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf, bool memory_from_memory = false); void Build(const cpp::ProgramDesc& desc, - const Place& prefer_place, const std::vector& valid_places, const std::vector& passes = {}); @@ -132,10 +130,8 @@ class LITE_API Predictor { class LITE_API CXXTrainer { public: CXXTrainer(const std::shared_ptr& root_scope, - const Place& preferred_place, const std::vector& valid_places) : scope_(root_scope), - preferred_place_(preferred_place), valid_places_(valid_places), main_program_executor_(Predictor(scope_)) {} @@ -144,7 +140,7 @@ class LITE_API CXXTrainer { // NOTE Just support to execute the 0-th block currently. Predictor& BuildMainProgramExecutor(const framework::proto::ProgramDesc& desc, int block_id = 0) { - main_program_executor_.Build(desc, preferred_place_, valid_places_); + main_program_executor_.Build(desc, valid_places_); return main_program_executor_; } @@ -162,14 +158,12 @@ class LITE_API CXXTrainer { void RunStartupProgram(const framework::proto::ProgramDesc& desc, int block_id = 0) { Predictor exe(scope_); - exe.Build(desc, preferred_place_, valid_places_); + exe.Build(desc, valid_places_); exe.Run(); } private: std::shared_ptr scope_; - - Place preferred_place_; std::vector valid_places_; // The training program. diff --git a/lite/api/cxx_api_bin.cc b/lite/api/cxx_api_bin.cc index 000e94307ca4acaa3a57597f4a7b0e44a57e0031..8c929e9c8700a65c868e2facd763b0ec36719e23 100644 --- a/lite/api/cxx_api_bin.cc +++ b/lite/api/cxx_api_bin.cc @@ -35,13 +35,11 @@ void Run(const char* model_dir, int repeat) { #endif lite::Predictor predictor; std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kInt8)}, + Place{TARGET(kARM), PRECISION(kFloat)}, }); - predictor.Build( - model_dir, "", "", Place{TARGET(kARM), PRECISION(kInt8)}, valid_places); + predictor.Build(model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); diff --git a/lite/api/cxx_api_impl.cc b/lite/api/cxx_api_impl.cc index 8091e2ffd55cdabfbb73a1875873da39acd4f85d..a92ef0be88ae53a5479c57f61acc7d2bca14077d 100644 --- a/lite/api/cxx_api_impl.cc +++ b/lite/api/cxx_api_impl.cc @@ -62,7 +62,6 @@ void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) { Env::Init(); #endif auto places = config.valid_places(); - places.emplace_back(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)); raw_predictor_.Build(config, places); raw_predictor_.PrepareFeedFetch(); } diff --git a/lite/api/cxx_api_test.cc b/lite/api/cxx_api_test.cc index c562b9f0801c55630bb8f4108a27e7b927c62514..4d711302cb5880247f4a7b7082185c500b9ad6e9 100644 --- a/lite/api/cxx_api_test.cc +++ b/lite/api/cxx_api_test.cc @@ -43,13 +43,8 @@ TEST(CXXApi, test) { TEST(CXXApi, save_model) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); - predictor.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kCUDA), PRECISION(kFloat)}, - valid_places); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}}); + predictor.Build(FLAGS_model_dir, "", "", valid_places); LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model; predictor.SaveModel(FLAGS_optimized_model, @@ -59,11 +54,11 @@ TEST(CXXApi, save_model) { } /*TEST(CXXTrainer, train) { - Place prefer_place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}); - std::vector valid_places({prefer_place}); + Place place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}); + std::vector valid_places({place}); auto scope = std::make_shared(); - CXXTrainer trainer(scope, prefer_place, valid_places); + CXXTrainer trainer(scope, valid_places); std::string main_program_pb, startup_program_pb; ReadBinaryFile(FLAGS_main_program_path, &main_program_pb); @@ -94,13 +89,8 @@ TEST(CXXApi, save_model) { #ifdef LITE_WITH_ARM TEST(CXXApi, save_model) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}}); - predictor.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kARM), PRECISION(kFloat)}, - valid_places); + std::vector valid_places({Place{TARGET(kARM), PRECISION(kFloat)}}); + predictor.Build(FLAGS_model_dir, "", "", valid_places); LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model; predictor.SaveModel(FLAGS_optimized_model); @@ -110,12 +100,10 @@ TEST(CXXApi, save_model) { TEST(CXXApi, load_model_naive) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kARM), PRECISION(kFloat)}}); predictor.Build(FLAGS_optimized_model + ".naive", "", "", - Place{TARGET(kARM), PRECISION(kFloat)}, valid_places, {}, lite_api::LiteModelType::kNaiveBuffer); diff --git a/lite/api/detection_model_test.cc b/lite/api/detection_model_test.cc index 2d79653baa17a12aee1d878e6d289af6ce4188a1..c14acbac411aad526cf9271c22891cf7279f3ade 100644 --- a/lite/api/detection_model_test.cc +++ b/lite/api/detection_model_test.cc @@ -34,7 +34,6 @@ void OutputOptModel(const std::string& load_model_dir, const std::string& save_optimized_model_dir) { lite_api::CxxConfig config; config.set_model_dir(load_model_dir); - config.set_preferred_place(Place{TARGET(kX86), PRECISION(kFloat)}); config.set_valid_places({ Place{TARGET(kX86), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, diff --git a/lite/api/efficientnet_b0_test.cc b/lite/api/efficientnet_b0_test.cc index fa16a6be817f2a6160fd2eaf8fd48d9fa9e1aa1a..61d74eb35412291398d4491057013c514ff5e1de 100644 --- a/lite/api/efficientnet_b0_test.cc +++ b/lite/api/efficientnet_b0_test.cc @@ -25,13 +25,12 @@ namespace paddle { namespace lite { -void TestModel(const std::vector &valid_places, - const Place &preferred_place) { +void TestModel(const std::vector &valid_places) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads); lite::Predictor predictor; - predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto *input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); @@ -80,22 +79,20 @@ void TestModel(const std::vector &valid_places, TEST(EfficientNetB0, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, // Place{TARGET(kOpenCL), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)})); + TestModel(valid_places); } TEST(EfficientNetB0, test_opencl) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kOpenCL), PRECISION(kFloat)}, + Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)})); + TestModel(valid_places); } } // namespace lite diff --git a/lite/api/inceptionv4_test.cc b/lite/api/inceptionv4_test.cc index ae772dbba560b855f7f835f7513451713f1099b8..95ad5121caafd70b6b0111bab9c2e76bce75c742 100644 --- a/lite/api/inceptionv4_test.cc +++ b/lite/api/inceptionv4_test.cc @@ -30,14 +30,9 @@ TEST(InceptionV4, test) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads); lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kARM), PRECISION(kFloat)}}); - predictor.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kARM), PRECISION(kFloat)}, - valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); diff --git a/lite/api/lite_api_test_helper.cc b/lite/api/lite_api_test_helper.cc index cd576998d3472a8a8c08a77765a03adce7490827..802f6d4b52082ea45867c63a544256ae4b567040 100644 --- a/lite/api/lite_api_test_helper.cc +++ b/lite/api/lite_api_test_helper.cc @@ -24,24 +24,16 @@ namespace lite { const lite::Tensor* RunHvyModel() { lite::Predictor predictor; #ifndef LITE_WITH_CUDA - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}}); #else std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}, Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)}, Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)}, - Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)}, Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)}, - Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)}, }); #endif - predictor.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kX86), PRECISION(kFloat)}, // origin cuda - valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({100, 100}))); diff --git a/lite/api/mobilenetv1_int8_test.cc b/lite/api/mobilenetv1_int8_test.cc index d3ac115fa21209171627859fcb7acbd0044dbe26..2a54042f43a8274b52f34eaa4bc426a3712ac107 100644 --- a/lite/api/mobilenetv1_int8_test.cc +++ b/lite/api/mobilenetv1_int8_test.cc @@ -25,13 +25,12 @@ namespace paddle { namespace lite { -void TestModel(const std::vector& valid_places, - const Place& preferred_place) { +void TestModel(const std::vector& valid_places) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads); lite::Predictor predictor; - predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); @@ -93,12 +92,11 @@ void TestModel(const std::vector& valid_places, TEST(MobileNetV1, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kInt8)}, + Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kInt8)})); + TestModel(valid_places); } } // namespace lite diff --git a/lite/api/mobilenetv1_ssd_test.cc b/lite/api/mobilenetv1_ssd_test.cc index c93da43c11f42a221fff242aacc3378a16d47db4..8eacbe2619c6c55594fd8a280bb1ab2901f24c51 100644 --- a/lite/api/mobilenetv1_ssd_test.cc +++ b/lite/api/mobilenetv1_ssd_test.cc @@ -26,13 +26,12 @@ namespace paddle { namespace lite { #ifdef LITE_WITH_ARM -void TestModel(const std::vector& valid_places, - const Place& preferred_place) { +void TestModel(const std::vector& valid_places) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads); lite::Predictor predictor; - predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 300, 300}))); @@ -99,7 +98,6 @@ void TestModel(const std::vector& valid_places, TEST(MobileNetV1_SSD, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, }); diff --git a/lite/api/mobilenetv1_test.cc b/lite/api/mobilenetv1_test.cc index f4bb7318df6c6c03ddb98a4bbdd142c02bf45f4e..63a401745b325654f81c3af93402703395264c0d 100644 --- a/lite/api/mobilenetv1_test.cc +++ b/lite/api/mobilenetv1_test.cc @@ -28,14 +28,13 @@ namespace paddle { namespace lite { void TestModel(const std::vector& valid_places, - const Place& preferred_place, const std::string& model_dir = FLAGS_model_dir, bool save_model = false) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads); lite::Predictor predictor; - predictor.Build(model_dir, "", "", preferred_place, valid_places); + predictor.Build(model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); @@ -103,41 +102,32 @@ void TestModel(const std::vector& valid_places, #ifdef LITE_WITH_NPU TEST(MobileNetV1, test_npu) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kNPU), PRECISION(kFloat)}, }); - TestModel(valid_places, - Place({TARGET(kARM), PRECISION(kFloat)}), - FLAGS_model_dir, - true /* save_model*/); + TestModel(valid_places, FLAGS_model_dir, true /* save_model*/); - TestModel(valid_places, - Place({TARGET(kARM), PRECISION(kFloat)}), - FLAGS_optimized_model, - false /* save model */); + TestModel(valid_places, FLAGS_optimized_model, false /* save model */); } #endif // LITE_WITH_NPU TEST(MobileNetV1, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)})); + TestModel(valid_places); } #ifdef LITE_WITH_OPENCL TEST(MobileNetV1, test_opencl) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kOpenCL), PRECISION(kFloat)}, + Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)})); + TestModel(valid_places); } #endif // LITE_WITH_OPENCL diff --git a/lite/api/mobilenetv1_yolov3_test.cc b/lite/api/mobilenetv1_yolov3_test.cc index 7ea33528cafe62f3436ae9069ab054ae7d255ee8..09f9b6d11a10fb8eb66e939716aaea4ceaf7f418 100644 --- a/lite/api/mobilenetv1_yolov3_test.cc +++ b/lite/api/mobilenetv1_yolov3_test.cc @@ -26,13 +26,12 @@ namespace paddle { namespace lite { #ifdef LITE_WITH_ARM -void TestModel(const std::vector& valid_places, - const Place& preferred_place) { +void TestModel(const std::vector& valid_places) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads); lite::Predictor predictor; - predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 608, 608}))); @@ -106,11 +105,10 @@ void TestModel(const std::vector& valid_places, TEST(MobileNetV1_YoloV3, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)})); + TestModel(valid_places); } #endif // LITE_WITH_ARM diff --git a/lite/api/mobilenetv2_test.cc b/lite/api/mobilenetv2_test.cc index 09dbd0f8bf13c523c71f46551efef815343143ed..84bd27e352f549d619cfa51f9127f973023e6d45 100644 --- a/lite/api/mobilenetv2_test.cc +++ b/lite/api/mobilenetv2_test.cc @@ -29,14 +29,13 @@ namespace lite { #ifdef LITE_WITH_ARM void TestModel(const std::vector& valid_places, - const Place& preferred_place, const std::string& model_dir = FLAGS_model_dir, bool save_model = false) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads); lite::Predictor predictor; - predictor.Build(model_dir, "", "", preferred_place, valid_places); + predictor.Build(model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); @@ -103,41 +102,32 @@ void TestModel(const std::vector& valid_places, #ifdef LITE_WITH_NPU TEST(MobileNetV2, test_npu) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kNPU), PRECISION(kFloat)}, }); - TestModel(valid_places, - Place({TARGET(kARM), PRECISION(kFloat)}), - FLAGS_model_dir, - true /* save_model*/); + TestModel(valid_places, FLAGS_model_dir, true /* save_model*/); - TestModel(valid_places, - Place({TARGET(kARM), PRECISION(kFloat)}), - FLAGS_optimized_model, - false /* save model */); + TestModel(valid_places, FLAGS_optimized_model, false /* save model */); } #endif // LITE_WITH_NPU TEST(MobileNetV2, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)})); + TestModel(valid_places); } #ifdef LITE_WITH_OPENCL TEST(MobileNetV2, test_opencl) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kOpenCL), PRECISION(kFloat)}, + Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)})); + TestModel(valid_places); } #endif // LITE_WITH_OPENCL diff --git a/lite/api/model_optimize_tool.cc b/lite/api/model_optimize_tool.cc index 37c09b344698f1cd3382fa47e8f66359a05615c0..7dd2077008bc81d328ef206a41384c45159af3b3 100644 --- a/lite/api/model_optimize_tool.cc +++ b/lite/api/model_optimize_tool.cc @@ -82,7 +82,6 @@ void Main() { target_repr.c_str()); } } - valid_places.emplace_back(TARGET(kHost)); CHECK(!valid_places.empty()) << "At least one target should be set, should set the " @@ -90,8 +89,8 @@ void Main() { if (FLAGS_prefer_int8_kernel) { LOG(WARNING) << "Int8 mode is only support by ARM target"; - valid_places.push_back(Place{TARGET(kARM), PRECISION(kInt8)}); - config.set_preferred_place(Place{TARGET(kARM), PRECISION(kInt8)}); + valid_places.insert(valid_places.begin(), + Place{TARGET(kARM), PRECISION(kInt8)}); } config.set_valid_places(valid_places); diff --git a/lite/api/model_run_test_image.cc b/lite/api/model_run_test_image.cc index 099a74ed7fbf54da2d632150c4438f9ad894bb1d..f3cd35c524c4cae7f940fa77a7330722230455da 100644 --- a/lite/api/model_run_test_image.cc +++ b/lite/api/model_run_test_image.cc @@ -30,16 +30,14 @@ TEST(model, test) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads); lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}, + std::vector valid_places({Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kInt8)}}); auto precision = PRECISION(kFloat); if (FLAGS_int8) { precision = PRECISION(kInt8); } - predictor.Build( - FLAGS_model_dir, "", "", Place{TARGET(kARM), precision}, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); int im_width = FLAGS_im_width; int im_height = FLAGS_im_height; auto* input_tensor = predictor.GetInput(0); diff --git a/lite/api/model_test.cc b/lite/api/model_test.cc index 114d1acdbe1aa3e73bfa593a7a8950eacf3d415d..e027a5541395de66d9f1e8cb28d9832f27908828 100644 --- a/lite/api/model_test.cc +++ b/lite/api/model_test.cc @@ -36,11 +36,9 @@ void OutputOptModel(const std::string& load_model_dir, const std::vector>& input_shapes) { lite_api::CxxConfig config; config.set_model_dir(load_model_dir); - config.set_preferred_place(Place{TARGET(kX86), PRECISION(kFloat)}); config.set_valid_places({ Place{TARGET(kX86), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, - Place{TARGET(kHost), PRECISION(kFloat)}, }); auto predictor = lite_api::CreatePaddlePredictor(config); diff --git a/lite/api/ocr_attention_test.cc b/lite/api/ocr_attention_test.cc index 89cf6a3e8d3fa29b25d617afdec3df3980755424..5e39c5437c18990be9c6414695a94c6f2c9fcf20 100644 --- a/lite/api/ocr_attention_test.cc +++ b/lite/api/ocr_attention_test.cc @@ -25,14 +25,12 @@ namespace paddle { namespace lite { -void TestModel(const std::vector& valid_places, - const Place& preferred_place, - bool use_npu = false) { +void TestModel(const std::vector& valid_places, bool use_npu = false) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads); lite::Predictor predictor; - predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 1, 48, 512}))); @@ -104,11 +102,10 @@ void TestModel(const std::vector& valid_places, TEST(OcrAttention, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)})); + TestModel(valid_places); } } // namespace lite diff --git a/lite/api/paddle_api.h b/lite/api/paddle_api.h index 97b3b31bc77933fd41e862f4ec6f0d023c16911f..17417aa72964dc89346a9af3c1c9a47116dd6cca 100644 --- a/lite/api/paddle_api.h +++ b/lite/api/paddle_api.h @@ -116,14 +116,12 @@ class LITE_API ConfigBase { /// CxxConfig is the config for the Full feature predictor. class LITE_API CxxConfig : public ConfigBase { - Place preferred_place_; std::vector valid_places_; std::string model_file_; std::string param_file_; bool model_from_memory_{false}; public: - void set_preferred_place(const Place& x) { preferred_place_ = x; } void set_valid_places(const std::vector& x) { valid_places_ = x; } void set_model_file(const std::string& path) { model_file_ = path; } void set_param_file(const std::string& path) { param_file_ = path; } @@ -136,7 +134,6 @@ class LITE_API CxxConfig : public ConfigBase { model_from_memory_ = true; } - const Place& preferred_place() const { return preferred_place_; } const std::vector& valid_places() const { return valid_places_; } std::string model_file() const { return model_file_; } std::string param_file() const { return param_file_; } diff --git a/lite/api/paddle_api_test.cc b/lite/api/paddle_api_test.cc index 994658037735cd26bb3dcbaf905215f17f306af7..63142d49814473e6dc9ee6e553d95fa86b4058c5 100644 --- a/lite/api/paddle_api_test.cc +++ b/lite/api/paddle_api_test.cc @@ -28,7 +28,6 @@ namespace lite_api { TEST(CxxApi, run) { lite_api::CxxConfig config; config.set_model_dir(FLAGS_model_dir); - config.set_preferred_place(Place{TARGET(kX86), PRECISION(kFloat)}); config.set_valid_places({ Place{TARGET(kX86), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, diff --git a/lite/api/resnet18_test.cc b/lite/api/resnet18_test.cc index c003dc1dba6500e37d4b0d6b724d743c45ebeebf..5a50367006a8c3eeea0cfa6fe46f393463763ca9 100644 --- a/lite/api/resnet18_test.cc +++ b/lite/api/resnet18_test.cc @@ -28,14 +28,9 @@ namespace lite { #ifdef LITE_WITH_ARM TEST(ResNet18, test) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kARM), PRECISION(kFloat)}}); - predictor.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kARM), PRECISION(kFloat)}, - valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); diff --git a/lite/api/resnet50_test.cc b/lite/api/resnet50_test.cc index 6e78d12be07b0887ec9942e8b8c1d2c530b6fc35..3e5a725b9001da760670976666ef624e5dac416b 100644 --- a/lite/api/resnet50_test.cc +++ b/lite/api/resnet50_test.cc @@ -26,13 +26,12 @@ namespace paddle { namespace lite { #ifdef LITE_WITH_ARM -void TestModel(const std::vector& valid_places, - const Place& preferred_place) { +void TestModel(const std::vector& valid_places) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads); lite::Predictor predictor; - predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); @@ -82,22 +81,20 @@ void TestModel(const std::vector& valid_places, TEST(ResNet50, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)})); + TestModel(valid_places); } #ifdef LITE_WITH_OPENCL TEST(ResNet50, test_opencl) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kOpenCL), PRECISION(kFloat)}, + Place{TARGET(kARM), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)})); + TestModel(valid_places); } #endif // LITE_WITH_OPENCL diff --git a/lite/api/resnet50_test_fpga.cc b/lite/api/resnet50_test_fpga.cc index 7ea81cc746411c86e6f7a882e3f040cfab98503c..ab647f96998f1c0e73476369611218d0a7930c57 100644 --- a/lite/api/resnet50_test_fpga.cc +++ b/lite/api/resnet50_test_fpga.cc @@ -29,8 +29,7 @@ namespace lite { TEST(ResNet50, test) { lite::Predictor predictor; std::vector valid_places( - {Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)}, - Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNHWC)}}); + {Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)}}); predictor.Build(FLAGS_model_dir, "", diff --git a/lite/api/shufflenetv2_test.cc b/lite/api/shufflenetv2_test.cc index f67bc8c6cfcc5ad545c43f2ee91a799c295e5838..2c1247997c2dcaa33e5c11af37996cab1e287fa4 100644 --- a/lite/api/shufflenetv2_test.cc +++ b/lite/api/shufflenetv2_test.cc @@ -25,13 +25,12 @@ namespace paddle { namespace lite { -void TestModel(const std::vector& valid_places, - const Place& preferred_place) { +void TestModel(const std::vector& valid_places) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads); lite::Predictor predictor; - predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim((std::vector({1, 3, 224, 224})))); @@ -80,12 +79,11 @@ void TestModel(const std::vector& valid_places, TEST(ShuffleNetV2, test_arm) { std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, // Place{TARGET(kOpenCL), PRECISION(kFloat)}, }); - TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)})); + TestModel(valid_places); } } // namespace lite diff --git a/lite/api/test_googlenet_lite.cc b/lite/api/test_googlenet_lite.cc index 4c9ecd90c6962ac390dc6db2f37710615b2c60d8..952892f7c4a7e02b471f67b6dc9b48f5154c73b4 100644 --- a/lite/api/test_googlenet_lite.cc +++ b/lite/api/test_googlenet_lite.cc @@ -45,13 +45,11 @@ namespace lite { #ifdef LITE_WITH_X86 TEST(CXXApi, test_lite_googlenet) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}}); // LOG(INFO)<<"FLAGS_eval_googlenet_dir:"<Resize(DDim(std::vector({1, 3, 224, 224}))); diff --git a/lite/api/test_inceptionv4_lite_x86.cc b/lite/api/test_inceptionv4_lite_x86.cc index 5d1dbbe1448433eb5bdde0818229d5e1793ae39c..c1a58b8014acfbcd0890478a23fdfc3599492337 100644 --- a/lite/api/test_inceptionv4_lite_x86.cc +++ b/lite/api/test_inceptionv4_lite_x86.cc @@ -43,8 +43,7 @@ namespace lite { TEST(InceptionV4, test_inceptionv4_lite_x86) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}}); // LOG(INFO)<<"FLAGS_eval_googlenet_dir:"<Resize(DDim(std::vector({1, 3, 224, 224}))); diff --git a/lite/api/test_mobilenetv1_lite_x86.cc b/lite/api/test_mobilenetv1_lite_x86.cc index d755410b6a8816cee1de60504e93e1eae5eedd4b..fc02c1dc7855f71ee24f9d0c10d17117debcf6a4 100644 --- a/lite/api/test_mobilenetv1_lite_x86.cc +++ b/lite/api/test_mobilenetv1_lite_x86.cc @@ -43,8 +43,7 @@ namespace lite { TEST(Mobilenet_v1, test_mobilenetv1_lite_x86) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}}); std::string model_dir = FLAGS_model_dir; std::vector passes({"static_kernel_pick_pass", @@ -54,12 +53,7 @@ TEST(Mobilenet_v1, test_mobilenetv1_lite_x86) { "io_copy_kernel_pick_pass", "variable_place_inference_pass", "runtime_context_assign_pass"}); - predictor.Build(model_dir, - "", - "", - Place{TARGET(kX86), PRECISION(kFloat)}, - valid_places, - passes); + predictor.Build(model_dir, "", "", valid_places, passes); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); auto* data = input_tensor->mutable_data(); diff --git a/lite/api/test_mobilenetv2_lite_x86.cc b/lite/api/test_mobilenetv2_lite_x86.cc index b1090cc6f260ba1b67c5cca8730a2915900f695f..47ee32321d82c16cdc895687ec86dc285b1cb90c 100644 --- a/lite/api/test_mobilenetv2_lite_x86.cc +++ b/lite/api/test_mobilenetv2_lite_x86.cc @@ -44,8 +44,8 @@ namespace lite { TEST(Mobilenet_v2, test_mobilenetv2_lite_x86) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}, + Place{TARGET(kHost), PRECISION(kFloat)}}); // LOG(INFO)<<"FLAGS_eval_googlenet_dir:"<Resize(DDim(std::vector({1, 3, 224, 224}))); diff --git a/lite/api/unet_test.cc b/lite/api/unet_test.cc index aae5f493eb0f67e3d09c7b48eb823dda8b343159..697280f28883138d2603f796c1952c655cd085d8 100644 --- a/lite/api/unet_test.cc +++ b/lite/api/unet_test.cc @@ -30,14 +30,9 @@ TEST(unet, test) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads); lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kARM), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kARM), PRECISION(kFloat)}}); - predictor.Build(FLAGS_model_dir, - "", - "", - Place{TARGET(kARM), PRECISION(kFloat)}, - valid_places); + predictor.Build(FLAGS_model_dir, "", "", valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 512, 512}))); diff --git a/lite/core/mir/fusion/fc_fuse_pass_test.cc b/lite/core/mir/fusion/fc_fuse_pass_test.cc index cbf77084dd6e6de77617931a077331c16e1f693a..f7aa4bb5adcb848531ecc3a8f63bace1c2e3e0ff 100644 --- a/lite/core/mir/fusion/fc_fuse_pass_test.cc +++ b/lite/core/mir/fusion/fc_fuse_pass_test.cc @@ -30,16 +30,12 @@ namespace mir { TEST(fc_fuse_pass, fuse_test) { lite::Predictor predictor; #ifndef LITE_WITH_CUDA - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}}); #else std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}, Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)}, Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)}, - Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)}, Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)}, - Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)}, }); #endif @@ -72,8 +68,7 @@ TEST(fc_fuse_pass, fuse_test) { #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK TEST(fc_fuse_pass, save_model_test) { lite::Predictor predictor; - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); + std::vector valid_places({Place{TARGET(kX86), PRECISION(kFloat)}}); predictor.Build(FLAGS_model_dir, "", "", diff --git a/lite/core/mir/static_kernel_pick_pass.cc b/lite/core/mir/static_kernel_pick_pass.cc index adadbd6d9806f85a3da21df4283a4491e053ab3a..90aca56aec426f6b7ca0d300ded979ae7b10f6df 100644 --- a/lite/core/mir/static_kernel_pick_pass.cc +++ b/lite/core/mir/static_kernel_pick_pass.cc @@ -24,8 +24,8 @@ namespace paddle { namespace lite { namespace mir { -bool KernelScoreCmp(const std::pair>& a, - const std::pair>& b) { +bool KernelScoreCmp(const std::pair>& a, + const std::pair>& b) { return a.first > b.first; } @@ -44,12 +44,12 @@ void StaticKernelPickPass::Apply(const std::unique_ptr& graph) { auto& instruct = node.AsStmt(); // Get candidate kernels - std::vector>> scored; + std::vector>> scored; CHECK(!instruct.kernels().empty()) << "No kernels found for " << instruct.op_type(); VLOG(4) << "instruct.kernels().size():" << instruct.kernels().size(); for (auto&& kernel : instruct.kernels()) { - size_t score = KernelGrade(*kernel); + float score = KernelGrade(*kernel, graph->valid_places()); VLOG(4) << "kernel->summary():" << kernel->summary() << " score:" << score; scored.emplace_back(score, std::move(kernel)); @@ -99,7 +99,7 @@ void StaticKernelPickPass::Apply(const std::unique_ptr& graph) { instruct.ResetOp(update_desc, graph->valid_places()); scored.clear(); for (auto&& kernel : instruct.kernels()) { - size_t score = KernelGrade(*kernel); + float score = KernelGrade(*kernel, graph->valid_places()); scored.emplace_back(score, std::move(kernel)); } std::sort(scored.begin(), scored.end(), KernelScoreCmp); diff --git a/lite/core/mir/static_kernel_pick_pass.h b/lite/core/mir/static_kernel_pick_pass.h index 4e8707aa49a92626b77393341e0a1a256965873b..7187ddcef6626888eaaf372f7b027aa5d9bd2a3a 100644 --- a/lite/core/mir/static_kernel_pick_pass.h +++ b/lite/core/mir/static_kernel_pick_pass.h @@ -16,6 +16,7 @@ #include #include +#include #include "lite/core/mir/pass.h" #include "lite/core/types.h" @@ -38,8 +39,6 @@ class StaticKernelPickPass : public mir::StmtPass { public: void Apply(const std::unique_ptr& graph) override; - void SetPreferPlace(const Place& place) { place_ = place; } - const Place& place() const { return place_; } const core::KernelPickFactor& kernel_pick_factors() const { return kernel_pick_factors_; } @@ -49,39 +48,63 @@ class StaticKernelPickPass : public mir::StmtPass { private: // Score the kernel. - size_t KernelGrade(const lite::KernelBase& kernel) { - size_t score{}; + size_t KernelGrade(const lite::KernelBase& kernel, + const std::vector& places) { + CHECK_GT(places.size(), 0) << "valid_places is empty."; + float final_score{-1.}; + Place winner_place{places[0]}; const int kMax = std::numeric_limits::max(); - VLOG(4) << "[score s1]:" << score; - // The more important factor comes first - if (kernel_pick_factors_.IsTargetConsidered() && - (place().target == kernel.target() || kernel.target() == TARGET(kAny) || - place().target == TARGET(kAny))) { - score += - kMax / static_cast(core::KernelPickFactor::Factor::TargetFirst); - } - VLOG(4) << "[score s2]:" << score; - if (kernel_pick_factors_.IsPrecisionConsidered() && - (place().precision == kernel.precision() || - kernel.precision() == PRECISION(kAny) || - place().precision == PRECISION(kAny))) { - score += kMax / - static_cast(core::KernelPickFactor::Factor::PrecisionFirst); - } - VLOG(4) << "[score s3]:" << score; - if (kernel_pick_factors_.IsDataLayoutConsidered() && - (place().layout == kernel.layout() || - kernel.layout() == DATALAYOUT(kAny) || - place().layout == DATALAYOUT(kAny))) { - score += kMax / static_cast( - core::KernelPickFactor::Factor::DataLayoutFirst); + size_t place_size = places.size(); + + // NOTE: We compare kernel's place with place in valid_places to select the + // best match place + // The place's order in valid_places array decide the user's + // preference + // final_score = weight * socre + // weight: The weight is compute with (valid_places.size() - i) / + // valid_places.size() as default. + // where i is the place's index in valid_places array. + // score: score is the weighted sum of target、percision and layout + for (int i = 0; i < place_size; ++i) { + const auto& place = places[i]; + float weight = static_cast(place_size - i) / place_size; + size_t score{}; + // The more important factor comes first + if (kernel_pick_factors_.IsTargetConsidered() && + (place.target == kernel.target() || kernel.target() == TARGET(kAny) || + place.target == TARGET(kAny))) { + score += kMax / + static_cast(core::KernelPickFactor::Factor::TargetFirst); + } + VLOG(4) << "[score s1]:" << score; + if (kernel_pick_factors_.IsPrecisionConsidered() && + (place.precision == kernel.precision() || + kernel.precision() == PRECISION(kAny) || + place.precision == PRECISION(kAny))) { + score += kMax / static_cast( + core::KernelPickFactor::Factor::PrecisionFirst); + } + VLOG(4) << "[score s2]:" << score; + if (kernel_pick_factors_.IsDataLayoutConsidered() && + (place.layout == kernel.layout() || + kernel.layout() == DATALAYOUT(kAny) || + place.layout == DATALAYOUT(kAny))) { + score += kMax / static_cast( + core::KernelPickFactor::Factor::DataLayoutFirst); + } + VLOG(4) << "[score s3]:" << score; + if (weight * score > final_score) { + final_score = weight * score; + winner_place = place; + } } - VLOG(4) << "[score s4(final)]:" << score; + + VLOG(4) << "[score(final)]:" << final_score; VLOG(4) << "-------- pick summary --------"; - VLOG(4) << " ===> place():" << PrecisionToStr(place().precision) << " " - << DataLayoutToStr(place().layout) << " " - << TargetToStr(place().target); + VLOG(4) << " ===> place():" << PrecisionToStr(winner_place.precision) << " " + << DataLayoutToStr(winner_place.layout) << " " + << TargetToStr(winner_place.target); VLOG(4) << " ===> kernel.place():" << PrecisionToStr(kernel.place().precision) << " " << DataLayoutToStr(kernel.place().layout) << " " @@ -89,20 +112,18 @@ class StaticKernelPickPass : public mir::StmtPass { VLOG(4) << "kernel.op_type():" << kernel.op_type(); VLOG(4) << "picker tactic " << kernel_pick_factors_; VLOG(4) << "kernel place " << kernel.place().DebugString(); - VLOG(4) << "picker place " << place().DebugString(); - VLOG(4) << "score " << score; + VLOG(4) << "picker place " << winner_place.DebugString(); VLOG(4) << "------------------------------"; // The data layout is not considered, for the input and output arguments // might have different data layout. // TODO(Superjomn) reconsider the idea of taking the data layout as a kernel // specification. - return score; + return final_score; } private: core::KernelPickFactor kernel_pick_factors_; - Place place_; }; } // namespace mir diff --git a/lite/core/mir/subgraph/generate_npu_program_pass_test.cc b/lite/core/mir/subgraph/generate_npu_program_pass_test.cc index 25b1482b4b1bcf6c6112f967200c5efd50513ed9..88095df502fe05a51b548dde7ce09700855ffae3 100644 --- a/lite/core/mir/subgraph/generate_npu_program_pass_test.cc +++ b/lite/core/mir/subgraph/generate_npu_program_pass_test.cc @@ -106,7 +106,6 @@ std::shared_ptr TestModel( const std::string& model_dir, const std::string& model_file, const std::string& params_file, - const lite_api::Place& preferred_place, const std::vector& valid_places, const std::vector>& input_tensor_shape, const std::string& optimized_model_dir) { @@ -115,7 +114,6 @@ std::shared_ptr TestModel( cxx_config.set_model_dir(model_dir); cxx_config.set_model_file(model_file); cxx_config.set_param_file(params_file); - cxx_config.set_preferred_place(preferred_place); cxx_config.set_valid_places(valid_places); auto predictor = lite_api::CreatePaddlePredictor(cxx_config); FillInputTensor(predictor, input_tensor_shape, 1); @@ -151,9 +149,7 @@ TEST(NPUSubgraph, compare) { TestModel(FLAGS_model_dir, FLAGS_model_file, FLAGS_params_file, - lite_api::Place{TARGET(kARM), PRECISION(kFloat)}, - {lite_api::Place{TARGET(kHost), PRECISION(kFloat)}, - lite_api::Place{TARGET(kARM), PRECISION(kFloat)}}, + {lite_api::Place{TARGET(kARM), PRECISION(kFloat)}}, input_tensor_shape, FLAGS_optimized_model_dir + "/CPU"); // generate and run optimized NPU model @@ -162,9 +158,7 @@ TEST(NPUSubgraph, compare) { TestModel(FLAGS_model_dir, FLAGS_model_file, FLAGS_params_file, - lite_api::Place{TARGET(kARM), PRECISION(kFloat)}, - {lite_api::Place{TARGET(kHost), PRECISION(kFloat)}, - lite_api::Place{TARGET(kARM), PRECISION(kFloat)}, + {lite_api::Place{TARGET(kARM), PRECISION(kFloat)}, lite_api::Place{TARGET(kNPU), PRECISION(kFloat)}}, input_tensor_shape, FLAGS_optimized_model_dir + "/NPU"); diff --git a/lite/core/mir/variable_place_inference_pass_test.cc b/lite/core/mir/variable_place_inference_pass_test.cc index cf86afd590db8b05dcec720455284b3311551848..dec37078fa24e6c7974391d254f3847b7a90e8ba 100644 --- a/lite/core/mir/variable_place_inference_pass_test.cc +++ b/lite/core/mir/variable_place_inference_pass_test.cc @@ -63,18 +63,6 @@ TEST(variable_place_inference_pass, test) { "type_target_cast_pass", // }); - Place prefered_place{ -#ifdef PADDLE_WITH_CUDA - TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW), -#else -#ifdef PADDLE_WITH_ARM - TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW), -#else // X86 - TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW), -#endif // ARM -#endif - }; - optimizer.KernelPickPreferPlace(prefered_place); optimizer.Run(std::move(program), places, factor, passes); } diff --git a/lite/core/optimizer.h b/lite/core/optimizer.h index 6f91fee9917f2d4f1ea87a9ad8e331c76ff77e18..37deddb346dea21fee1a97ea819437fbc4d0a812 100644 --- a/lite/core/optimizer.h +++ b/lite/core/optimizer.h @@ -117,13 +117,6 @@ class Optimizer { exec_scope_ = program.exec_scope(); } - void KernelPickPreferPlace(const Place& place) { - auto* pass = mir::PassManager::Global().LookUp( - "static_kernel_pick_pass"); - CHECK(pass); - pass->SetPreferPlace(place); - } - const lite::Scope* exec_scope() const { return exec_scope_; } // Generate a new program based on the mir graph. diff --git a/lite/demo/cxx/mobile_full/mobilenetv1_full_api.cc b/lite/demo/cxx/mobile_full/mobilenetv1_full_api.cc index 18167e3ca115cbe994882951be909c1d30482e74..5ac041b2cc53e8f17ad86a2b71e6b02058b7e249 100644 --- a/lite/demo/cxx/mobile_full/mobilenetv1_full_api.cc +++ b/lite/demo/cxx/mobile_full/mobilenetv1_full_api.cc @@ -38,10 +38,8 @@ void RunModel() { config.set_model_dir(FLAGS_model_dir); std::vector valid_places{Place{TARGET(kARM), PRECISION(kFloat)}}; if (FLAGS_prefer_int8_kernel) { - valid_places.push_back(Place{TARGET(kARM), PRECISION(kInt8)}); - config.set_preferred_place(Place{TARGET(kARM), PRECISION(kInt8)}); - } else { - config.set_preferred_place(Place{TARGET(kARM), PRECISION(kFloat)}); + valid_places.insert(valid_places.begin(), + Place{TARGET(kARM), PRECISION(kInt8)}); } config.set_valid_places(valid_places); diff --git a/lite/gen_code/gen_code.h b/lite/gen_code/gen_code.h index 7dea36636af6fa3682c6f9a66ab237573a54f0b6..58a7959f4eb34cb438bf0e25b49b36110435cc6b 100644 --- a/lite/gen_code/gen_code.h +++ b/lite/gen_code/gen_code.h @@ -102,7 +102,7 @@ class Module { void AddValidPlaceDecl() { // clang-format off - Line("std::vector valid_places({lite::Place({TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW)}), lite::Place({TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)})});"); // NOLINT + Line("std::vector valid_places({lite::Place({TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW)})});"); // NOLINT // clang-format on } diff --git a/lite/tools/debug/model_debug_tool.cc b/lite/tools/debug/model_debug_tool.cc index 30f35ca7fcc2c0221f1435d401b17fb9c87e881a..4b27db7a8d3a2dcf8237660b50631c71dcd4f4af 100644 --- a/lite/tools/debug/model_debug_tool.cc +++ b/lite/tools/debug/model_debug_tool.cc @@ -35,7 +35,6 @@ void Run(DebugConfig* conf) { #endif lite::Predictor predictor; std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat)}, #ifdef LITE_WITH_ARM Place{TARGET(kARM), PRECISION(kFloat)}, #endif @@ -60,23 +59,7 @@ void Run(DebugConfig* conf) { "runtime_context_assign_pass", }}; - predictor.Build(conf->model_dir, - "", - "", -#ifdef LITE_WITH_ARM - Place{TARGET(kARM), PRECISION(kFloat)}, -#endif -#ifdef LITE_WITH_X86 - Place{TARGET(kX86), PRECISION(kFloat)}, -#endif -#ifdef LITE_WITH_FPGA - Place{TARGET(kFPGA), PRECISION(kFloat)}, -#endif -#ifdef LITE_WITH_CUDA - Place{TARGET(kCUDA), PRECISION(kFloat)}, -#endif - valid_places, - passes); + predictor.Build(conf->model_dir, "", "", valid_places, passes); predictor.GenRuntimeProgram(); auto& instructions = predictor.runtime_program().instructions();