Commit 17833acb authored by sangoly, committed by GitHub

[framework][place] remove preferred_place and kHost from valid_places (#2192)

* [framework][place] remove preferred_place; use the place order in the valid_places array instead test=develop

* remove kHost from valid_places test=develop
Parent 31ab471e
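Under the old API, callers passed an explicit preferred place alongside the list of valid places; after this commit, preference is expressed purely by the order of `valid_places`. A minimal before/after sketch (the model path and include path are assumptions, not part of this diff):

```cpp
#include <vector>
#include "lite/api/cxx_api.h"  // assumed include path for paddle::lite::Predictor

using namespace paddle::lite;

int main() {
  // Before: Build() took a separate preferred Place argument, e.g.
  //   predictor.Build(model_dir, "", "",
  //                   Place{TARGET(kARM), PRECISION(kFloat)},  // preferred
  //                   valid_places);
  // After: the first entry of valid_places carries the highest priority,
  // and kHost fallback places are appended internally by the framework.
  std::vector<Place> valid_places({
      Place{TARGET(kARM), PRECISION(kInt8)},   // most preferred
      Place{TARGET(kARM), PRECISION(kFloat)},  // fallback
  });
  Predictor predictor;
  predictor.Build("./mobilenet_v1", "", "", valid_places);  // path assumed
  return 0;
}
```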
......@@ -145,8 +145,6 @@ inline CxxConfig jcxxconfig_to_cpp_cxxconfig(JNIEnv *env, jobject jcxxconfig) {
jmethodID model_dir_method =
env->GetMethodID(cxxconfig_jclazz, "getModelDir", "()Ljava/lang/String;");
jmethodID preferred_place_method = env->GetMethodID(
cxxconfig_jclazz, "getPreferredPlace", "()Lcom/baidu/paddle/lite/Place;");
jmethodID valid_places_method = env->GetMethodID(
cxxconfig_jclazz, "getValidPlaces", "()[Lcom/baidu/paddle/lite/Place;");
......@@ -159,13 +157,6 @@ inline CxxConfig jcxxconfig_to_cpp_cxxconfig(JNIEnv *env, jobject jcxxconfig) {
config.set_model_dir(cpp_model_dir);
}
jobject java_preferred_place =
env->CallObjectMethod(jcxxconfig, preferred_place_method);
if (java_preferred_place != nullptr) {
Place cpp_preferred_place = jplace_to_cpp_place(env, java_preferred_place);
config.set_preferred_place(cpp_preferred_place);
}
jobject object_valid_places =
env->CallObjectMethod(jcxxconfig, valid_places_method);
jobjectArray *java_valid_places =
......
......@@ -18,17 +18,8 @@ package com.baidu.paddle.lite;
*/
public class CxxConfig extends ConfigBase {
protected Place preferredPlace;
protected Place[] validPlaces;
public Place getPreferredPlace() {
return preferredPlace;
}
public void setPreferredPlace(Place preferredPlace) {
this.preferredPlace = preferredPlace;
}
public Place[] getValidPlaces() {
return validPlaces;
}
......
......@@ -51,17 +51,12 @@ bool CompareTensors(const std::string& name,
TEST(CXXApi_LightApi, optim_model) {
lite::Predictor cxx_api;
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}, // Both works on X86 and ARM
});
// On ARM devices, the preferred X86 target does not work, but it can still
// select ARM kernels.
cxx_api.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kX86), PRECISION(kFloat)},
valid_places);
cxx_api.Build(FLAGS_model_dir, "", "", valid_places);
cxx_api.SaveModel(FLAGS_optimized_model);
}
......@@ -72,17 +67,12 @@ TEST(CXXApi_LightApi, save_and_load_model) {
// CXXAPi
{
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}, // Both works on X86 and ARM
});
// On ARM devices, the preferred X86 target does not work, but it can still
// select ARM kernels.
cxx_api.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kX86), PRECISION(kFloat)},
valid_places);
cxx_api.Build(FLAGS_model_dir, "", "", valid_places);
auto* x = cxx_api.GetInput(0);
SetConstInput(x);
......
......@@ -47,9 +47,7 @@ void OutputOptModel(const std::string& load_model_dir,
Place{TARGET(kARM), PRECISION(kInt8)},
Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kOpenCL), PRECISION(kFloat)},
Place{TARGET(kHost), PRECISION(kFloat)},
});
config.set_preferred_place(Place{TARGET(kARM), PRECISION(kFloat)});
auto predictor = lite_api::CreatePaddlePredictor(config);
int ret = system(
......
......@@ -110,14 +110,12 @@ void Predictor::Build(const lite_api::CxxConfig &config,
const std::string &model_path = config.model_dir();
const std::string &model_file = config.model_file();
const std::string &param_file = config.param_file();
const Place prefer_place = config.preferred_place();
const bool model_from_memory = config.model_from_memory();
LOG(INFO) << "load from memory " << model_from_memory;
Build(model_path,
model_file,
param_file,
prefer_place,
valid_places,
passes,
model_type,
......@@ -126,7 +124,6 @@ void Predictor::Build(const lite_api::CxxConfig &config,
void Predictor::Build(const std::string &model_path,
const std::string &model_file,
const std::string &param_file,
const Place &prefer_place,
const std::vector<Place> &valid_places,
const std::vector<std::string> &passes,
lite_api::LiteModelType model_type,
......@@ -153,21 +150,24 @@ void Predictor::Build(const std::string &model_path,
default:
LOG(FATAL) << "Unknown model type";
}
Build(program_desc_, prefer_place, valid_places, passes);
Build(program_desc_, valid_places, passes);
}
void Predictor::Build(const cpp::ProgramDesc &desc,
const Place &prefer_place,
const std::vector<Place> &valid_places,
const std::vector<std::string> &passes) {
program_desc_ = desc;
Program program(desc, scope_, valid_places);
optimizer_.KernelPickPreferPlace(prefer_place);
std::vector<Place> inner_places = valid_places;
inner_places.emplace_back(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny));
inner_places.emplace_back(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW));
Program program(desc, scope_, inner_places);
/// The first place in valid_places has the highest priority when picking kernels.
core::KernelPickFactor factor;
factor.ConsiderTarget();
factor.ConsiderPrecision();
factor.ConsiderDataLayout();
optimizer_.Run(std::move(program), valid_places, factor, passes);
optimizer_.Run(std::move(program), inner_places, factor, passes);
exec_scope_ = optimizer_.exec_scope();
}
......
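For orientation, the hunk above means callers no longer list kHost themselves: `Predictor::Build` copies `valid_places` into `inner_places` and appends host fallbacks before running the optimizer. A minimal sketch of the resulting list for a single ARM float place (names taken from the diff):

```cpp
std::vector<Place> valid_places = {Place{TARGET(kARM), PRECISION(kFloat)}};

// What Build() now does internally:
std::vector<Place> inner_places = valid_places;
inner_places.emplace_back(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny));
inner_places.emplace_back(TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW));
// inner_places == {kARM/kFloat, kHost/kAny/kAny, kHost/kFloat/kNCHW};
// the leading entry still encodes the user's preference.
```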
......@@ -50,14 +50,12 @@ class LITE_API Predictor {
const std::string& model_path,
const std::string& model_file_path,
const std::string& param_file_path,
const Place& prefer_place,
const std::vector<Place>& valid_places,
const std::vector<std::string>& passes = {},
lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf,
bool memory_from_memory = false);
void Build(const cpp::ProgramDesc& desc,
const Place& prefer_place,
const std::vector<Place>& valid_places,
const std::vector<std::string>& passes = {});
......@@ -132,10 +130,8 @@ class LITE_API Predictor {
class LITE_API CXXTrainer {
public:
CXXTrainer(const std::shared_ptr<lite::Scope>& root_scope,
const Place& preferred_place,
const std::vector<Place>& valid_places)
: scope_(root_scope),
preferred_place_(preferred_place),
valid_places_(valid_places),
main_program_executor_(Predictor(scope_)) {}
......@@ -144,7 +140,7 @@ class LITE_API CXXTrainer {
// NOTE: Currently only the 0-th block can be executed.
Predictor& BuildMainProgramExecutor(const framework::proto::ProgramDesc& desc,
int block_id = 0) {
main_program_executor_.Build(desc, preferred_place_, valid_places_);
main_program_executor_.Build(desc, valid_places_);
return main_program_executor_;
}
......@@ -162,14 +158,12 @@ class LITE_API CXXTrainer {
void RunStartupProgram(const framework::proto::ProgramDesc& desc,
int block_id = 0) {
Predictor exe(scope_);
exe.Build(desc, preferred_place_, valid_places_);
exe.Build(desc, valid_places_);
exe.Run();
}
private:
std::shared_ptr<lite::Scope> scope_;
Place preferred_place_;
std::vector<Place> valid_places_;
// The training program.
......
......@@ -35,13 +35,11 @@ void Run(const char* model_dir, int repeat) {
#endif
lite::Predictor predictor;
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kInt8)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
predictor.Build(
model_dir, "", "", Place{TARGET(kARM), PRECISION(kInt8)}, valid_places);
predictor.Build(model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......
......@@ -62,7 +62,6 @@ void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
Env<TARGET(kCUDA)>::Init();
#endif
auto places = config.valid_places();
places.emplace_back(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny));
raw_predictor_.Build(config, places);
raw_predictor_.PrepareFeedFetch();
}
......
......@@ -43,13 +43,8 @@ TEST(CXXApi, test) {
TEST(CXXApi, save_model) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kCUDA), PRECISION(kFloat)},
valid_places);
std::vector<Place> valid_places({Place{TARGET(kX86), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir, "", "", valid_places);
LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model;
predictor.SaveModel(FLAGS_optimized_model,
......@@ -59,11 +54,11 @@ TEST(CXXApi, save_model) {
}
/*TEST(CXXTrainer, train) {
Place prefer_place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)});
std::vector<Place> valid_places({prefer_place});
Place place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)});
std::vector<Place> valid_places({place});
auto scope = std::make_shared<lite::Scope>();
CXXTrainer trainer(scope, prefer_place, valid_places);
CXXTrainer trainer(scope, valid_places);
std::string main_program_pb, startup_program_pb;
ReadBinaryFile(FLAGS_main_program_path, &main_program_pb);
......@@ -94,13 +89,8 @@ TEST(CXXApi, save_model) {
#ifdef LITE_WITH_ARM
TEST(CXXApi, save_model) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kARM), PRECISION(kFloat)},
valid_places);
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir, "", "", valid_places);
LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model;
predictor.SaveModel(FLAGS_optimized_model);
......@@ -110,12 +100,10 @@ TEST(CXXApi, save_model) {
TEST(CXXApi, load_model_naive) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build(FLAGS_optimized_model + ".naive",
"",
"",
Place{TARGET(kARM), PRECISION(kFloat)},
valid_places,
{},
lite_api::LiteModelType::kNaiveBuffer);
......
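For the naive-buffer path, the optional arguments now follow `valid_places` directly in the shortened signature. A sketch of loading an optimized naive-buffer model under the new API (the model path is hypothetical):

```cpp
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build("./optimized_model.naive",  // hypothetical path
                "",                         // model_file (unused here)
                "",                         // param_file (unused here)
                valid_places,
                {},                         // no extra passes
                lite_api::LiteModelType::kNaiveBuffer);
```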
......@@ -34,7 +34,6 @@ void OutputOptModel(const std::string& load_model_dir,
const std::string& save_optimized_model_dir) {
lite_api::CxxConfig config;
config.set_model_dir(load_model_dir);
config.set_preferred_place(Place{TARGET(kX86), PRECISION(kFloat)});
config.set_valid_places({
Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
......
......@@ -25,13 +25,12 @@
namespace paddle {
namespace lite {
void TestModel(const std::vector<Place> &valid_places,
const Place &preferred_place) {
void TestModel(const std::vector<Place> &valid_places) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto *input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......@@ -80,22 +79,20 @@ void TestModel(const std::vector<Place> &valid_places,
TEST(EfficientNetB0, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
// Place{TARGET(kOpenCL), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
TestModel(valid_places);
}
TEST(EfficientNetB0, test_opencl) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kOpenCL), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)}));
TestModel(valid_places);
}
} // namespace lite
......
......@@ -30,14 +30,9 @@ TEST(InceptionV4, test) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kARM), PRECISION(kFloat)},
valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......
......@@ -24,24 +24,16 @@ namespace lite {
const lite::Tensor* RunHvyModel() {
lite::Predictor predictor;
#ifndef LITE_WITH_CUDA
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kX86), PRECISION(kFloat)}});
#else
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)},
Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)},
Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)},
});
#endif
predictor.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kX86), PRECISION(kFloat)}, // origin cuda
valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({100, 100})));
......
......@@ -25,13 +25,12 @@
namespace paddle {
namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
void TestModel(const std::vector<Place>& valid_places) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......@@ -93,12 +92,11 @@ void TestModel(const std::vector<Place>& valid_places,
TEST(MobileNetV1, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kInt8)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kInt8)}));
TestModel(valid_places);
}
} // namespace lite
......
......@@ -26,13 +26,12 @@ namespace paddle {
namespace lite {
#ifdef LITE_WITH_ARM
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
void TestModel(const std::vector<Place>& valid_places) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 300, 300})));
......@@ -99,7 +98,6 @@ void TestModel(const std::vector<Place>& valid_places,
TEST(MobileNetV1_SSD, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
......
......@@ -28,14 +28,13 @@ namespace paddle {
namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place,
const std::string& model_dir = FLAGS_model_dir,
bool save_model = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(model_dir, "", "", preferred_place, valid_places);
predictor.Build(model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......@@ -103,41 +102,32 @@ void TestModel(const std::vector<Place>& valid_places,
#ifdef LITE_WITH_NPU
TEST(MobileNetV1, test_npu) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kNPU), PRECISION(kFloat)},
});
TestModel(valid_places,
Place({TARGET(kARM), PRECISION(kFloat)}),
FLAGS_model_dir,
true /* save_model*/);
TestModel(valid_places, FLAGS_model_dir, true /* save_model*/);
TestModel(valid_places,
Place({TARGET(kARM), PRECISION(kFloat)}),
FLAGS_optimized_model,
false /* save model */);
TestModel(valid_places, FLAGS_optimized_model, false /* save model */);
}
#endif // LITE_WITH_NPU
TEST(MobileNetV1, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
TestModel(valid_places);
}
#ifdef LITE_WITH_OPENCL
TEST(MobileNetV1, test_opencl) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kOpenCL), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)}));
TestModel(valid_places);
}
#endif // LITE_WITH_OPENCL
......
......@@ -26,13 +26,12 @@ namespace paddle {
namespace lite {
#ifdef LITE_WITH_ARM
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
void TestModel(const std::vector<Place>& valid_places) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 608, 608})));
......@@ -106,11 +105,10 @@ void TestModel(const std::vector<Place>& valid_places,
TEST(MobileNetV1_YoloV3, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
TestModel(valid_places);
}
#endif // LITE_WITH_ARM
......
......@@ -29,14 +29,13 @@ namespace lite {
#ifdef LITE_WITH_ARM
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place,
const std::string& model_dir = FLAGS_model_dir,
bool save_model = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(model_dir, "", "", preferred_place, valid_places);
predictor.Build(model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......@@ -103,41 +102,32 @@ void TestModel(const std::vector<Place>& valid_places,
#ifdef LITE_WITH_NPU
TEST(MobileNetV2, test_npu) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kNPU), PRECISION(kFloat)},
});
TestModel(valid_places,
Place({TARGET(kARM), PRECISION(kFloat)}),
FLAGS_model_dir,
true /* save_model*/);
TestModel(valid_places, FLAGS_model_dir, true /* save_model*/);
TestModel(valid_places,
Place({TARGET(kARM), PRECISION(kFloat)}),
FLAGS_optimized_model,
false /* save model */);
TestModel(valid_places, FLAGS_optimized_model, false /* save model */);
}
#endif // LITE_WITH_NPU
TEST(MobileNetV2, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
TestModel(valid_places);
}
#ifdef LITE_WITH_OPENCL
TEST(MobileNetV2, test_opencl) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kOpenCL), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)}));
TestModel(valid_places);
}
#endif // LITE_WITH_OPENCL
......
......@@ -82,7 +82,6 @@ void Main() {
target_repr.c_str());
}
}
valid_places.emplace_back(TARGET(kHost));
CHECK(!valid_places.empty())
<< "At least one target should be set, should set the "
......@@ -90,8 +89,8 @@ void Main() {
if (FLAGS_prefer_int8_kernel) {
LOG(WARNING) << "Int8 mode is only support by ARM target";
valid_places.push_back(Place{TARGET(kARM), PRECISION(kInt8)});
config.set_preferred_place(Place{TARGET(kARM), PRECISION(kInt8)});
valid_places.insert(valid_places.begin(),
Place{TARGET(kARM), PRECISION(kInt8)});
}
config.set_valid_places(valid_places);
......
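Because preference now follows array order, the optimize tool inserts the int8 place at the front of `valid_places` instead of calling `set_preferred_place`. The same pattern in user code (a sketch; `FLAGS_prefer_int8_kernel` is the flag used in the hunk above):

```cpp
lite_api::CxxConfig config;
std::vector<Place> valid_places{Place{TARGET(kARM), PRECISION(kFloat)}};
if (FLAGS_prefer_int8_kernel) {
  // Front position == highest kernel-pick priority.
  valid_places.insert(valid_places.begin(),
                      Place{TARGET(kARM), PRECISION(kInt8)});
}
config.set_valid_places(valid_places);
```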
......@@ -30,16 +30,14 @@ TEST(model, test) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kInt8)}});
auto precision = PRECISION(kFloat);
if (FLAGS_int8) {
precision = PRECISION(kInt8);
}
predictor.Build(
FLAGS_model_dir, "", "", Place{TARGET(kARM), precision}, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
int im_width = FLAGS_im_width;
int im_height = FLAGS_im_height;
auto* input_tensor = predictor.GetInput(0);
......
......@@ -36,11 +36,9 @@ void OutputOptModel(const std::string& load_model_dir,
const std::vector<std::vector<int64_t>>& input_shapes) {
lite_api::CxxConfig config;
config.set_model_dir(load_model_dir);
config.set_preferred_place(Place{TARGET(kX86), PRECISION(kFloat)});
config.set_valid_places({
Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kHost), PRECISION(kFloat)},
});
auto predictor = lite_api::CreatePaddlePredictor(config);
......
......@@ -25,14 +25,12 @@
namespace paddle {
namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place,
bool use_npu = false) {
void TestModel(const std::vector<Place>& valid_places, bool use_npu = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 1, 48, 512})));
......@@ -104,11 +102,10 @@ void TestModel(const std::vector<Place>& valid_places,
TEST(OcrAttention, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
TestModel(valid_places);
}
} // namespace lite
......
......@@ -116,14 +116,12 @@ class LITE_API ConfigBase {
/// CxxConfig is the config for the Full feature predictor.
class LITE_API CxxConfig : public ConfigBase {
Place preferred_place_;
std::vector<Place> valid_places_;
std::string model_file_;
std::string param_file_;
bool model_from_memory_{false};
public:
void set_preferred_place(const Place& x) { preferred_place_ = x; }
void set_valid_places(const std::vector<Place>& x) { valid_places_ = x; }
void set_model_file(const std::string& path) { model_file_ = path; }
void set_param_file(const std::string& path) { param_file_ = path; }
......@@ -136,7 +134,6 @@ class LITE_API CxxConfig : public ConfigBase {
model_from_memory_ = true;
}
const Place& preferred_place() const { return preferred_place_; }
const std::vector<Place>& valid_places() const { return valid_places_; }
std::string model_file() const { return model_file_; }
std::string param_file() const { return param_file_; }
......
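With `set_preferred_place` removed from `CxxConfig`, a full-API configuration reduces to an ordered valid-places list. A minimal sketch (the model directory is assumed):

```cpp
lite_api::CxxConfig config;
config.set_model_dir("./mobilenet_v1");  // hypothetical path
config.set_valid_places({
    Place{TARGET(kARM), PRECISION(kFloat)},  // order encodes preference
});
auto predictor = lite_api::CreatePaddlePredictor(config);
```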
......@@ -28,7 +28,6 @@ namespace lite_api {
TEST(CxxApi, run) {
lite_api::CxxConfig config;
config.set_model_dir(FLAGS_model_dir);
config.set_preferred_place(Place{TARGET(kX86), PRECISION(kFloat)});
config.set_valid_places({
Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
......
......@@ -28,14 +28,9 @@ namespace lite {
#ifdef LITE_WITH_ARM
TEST(ResNet18, test) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kARM), PRECISION(kFloat)},
valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......
......@@ -26,13 +26,12 @@ namespace paddle {
namespace lite {
#ifdef LITE_WITH_ARM
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
void TestModel(const std::vector<Place>& valid_places) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......@@ -82,22 +81,20 @@ void TestModel(const std::vector<Place>& valid_places,
TEST(ResNet50, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
TestModel(valid_places);
}
#ifdef LITE_WITH_OPENCL
TEST(ResNet50, test_opencl) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kOpenCL), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)}));
TestModel(valid_places);
}
#endif // LITE_WITH_OPENCL
......
......@@ -29,8 +29,7 @@ namespace lite {
TEST(ResNet50, test) {
lite::Predictor predictor;
std::vector<Place> valid_places(
{Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)},
Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNHWC)}});
{Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)}});
predictor.Build(FLAGS_model_dir,
"",
......
......@@ -25,13 +25,12 @@
namespace paddle {
namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
void TestModel(const std::vector<Place>& valid_places) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim((std::vector<DDim::value_type>({1, 3, 224, 224}))));
......@@ -80,12 +79,11 @@ void TestModel(const std::vector<Place>& valid_places,
TEST(ShuffleNetV2, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
// Place{TARGET(kOpenCL), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
TestModel(valid_places);
}
} // namespace lite
......
......@@ -45,13 +45,11 @@ namespace lite {
#ifdef LITE_WITH_X86
TEST(CXXApi, test_lite_googlenet) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kX86), PRECISION(kFloat)}});
// LOG(INFO)<<"FLAGS_eval_googlenet_dir:"<<FLAGS_test_lite_googlenet_dir;
std::string model_dir = FLAGS_model_dir;
predictor.Build(
model_dir, "", "", Place{TARGET(kX86), PRECISION(kFloat)}, valid_places);
predictor.Build(model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......
......@@ -43,8 +43,7 @@ namespace lite {
TEST(InceptionV4, test_inceptionv4_lite_x86) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kX86), PRECISION(kFloat)}});
// LOG(INFO)<<"FLAGS_eval_googlenet_dir:"<<FLAGS_test_lite_googlenet_dir;
std::string model_dir = FLAGS_model_dir;
......@@ -55,12 +54,7 @@ TEST(InceptionV4, test_inceptionv4_lite_x86) {
"io_copy_kernel_pick_pass",
"variable_place_inference_pass",
"runtime_context_assign_pass"});
predictor.Build(model_dir,
"",
"",
Place{TARGET(kX86), PRECISION(kFloat)},
valid_places,
passes);
predictor.Build(model_dir, "", "", valid_places, passes);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......
......@@ -43,8 +43,7 @@ namespace lite {
TEST(Mobilenet_v1, test_mobilenetv1_lite_x86) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kX86), PRECISION(kFloat)}});
std::string model_dir = FLAGS_model_dir;
std::vector<std::string> passes({"static_kernel_pick_pass",
......@@ -54,12 +53,7 @@ TEST(Mobilenet_v1, test_mobilenetv1_lite_x86) {
"io_copy_kernel_pick_pass",
"variable_place_inference_pass",
"runtime_context_assign_pass"});
predictor.Build(model_dir,
"",
"",
Place{TARGET(kX86), PRECISION(kFloat)},
valid_places,
passes);
predictor.Build(model_dir, "", "", valid_places, passes);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
auto* data = input_tensor->mutable_data<float>();
......
......@@ -44,8 +44,8 @@ namespace lite {
TEST(Mobilenet_v2, test_mobilenetv2_lite_x86) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kHost), PRECISION(kFloat)}});
// LOG(INFO)<<"FLAGS_eval_googlenet_dir:"<<FLAGS_test_lite_googlenet_dir;
std::string model_dir = FLAGS_model_dir;
......@@ -56,12 +56,7 @@ TEST(Mobilenet_v2, test_mobilenetv2_lite_x86) {
"io_copy_kernel_pick_pass",
"variable_place_inference_pass",
"runtime_context_assign_pass"});
predictor.Build(model_dir,
"",
"",
Place{TARGET(kX86), PRECISION(kFloat)},
valid_places,
passes);
predictor.Build(model_dir, "", "", valid_places, passes);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......
......@@ -30,14 +30,9 @@ TEST(unet, test) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kARM), PRECISION(kFloat)},
valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 512, 512})));
......
......@@ -30,16 +30,12 @@ namespace mir {
TEST(fc_fuse_pass, fuse_test) {
lite::Predictor predictor;
#ifndef LITE_WITH_CUDA
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kX86), PRECISION(kFloat)}});
#else
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)},
Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)},
Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)},
});
#endif
......@@ -72,8 +68,7 @@ TEST(fc_fuse_pass, fuse_test) {
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
TEST(fc_fuse_pass, save_model_test) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kX86), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir,
"",
"",
......
......@@ -24,8 +24,8 @@ namespace paddle {
namespace lite {
namespace mir {
bool KernelScoreCmp(const std::pair<size_t, std::unique_ptr<KernelBase>>& a,
const std::pair<size_t, std::unique_ptr<KernelBase>>& b) {
bool KernelScoreCmp(const std::pair<float, std::unique_ptr<KernelBase>>& a,
const std::pair<float, std::unique_ptr<KernelBase>>& b) {
return a.first > b.first;
}
......@@ -44,12 +44,12 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
auto& instruct = node.AsStmt();
// Get candidate kernels
std::vector<std::pair<size_t, std::unique_ptr<KernelBase>>> scored;
std::vector<std::pair<float, std::unique_ptr<KernelBase>>> scored;
CHECK(!instruct.kernels().empty()) << "No kernels found for "
<< instruct.op_type();
VLOG(4) << "instruct.kernels().size():" << instruct.kernels().size();
for (auto&& kernel : instruct.kernels()) {
size_t score = KernelGrade(*kernel);
float score = KernelGrade(*kernel, graph->valid_places());
VLOG(4) << "kernel->summary():" << kernel->summary()
<< " score:" << score;
scored.emplace_back(score, std::move(kernel));
......@@ -99,7 +99,7 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
instruct.ResetOp(update_desc, graph->valid_places());
scored.clear();
for (auto&& kernel : instruct.kernels()) {
size_t score = KernelGrade(*kernel);
float score = KernelGrade(*kernel, graph->valid_places());
scored.emplace_back(score, std::move(kernel));
}
std::sort(scored.begin(), scored.end(), KernelScoreCmp);
......
......@@ -16,6 +16,7 @@
#include <limits>
#include <memory>
#include <vector>
#include "lite/core/mir/pass.h"
#include "lite/core/types.h"
......@@ -38,8 +39,6 @@ class StaticKernelPickPass : public mir::StmtPass {
public:
void Apply(const std::unique_ptr<SSAGraph>& graph) override;
void SetPreferPlace(const Place& place) { place_ = place; }
const Place& place() const { return place_; }
const core::KernelPickFactor& kernel_pick_factors() const {
return kernel_pick_factors_;
}
......@@ -49,39 +48,63 @@ class StaticKernelPickPass : public mir::StmtPass {
private:
// Score the kernel.
size_t KernelGrade(const lite::KernelBase& kernel) {
size_t score{};
size_t KernelGrade(const lite::KernelBase& kernel,
const std::vector<Place>& places) {
CHECK_GT(places.size(), 0) << "valid_places is empty.";
float final_score{-1.};
Place winner_place{places[0]};
const int kMax =
std::numeric_limits<core::KernelPickFactor::value_type>::max();
VLOG(4) << "[score s1]:" << score;
// The more important factor comes first
if (kernel_pick_factors_.IsTargetConsidered() &&
(place().target == kernel.target() || kernel.target() == TARGET(kAny) ||
place().target == TARGET(kAny))) {
score +=
kMax / static_cast<int>(core::KernelPickFactor::Factor::TargetFirst);
}
VLOG(4) << "[score s2]:" << score;
if (kernel_pick_factors_.IsPrecisionConsidered() &&
(place().precision == kernel.precision() ||
kernel.precision() == PRECISION(kAny) ||
place().precision == PRECISION(kAny))) {
score += kMax /
static_cast<int>(core::KernelPickFactor::Factor::PrecisionFirst);
}
VLOG(4) << "[score s3]:" << score;
if (kernel_pick_factors_.IsDataLayoutConsidered() &&
(place().layout == kernel.layout() ||
kernel.layout() == DATALAYOUT(kAny) ||
place().layout == DATALAYOUT(kAny))) {
score += kMax / static_cast<int>(
core::KernelPickFactor::Factor::DataLayoutFirst);
size_t place_size = places.size();
// NOTE: We compare the kernel's place with each place in valid_places to
// select the best match; the order of places in the valid_places array
// encodes the user's preference.
//   final_score = weight * score
//   weight: computed as (valid_places.size() - i) / valid_places.size()
//           by default, where i is the place's index in valid_places.
//   score:  the weighted sum of the target, precision, and layout factors.
for (int i = 0; i < place_size; ++i) {
const auto& place = places[i];
float weight = static_cast<float>(place_size - i) / place_size;
size_t score{};
// The more important factor comes first
if (kernel_pick_factors_.IsTargetConsidered() &&
(place.target == kernel.target() || kernel.target() == TARGET(kAny) ||
place.target == TARGET(kAny))) {
score += kMax /
static_cast<int>(core::KernelPickFactor::Factor::TargetFirst);
}
VLOG(4) << "[score s1]:" << score;
if (kernel_pick_factors_.IsPrecisionConsidered() &&
(place.precision == kernel.precision() ||
kernel.precision() == PRECISION(kAny) ||
place.precision == PRECISION(kAny))) {
score += kMax / static_cast<int>(
core::KernelPickFactor::Factor::PrecisionFirst);
}
VLOG(4) << "[score s2]:" << score;
if (kernel_pick_factors_.IsDataLayoutConsidered() &&
(place.layout == kernel.layout() ||
kernel.layout() == DATALAYOUT(kAny) ||
place.layout == DATALAYOUT(kAny))) {
score += kMax / static_cast<int>(
core::KernelPickFactor::Factor::DataLayoutFirst);
}
VLOG(4) << "[score s3]:" << score;
if (weight * score > final_score) {
final_score = weight * score;
winner_place = place;
}
}
VLOG(4) << "[score s4(final)]:" << score;
VLOG(4) << "[score(final)]:" << final_score;
VLOG(4) << "-------- pick summary --------";
VLOG(4) << " ===> place():" << PrecisionToStr(place().precision) << " "
<< DataLayoutToStr(place().layout) << " "
<< TargetToStr(place().target);
VLOG(4) << " ===> place():" << PrecisionToStr(winner_place.precision) << " "
<< DataLayoutToStr(winner_place.layout) << " "
<< TargetToStr(winner_place.target);
VLOG(4) << " ===> kernel.place():"
<< PrecisionToStr(kernel.place().precision) << " "
<< DataLayoutToStr(kernel.place().layout) << " "
......@@ -89,20 +112,18 @@ class StaticKernelPickPass : public mir::StmtPass {
VLOG(4) << "kernel.op_type():" << kernel.op_type();
VLOG(4) << "picker tactic " << kernel_pick_factors_;
VLOG(4) << "kernel place " << kernel.place().DebugString();
VLOG(4) << "picker place " << place().DebugString();
VLOG(4) << "score " << score;
VLOG(4) << "picker place " << winner_place.DebugString();
VLOG(4) << "------------------------------";
// The data layout is not considered, for the input and output arguments
// might have different data layout.
// TODO(Superjomn) reconsider the idea of taking the data layout as a kernel
// specification.
return score;
return final_score;
}
private:
core::KernelPickFactor kernel_pick_factors_;
Place place_;
};
} // namespace mir
......
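To make the weighting concrete: with two valid places, the first gets weight 2/2 = 1.0 and the second 1/2 = 0.5, so a kernel that only matches the second place can never outrank one that matches the first. A self-contained toy version of the rule (targets reduced to ints; the real pass scores target, precision, and layout with `KernelPickFactor` weights):

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

// Toy stand-in for KernelGrade: score is 1 if the kernel's target matches
// the place's target, else 0; the weight decays with the place's index.
float Grade(int kernel_target, const std::vector<int>& place_targets) {
  float final_score = -1.f;
  const std::size_t n = place_targets.size();
  for (std::size_t i = 0; i < n; ++i) {
    float weight = static_cast<float>(n - i) / n;  // earlier place, larger weight
    float score = (place_targets[i] == kernel_target) ? 1.f : 0.f;
    final_score = std::max(final_score, weight * score);
  }
  return final_score;
}

int main() {
  std::vector<int> places = {/*kARM=*/0, /*kOpenCL=*/1};
  std::cout << Grade(0, places) << "\n";  // 1.0: matches the first place
  std::cout << Grade(1, places) << "\n";  // 0.5: matches the second place
}
```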
......@@ -106,7 +106,6 @@ std::shared_ptr<lite_api::PaddlePredictor> TestModel(
const std::string& model_dir,
const std::string& model_file,
const std::string& params_file,
const lite_api::Place& preferred_place,
const std::vector<lite_api::Place>& valid_places,
const std::vector<std::vector<int64_t>>& input_tensor_shape,
const std::string& optimized_model_dir) {
......@@ -115,7 +114,6 @@ std::shared_ptr<lite_api::PaddlePredictor> TestModel(
cxx_config.set_model_dir(model_dir);
cxx_config.set_model_file(model_file);
cxx_config.set_param_file(params_file);
cxx_config.set_preferred_place(preferred_place);
cxx_config.set_valid_places(valid_places);
auto predictor = lite_api::CreatePaddlePredictor(cxx_config);
FillInputTensor(predictor, input_tensor_shape, 1);
......@@ -151,9 +149,7 @@ TEST(NPUSubgraph, compare) {
TestModel(FLAGS_model_dir,
FLAGS_model_file,
FLAGS_params_file,
lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
{lite_api::Place{TARGET(kHost), PRECISION(kFloat)},
lite_api::Place{TARGET(kARM), PRECISION(kFloat)}},
{lite_api::Place{TARGET(kARM), PRECISION(kFloat)}},
input_tensor_shape,
FLAGS_optimized_model_dir + "/CPU");
// generate and run optimized NPU model
......@@ -162,9 +158,7 @@ TEST(NPUSubgraph, compare) {
TestModel(FLAGS_model_dir,
FLAGS_model_file,
FLAGS_params_file,
lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
{lite_api::Place{TARGET(kHost), PRECISION(kFloat)},
lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
{lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
lite_api::Place{TARGET(kNPU), PRECISION(kFloat)}},
input_tensor_shape,
FLAGS_optimized_model_dir + "/NPU");
......
......@@ -63,18 +63,6 @@ TEST(variable_place_inference_pass, test) {
"type_target_cast_pass", //
});
Place prefered_place{
#ifdef PADDLE_WITH_CUDA
TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW),
#else
#ifdef PADDLE_WITH_ARM
TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW),
#else // X86
TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW),
#endif // ARM
#endif
};
optimizer.KernelPickPreferPlace(prefered_place);
optimizer.Run(std::move(program), places, factor, passes);
}
......
......@@ -117,13 +117,6 @@ class Optimizer {
exec_scope_ = program.exec_scope();
}
void KernelPickPreferPlace(const Place& place) {
auto* pass = mir::PassManager::Global().LookUp<mir::StaticKernelPickPass>(
"static_kernel_pick_pass");
CHECK(pass);
pass->SetPreferPlace(place);
}
const lite::Scope* exec_scope() const { return exec_scope_; }
// Generate a new program based on the mir graph.
......
......@@ -38,10 +38,8 @@ void RunModel() {
config.set_model_dir(FLAGS_model_dir);
std::vector<Place> valid_places{Place{TARGET(kARM), PRECISION(kFloat)}};
if (FLAGS_prefer_int8_kernel) {
valid_places.push_back(Place{TARGET(kARM), PRECISION(kInt8)});
config.set_preferred_place(Place{TARGET(kARM), PRECISION(kInt8)});
} else {
config.set_preferred_place(Place{TARGET(kARM), PRECISION(kFloat)});
valid_places.insert(valid_places.begin(),
Place{TARGET(kARM), PRECISION(kInt8)});
}
config.set_valid_places(valid_places);
......
......@@ -102,7 +102,7 @@ class Module {
void AddValidPlaceDecl() {
// clang-format off
Line("std::vector<lite::Place> valid_places({lite::Place({TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW)}), lite::Place({TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)})});"); // NOLINT
Line("std::vector<lite::Place> valid_places({lite::Place({TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW)})});"); // NOLINT
// clang-format on
}
......
......@@ -35,7 +35,6 @@ void Run(DebugConfig* conf) {
#endif
lite::Predictor predictor;
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
#ifdef LITE_WITH_ARM
Place{TARGET(kARM), PRECISION(kFloat)},
#endif
......@@ -60,23 +59,7 @@ void Run(DebugConfig* conf) {
"runtime_context_assign_pass",
}};
predictor.Build(conf->model_dir,
"",
"",
#ifdef LITE_WITH_ARM
Place{TARGET(kARM), PRECISION(kFloat)},
#endif
#ifdef LITE_WITH_X86
Place{TARGET(kX86), PRECISION(kFloat)},
#endif
#ifdef LITE_WITH_FPGA
Place{TARGET(kFPGA), PRECISION(kFloat)},
#endif
#ifdef LITE_WITH_CUDA
Place{TARGET(kCUDA), PRECISION(kFloat)},
#endif
valid_places,
passes);
predictor.Build(conf->model_dir, "", "", valid_places, passes);
predictor.GenRuntimeProgram();
auto& instructions = predictor.runtime_program().instructions();
......