diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f656e065a065ab65d461ba2901a548fcf9b4e42a..7771b24872fc7bac9b0a02c12b103b005da12dbe 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -78,6 +78,7 @@ build:mobile_android: paths: - $MOBILE_LITE_CACHE0 - $MOBILE_LITE_CACHE1 + - $MOBILE_LITE_CACHE2 - ~/.ccache - $CI_PROJECT_DIR/build_mobile_ccache script: @@ -98,6 +99,7 @@ build:mobile_armlinux: paths: - $MOBILE_LITE_CACHE0 - $MOBILE_LITE_CACHE1 + - $MOBILE_LITE_CACHE2 - ~/.ccache - $CI_PROJECT_DIR/build_mobile_ccache2 script: @@ -107,24 +109,13 @@ build:mobile_armlinux: dependencies: - build:server - cache: - key: mobile_thirdparty - paths: - - $MOBILE_LITE_CACHE0 - - $MOBILE_LITE_CACHE1 - - ~/.ccache build:mobile_model_mobilenetv1: tags: - lite stage: build_mobile image: $MOBILE_LITE_DOCKER_IMAGE - cache: - key: mobile_thirdparty - paths: - - $MOBILE_LITE_CACHE0 - - $MOBILE_LITE_CACHE1 - - ~/.ccache + script: - export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_model_mobilenetv1 - ./paddle/fluid/lite/tools/build.sh build_test_arm_model_mobilenetv1 @@ -137,6 +128,7 @@ build:mobile_model_mobilenetv1: paths: - $MOBILE_LITE_CACHE0 - $MOBILE_LITE_CACHE1 + - $MOBILE_LITE_CACHE2 - ~/.ccache - $CI_PROJECT_DIR/build_mobile_model_mobilenetv1 @@ -145,12 +137,7 @@ build:mobile_model_mobilenetv2: - lite stage: build_mobile image: $MOBILE_LITE_DOCKER_IMAGE - cache: - key: mobile_thirdparty - paths: - - $MOBILE_LITE_CACHE0 - - $MOBILE_LITE_CACHE1 - - ~/.ccache + script: - export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_model_mobilenetv2 - ./paddle/fluid/lite/tools/build.sh build_test_arm_model_mobilenetv2 @@ -163,6 +150,7 @@ build:mobile_model_mobilenetv2: paths: - $MOBILE_LITE_CACHE0 - $MOBILE_LITE_CACHE1 + - $MOBILE_LITE_CACHE2 - ~/.ccache - $CI_PROJECT_DIR/build_mobile_model_mobilenetv2 @@ -171,12 +159,7 @@ build:mobile_model_resnet50: - lite stage: build_mobile image: $MOBILE_LITE_DOCKER_IMAGE - cache: - key: mobile_thirdparty - paths: - - $MOBILE_LITE_CACHE0 - - $MOBILE_LITE_CACHE1 - - ~/.ccache + script: - export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_model_resnet50 - ./paddle/fluid/lite/tools/build.sh build_test_arm_model_resnet50 @@ -189,6 +172,7 @@ build:mobile_model_resnet50: paths: - $MOBILE_LITE_CACHE0 - $MOBILE_LITE_CACHE1 + - $MOBILE_LITE_CACHE2 - ~/.ccache - $CI_PROJECT_DIR/build_mobile_model_resnet50 diff --git a/paddle/fluid/lite/CMakeLists.txt b/paddle/fluid/lite/CMakeLists.txt index e2a8984b459ce135a81170bcc3f293deafc61bb6..c43f055cec278fb70f3027ba2044459efebbe663 100644 --- a/paddle/fluid/lite/CMakeLists.txt +++ b/paddle/fluid/lite/CMakeLists.txt @@ -24,8 +24,7 @@ function(lite_download_and_uncompress INSTALL_DIR URL FILENAME) ${EXTERNAL_PROJECT_NAME} ${EXTERNAL_PROJECT_LOG_ARGS} PREFIX ${INSTALL_DIR} - DOWNLOAD_COMMAND wget --no-check-certificate -q -O ${INSTALL_DIR}/${FILENAME} ${URL}/${FILENAME} && - ${CMAKE_COMMAND} -E tar xzf ${INSTALL_DIR}/${FILENAME} + DOWNLOAD_COMMAND wget --no-check-certificate -q -O ${INSTALL_DIR}/${FILENAME} ${URL}/${FILENAME} && ${CMAKE_COMMAND} -E tar xzf ${INSTALL_DIR}/${FILENAME} DOWNLOAD_DIR ${INSTALL_DIR} DOWNLOAD_NO_PROGRESS 1 CONFIGURE_COMMAND "" @@ -143,6 +142,8 @@ function(lite_cc_binary TARGET) HVY_DEPS ${args_HVY_DEPS} ) cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS}) + # collect targets need to compile for lite + add_dependencies(lite_compile_deps ${TARGET}) endfunction() # Add a unit-test name to file for latter offline manual test. 
diff --git a/paddle/fluid/lite/api/CMakeLists.txt b/paddle/fluid/lite/api/CMakeLists.txt index 3cac3eeba6d4aef3d7af88979e79ee0cbf5b2efe..4440acd61b8e64bfdeccf455f641cb57eb0cdcdf 100644 --- a/paddle/fluid/lite/api/CMakeLists.txt +++ b/paddle/fluid/lite/api/CMakeLists.txt @@ -12,7 +12,6 @@ lite_cc_library(lite_api_test_helper SRCS lite_api_test_helper.cc CUDA_DEPS kernels_cuda X86_DEPS ${x86_kernels} ) -lite_cc_library(cxx_api_lite SRCS cxx_api.cc DEPS lite_api_test_helper) set(light_api_deps scope_lite target_wrapper_host model_parser_lite program_lite) @@ -21,27 +20,34 @@ if(LITE_WITH_CUDA) set(light_api_deps ${light_api_deps} target_wrapper_cuda) endif() -lite_cc_library(light_api_lite SRCS light_api.cc - DEPS ${light_api_deps} ${ops_lite} ${host_kernels} - ) - message(STATUS "get ops ${ops_lite}") message(STATUS "get Host kernels ${host_kernels}") message(STATUS "get ARM kernels ${arm_kernels}") +lite_cc_library(cxx_api_lite SRCS cxx_api.cc DEPS ${cxx_api_lite_deps} ${ops_lite} ${host_kernels} program_lite) + +lite_cc_library(light_api_lite SRCS light_api.cc + DEPS scope_lite target_wrapper_host model_parser_lite + ${light_api_deps} ${ops_lite} ${host_kernels} program_lite + CUDA_DEPS target_wrapper_cuda + X86_DEPS ${x86_kernels} operator + ARM_DEPS ${arm_kernels} + ) + include(ExternalProject) set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING "A path setting inference demo download directories.") if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING) lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc - DEPS cxx_api_lite mir_passes + DEPS cxx_api_lite mir_passes lite_api_test_helper ${ops_lite} ${host_kernels} ${x86_kernels} ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz) endif() + if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING) set(lite_model_test_DEPS cxx_api_lite mir_passes ${ops_lite} ${host_kernels} ${arm_kernels}) @@ -68,25 +74,20 @@ endif() # These tests needs CLI arguments, and is not supported in ARM CI. # TODO(Superjomn) support latter. 
-if(NOT LITE_ON_MOBILE) - lite_cc_test(test_light_api SRCS light_api_test.cc - DEPS light_api_lite mir_passes - X86_DEPS ${x86_kernels} - ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt - SERIAL) +lite_cc_test(test_light_api SRCS light_api_test.cc + DEPS light_api_lite program_lite mir_passes + ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt + SERIAL) +if(NOT LITE_ON_MOBILE) lite_cc_test(test_apis_lite SRCS apis_test.cc - DEPS cxx_api_lite light_api_lite ${ops_lite} mir_passes - X86_DEPS ${x86_kernels} + DEPS cxx_api_lite light_api_lite ${ops_lite} + X86_DEPS ${x86_kernels} operator ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) endif() -lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc - DEPS - cxx_api_lite - model_parser_lite - target_wrapper_host - mir_passes - ${ops_lite} ${host_kernels} - ARM_DEPS ${arm_kernels}) +#lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc + #X86_DEPS operator + #DEPS light_api_lite model_parser_lite target_wrapper_host mir_passes + #ARM_DEPS ${arm_kernels}) diff --git a/paddle/fluid/lite/api/apis_test.cc b/paddle/fluid/lite/api/apis_test.cc index 7dd6a1193754437a32957f081b3be3fd5c1fc403..0b8e9550a104aeda94147ecdb9032424aa0baab1 100644 --- a/paddle/fluid/lite/api/apis_test.cc +++ b/paddle/fluid/lite/api/apis_test.cc @@ -39,7 +39,7 @@ void SetConstInput(lite::Tensor* x) { } } -bool CompareTensors(const std::string& name, const ExecutorLite& cxx_api, +bool CompareTensors(const std::string& name, const Predictor& cxx_api, const LightPredictor& light_api) { const auto* a = cxx_api.GetTensor(name); const auto* b = light_api.GetTensor(name); @@ -48,8 +48,8 @@ bool CompareTensors(const std::string& name, const ExecutorLite& cxx_api, #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK TEST(CXXApi_LightApi, save_and_load_model) { - lite::ExecutorLite cxx_api; - lite::LightPredictor light_api; + lite::Predictor cxx_api; + lite::LightPredictor light_api(FLAGS_optimized_model); // CXXAPi { @@ -69,8 +69,6 @@ TEST(CXXApi_LightApi, save_and_load_model) { // LightApi { - light_api.Build(FLAGS_optimized_model); - auto* x = light_api.GetInput(0); SetConstInput(x); diff --git a/paddle/fluid/lite/api/cxx_api.cc b/paddle/fluid/lite/api/cxx_api.cc index 1ea8be2c0b588ed58c82a70f4ef9263c46d15654..7c6ffccfa0bdab393c6870283834c76c5d1a2668 100644 --- a/paddle/fluid/lite/api/cxx_api.cc +++ b/paddle/fluid/lite/api/cxx_api.cc @@ -17,19 +17,49 @@ #include #include #include -#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK -#include "paddle/fluid/platform/port.h" -#endif +#include "paddle/fluid/lite/utils/io.h" namespace paddle { namespace lite { -#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK -void ExecutorLite::SaveModel(const std::string &dir) { - MkDirRecursively(dir.c_str()); +void Predictor::SaveModel(const std::string &dir) { +#ifndef LITE_WITH_ARM + LOG(INFO) << "Save model to " << dir; + MkDirRecur(dir); program_->PersistModel(dir, program_desc_); -} +#else + LOG(INFO) << "Save model to ./"; + program_->PersistModel("./", program_desc_); #endif +} + +lite::Tensor *Predictor::GetInput(size_t offset) { + auto *_feed_list = program_->exec_scope()->FindVar("feed"); + CHECK(_feed_list) << "no feed variable in exec_scope"; + auto *feed_list = _feed_list->GetMutable>(); + if (offset >= feed_list->size()) { + feed_list->resize(offset + 1); + } + return &feed_list->at(offset); +} + +const lite::Tensor *Predictor::GetOutput(size_t offset) { + auto *_fetch_list = program_->exec_scope()->FindVar("fetch"); + 
CHECK(_fetch_list) << "no fetch variable in exec_scope"; + auto &fetch_list = *_fetch_list->GetMutable>(); + CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow"; + return &fetch_list.at(offset); +} + +void Predictor::Build(const std::string &model_path, const Place &prefer_place, + const std::vector &valid_places) { + LoadModel(model_path, scope_.get(), &program_desc_); + Build(program_desc_, prefer_place, valid_places); +} + +const framework::proto::ProgramDesc &Predictor::program_desc() const { + return program_desc_; +} } // namespace lite } // namespace paddle diff --git a/paddle/fluid/lite/api/cxx_api.h b/paddle/fluid/lite/api/cxx_api.h index 915a469a58765f102ff01c28ed9856d185311168..e7b74a04da25ba3d228aba78d9a5ce9d0909d708 100644 --- a/paddle/fluid/lite/api/cxx_api.h +++ b/paddle/fluid/lite/api/cxx_api.h @@ -26,20 +26,20 @@ namespace paddle { namespace lite { -struct Config {}; - -class ExecutorLite { +/* + * Predictor for inference. Given a model, it optimizes and executes it. + */ +class Predictor { public: - ExecutorLite() { scope_ = std::make_shared(); } - explicit ExecutorLite(const std::shared_ptr& root_scope) { - scope_ = root_scope; - } + // Create an empty predictor. + Predictor() { scope_ = std::make_shared(); } + // Create a predictor with the weight variable scope set. + explicit Predictor(const std::shared_ptr& root_scope) + : scope_(root_scope) {} + // Build from a model, with places set for hardware config. void Build(const std::string& model_path, const Place& prefer_place, - const std::vector& valid_places) { - LoadModel(model_path, scope_.get(), &program_desc_); - Build(program_desc_, prefer_place, valid_places); - } + const std::vector& valid_places); void Build(const framework::proto::ProgramDesc& desc, const Place& prefer_place, @@ -55,40 +55,24 @@ class ExecutorLite { program_ = optimizer_.GenRuntimeProgram(); } -// This method is disabled in mobile, or unnecessary dependencies required. -#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK - void SaveModel(const std::string& dir); -#endif + // Run the predictor for a single batch of data. + void Run() { program_->Run(); } - // Get offset-th col of feed. - lite::Tensor* GetInput(size_t offset) { - auto* _feed_list = program_->exec_scope()->FindVar("feed"); - CHECK(_feed_list) << "no feed variable in exec_scope"; - auto* feed_list = _feed_list->GetMutable>(); - if (offset >= feed_list->size()) { - feed_list->resize(offset + 1); - } - return &feed_list->at(offset); - } + // Get offset-th col of feed inputs. + lite::Tensor* GetInput(size_t offset); - const lite::Tensor* GetOutput(size_t offset) { - auto* _fetch_list = program_->exec_scope()->FindVar("fetch"); - CHECK(_fetch_list) << "no fatch variable in exec_scope"; - auto& fetch_list = *_fetch_list->GetMutable>(); - CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow"; - return &fetch_list.at(offset); - } + // Get offset-th col of fetch results. + const lite::Tensor* GetOutput(size_t offset); + // Return the program desc for debug. + const framework::proto::ProgramDesc& program_desc() const; const lite::Tensor* GetTensor(const std::string& name) const { auto* var = program_->exec_scope()->FindVar(name); return &var->Get(); } - void Run() { program_->Run(); } - - const framework::proto::ProgramDesc& program_desc() const { - return program_desc_; - } + // This method is disabled on mobile because it would pull in unnecessary dependencies.
+ void SaveModel(const std::string& dir); private: Optimizer optimizer_; @@ -97,6 +81,7 @@ class ExecutorLite { std::unique_ptr program_; }; +#ifdef LITE_WITH_X86 /* * An executor for training. * @@ -120,13 +105,13 @@ class CXXTrainer { : scope_(root_scope), preferred_place_(preferred_place), valid_places_(valid_places), - main_program_executor_(ExecutorLite(scope_)) {} + main_program_executor_(Predictor(scope_)) {} // Build the RuntimeProgram cache for the main program. The cache will run // multiple times for the epoches. // NOTE Just support to execute the 0-th block currently. - ExecutorLite& BuildMainProgramExecutor( - const framework::proto::ProgramDesc& desc, int block_id = 0) { + Predictor& BuildMainProgramExecutor(const framework::proto::ProgramDesc& desc, + int block_id = 0) { main_program_executor_.Build(desc, preferred_place_, valid_places_); return main_program_executor_; } @@ -134,7 +119,7 @@ class CXXTrainer { // Run the startup program. It just executes once, no cache needed. void RunStartupProgram(const framework::proto::ProgramDesc& desc, int block_id = 0) { - ExecutorLite exe(scope_); + Predictor exe(scope_); exe.Build(desc, preferred_place_, valid_places_); exe.Run(); } @@ -146,8 +131,9 @@ class CXXTrainer { std::vector valid_places_; // The training program. - ExecutorLite main_program_executor_; + Predictor main_program_executor_; }; +#endif } // namespace lite } // namespace paddle diff --git a/paddle/fluid/lite/api/cxx_api_bin.cc b/paddle/fluid/lite/api/cxx_api_bin.cc index 58cf5dd785efc5de02e746e0ef1d5609a7c120a5..36f6ed45a10653aec74658a3c4794954d65dd1f5 100644 --- a/paddle/fluid/lite/api/cxx_api_bin.cc +++ b/paddle/fluid/lite/api/cxx_api_bin.cc @@ -34,7 +34,7 @@ void Run(const char* model_dir, int repeat, int thread_num) { DeviceInfo::Init(); DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, thread_num); #endif - lite::ExecutorLite predictor; + lite::Predictor predictor; std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}}); diff --git a/paddle/fluid/lite/api/cxx_api_test.cc b/paddle/fluid/lite/api/cxx_api_test.cc index 093f8b73055fd0e9a8caed33430460b68cb8fbea..a1a028a5453a25f025bb55a4f81d4b94445480bb 100644 --- a/paddle/fluid/lite/api/cxx_api_test.cc +++ b/paddle/fluid/lite/api/cxx_api_test.cc @@ -42,7 +42,7 @@ TEST(CXXApi, test) { } TEST(CXXApi, save_model) { - lite::ExecutorLite predictor; + lite::Predictor predictor; std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kX86), PRECISION(kFloat)}}); predictor.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)}, diff --git a/paddle/fluid/lite/api/inceptionv4_test.cc b/paddle/fluid/lite/api/inceptionv4_test.cc index b0f0aaf3c13abe9e5fb02c8a47c29a66842008af..7908a8110045c53ad6f0b4f33702dccf58e5b1b5 100644 --- a/paddle/fluid/lite/api/inceptionv4_test.cc +++ b/paddle/fluid/lite/api/inceptionv4_test.cc @@ -30,7 +30,7 @@ namespace lite { #ifdef LITE_WITH_ARM TEST(InceptionV4, test) { DeviceInfo::Init(); - lite::ExecutorLite predictor; + lite::Predictor predictor; std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}}); diff --git a/paddle/fluid/lite/api/light_api.cc b/paddle/fluid/lite/api/light_api.cc index 9d3da3a5919e9cb07fbfc67dc0c7538d96775db4..6a7e20a053c8d355289f51636966f55eb429b897 100644 --- a/paddle/fluid/lite/api/light_api.cc +++ b/paddle/fluid/lite/api/light_api.cc @@ -13,3 +13,67 @@ // limitations under the License. 
#include "paddle/fluid/lite/api/light_api.h" + +namespace paddle { +namespace lite { + +void LightPredictor::Build(const std::string& model_dir) { + framework::proto::ProgramDesc desc; + LoadModel(model_dir, scope_.get(), &desc); + BuildRuntimeProgram(desc); +} + +Tensor* LightPredictor::GetInput(size_t offset) { + auto* _feed_list = program_->exec_scope()->FindVar("feed"); + CHECK(_feed_list) << "no feed variable in exec_scope"; + auto* feed_list = _feed_list->GetMutable>(); + if (offset >= feed_list->size()) { + feed_list->resize(offset + 1); + } + return &feed_list->at(offset); +} + +const Tensor* LightPredictor::GetOutput(size_t offset) { + auto* _fetch_list = program_->exec_scope()->FindVar("fetch"); + CHECK(_fetch_list) << "no fatch variable in exec_scope"; + auto& fetch_list = *_fetch_list->GetMutable>(); + CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow"; + return &fetch_list.at(offset); +} + +void LightPredictor::BuildRuntimeProgram( + const framework::proto::ProgramDesc& prog) { + std::vector insts; + // 1. Create op first + Program program(prog, scope_, {}); + + // 2. Create Instructs + + // Create the kernels of the target places, and filter out the specific + // kernel with the target alias. + for (auto& op : program.ops()) { + auto kernel_type = op->op_info()->GetAttr(kKernelTypeAttr); + std::string op_type, alias; + Place place; + KernelBase::ParseKernelType(kernel_type, &op_type, &alias, &place); + auto kernels = op->CreateKernels({place}); + // filter out a kernel + auto it = std::find_if( + kernels.begin(), kernels.end(), + [&](std::unique_ptr& it) { return it->alias() == alias; }); + CHECK(it != kernels.end()); + (*it)->SetContext(ContextScheduler::Global().NewContext((*it)->target())); + insts.emplace_back(op, std::move(*it)); + } + program_.reset(new RuntimeProgram(std::move(insts))); + CHECK(program.exec_scope()); + program_->set_exec_scope(program.exec_scope()); +} + +LightPredictor::LightPredictor(const std::string& model_dir) { + scope_ = std::make_shared(); + Build(model_dir); +} + +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/api/light_api.h b/paddle/fluid/lite/api/light_api.h index 5085909385c94e2e81b2cfa14167e8ce886060a3..bf1d7e95a3d90c8db3090815b150926551f63113 100644 --- a/paddle/fluid/lite/api/light_api.h +++ b/paddle/fluid/lite/api/light_api.h @@ -32,36 +32,21 @@ namespace paddle { namespace lite { +/* + * The light weight predictor, mainly for mobile. It loads an optimized model, + * and will not depend on the MIR or perform latter optimization. + */ class LightPredictor { public: - LightPredictor() { scope_ = std::make_shared(); } - - void Build(const std::string& model_dir) { - framework::proto::ProgramDesc desc; - LoadModel(model_dir, scope_.get(), &desc); - BuildRuntimeProgram(desc); - } + explicit LightPredictor(const std::string& model_dir); void Run() { program_->Run(); } - // Get offset-th col of feed. - Tensor* GetInput(size_t offset) { - auto* _feed_list = program_->exec_scope()->FindVar("feed"); - CHECK(_feed_list) << "no feed variable in exec_scope"; - auto* feed_list = _feed_list->GetMutable>(); - if (offset >= feed_list->size()) { - feed_list->resize(offset + 1); - } - return &feed_list->at(offset); - } + // Get offset-th col of feed inputs. 
+ Tensor* GetInput(size_t offset); - const Tensor* GetOutput(size_t offset) { - auto* _fetch_list = program_->exec_scope()->FindVar("fetch"); - CHECK(_fetch_list) << "no fatch variable in exec_scope"; - auto& fetch_list = *_fetch_list->GetMutable>(); - CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow"; - return &fetch_list.at(offset); - } + // Get offset-th col of fetch outputs. + const Tensor* GetOutput(size_t offset); const lite::Tensor* GetTensor(const std::string& name) const { auto* var = program_->exec_scope()->FindVar(name); @@ -69,34 +54,8 @@ class LightPredictor { } private: - void BuildRuntimeProgram(const framework::proto::ProgramDesc& prog) { - std::vector insts; - // 1. Create op first - Program program(prog, scope_, {}); - - // 2. Create Instructs - - // Create the kernels of the target places, and filter out the specific - // kernel with the target alias. - for (auto& op : program.ops()) { - auto kernel_type = op->op_info()->GetAttr(kKernelTypeAttr); - std::string op_type, alias; - Place place; - KernelBase::ParseKernelType(kernel_type, &op_type, &alias, &place); - auto kernels = op->CreateKernels({place}); - // filter out a kernel - auto it = std::find_if(kernels.begin(), kernels.end(), - [&](std::unique_ptr& it) { - return it->alias() == alias; - }); - CHECK(it != kernels.end()); - (*it)->SetContext(ContextScheduler::Global().NewContext((*it)->target())); - insts.emplace_back(op, std::move(*it)); - } - program_.reset(new RuntimeProgram(std::move(insts))); - CHECK(program.exec_scope()); - program_->set_exec_scope(program.exec_scope()); - } + void Build(const std::string& model_dir); + void BuildRuntimeProgram(const framework::proto::ProgramDesc& prog); private: std::shared_ptr scope_; diff --git a/paddle/fluid/lite/api/light_api_test.cc b/paddle/fluid/lite/api/light_api_test.cc index faf53b8177a4d11fb33017599ecdb9dc650fbc43..d7e58fbe56cee4055c422af9a8881e664cc26605 100644 --- a/paddle/fluid/lite/api/light_api_test.cc +++ b/paddle/fluid/lite/api/light_api_test.cc @@ -25,8 +25,10 @@ namespace paddle { namespace lite { TEST(LightAPI, load) { - LightPredictor predictor; - predictor.Build(FLAGS_optimized_model); + if (FLAGS_optimized_model.empty()) { + FLAGS_optimized_model = "lite_naive_model"; + } + LightPredictor predictor(FLAGS_optimized_model); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({100, 100}))); diff --git a/paddle/fluid/lite/api/lite_api_test_helper.cc b/paddle/fluid/lite/api/lite_api_test_helper.cc index b82541723308f4748e28c64affa6899bf2d9b727..3c0835bc49b32a336848e9b9e88ea2afa3f1c698 100644 --- a/paddle/fluid/lite/api/lite_api_test_helper.cc +++ b/paddle/fluid/lite/api/lite_api_test_helper.cc @@ -22,7 +22,7 @@ namespace paddle { namespace lite { const lite::Tensor* RunHvyModel() { - lite::ExecutorLite predictor; + lite::Predictor predictor; #ifndef LITE_WITH_CUDA std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kX86), PRECISION(kFloat)}}); diff --git a/paddle/fluid/lite/api/mobilenetv1_test.cc b/paddle/fluid/lite/api/mobilenetv1_test.cc index 527b387a4260b46f8033ce7e8a1b8b5ae91a7928..94935e8699643577b309fb294a18ea848a5ad567 100644 --- a/paddle/fluid/lite/api/mobilenetv1_test.cc +++ b/paddle/fluid/lite/api/mobilenetv1_test.cc @@ -30,7 +30,7 @@ namespace lite { #ifdef LITE_WITH_ARM TEST(MobileNetV1, test) { DeviceInfo::Init(); - lite::ExecutorLite predictor; + lite::Predictor predictor; std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), 
PRECISION(kFloat)}}); diff --git a/paddle/fluid/lite/api/mobilenetv2_test.cc b/paddle/fluid/lite/api/mobilenetv2_test.cc index 8a1ccdf4d37755559b80aba08010ec1ae6eb0578..0d615f61f267a612a32e5a0535d6272f2c867769 100644 --- a/paddle/fluid/lite/api/mobilenetv2_test.cc +++ b/paddle/fluid/lite/api/mobilenetv2_test.cc @@ -30,7 +30,7 @@ namespace lite { #ifdef LITE_WITH_ARM TEST(MobileNetV2, test) { DeviceInfo::Init(); - lite::ExecutorLite predictor; + lite::Predictor predictor; std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}}); diff --git a/paddle/fluid/lite/api/resnet50_test.cc b/paddle/fluid/lite/api/resnet50_test.cc index c4c214d6cdb462b7d95cbfd0f1787dab8d359a47..cb63ad83cab40842fe799496ff8881f51ea953ae 100644 --- a/paddle/fluid/lite/api/resnet50_test.cc +++ b/paddle/fluid/lite/api/resnet50_test.cc @@ -30,7 +30,7 @@ namespace lite { #ifdef LITE_WITH_ARM TEST(ResNet50, test) { DeviceInfo::Init(); - lite::ExecutorLite predictor; + lite::Predictor predictor; std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}}); diff --git a/paddle/fluid/lite/core/CMakeLists.txt b/paddle/fluid/lite/core/CMakeLists.txt index 1e95668cddc722e32ea784fe2331380ea3a3940e..f6d48c2bea52040a924561812fb092df412a0c15 100644 --- a/paddle/fluid/lite/core/CMakeLists.txt +++ b/paddle/fluid/lite/core/CMakeLists.txt @@ -25,7 +25,7 @@ cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite) cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite}) cc_library(cpu_info_lite SRCS cpu_info.cc) lite_cc_library(context_lite SRCS context.cc DEPS ${tensor_lite} any_lite cpu_info_lite eigen3) -cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite +cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite kernel_lite cpp_op_desc_lite ${tensor_lite}) cc_library(types_lite SRCS types.cc) cc_library(type_system SRCS type_system.cc DEPS ${tensor_lite} target_wrapper_lite) diff --git a/paddle/fluid/lite/core/kernel.cc b/paddle/fluid/lite/core/kernel.cc index 44b00f53d018ffe9431c7b481fb1bc1a6e1f7cdc..0dae1394290c34cddcf8b2f22868fa326f1974fd 100644 --- a/paddle/fluid/lite/core/kernel.cc +++ b/paddle/fluid/lite/core/kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/lite/core/kernel.h" +#include namespace paddle { namespace lite { @@ -49,6 +50,36 @@ std::string KernelBase::GenParamTypeKey() const { return ss.str(); } +void KernelBase::ParseKernelType(const std::string &kernel_type, + std::string *op_type, std::string *alias, + Place *place) { + std::stringstream ss(kernel_type); + std::getline(ss, *op_type, '/'); + std::getline(ss, *alias, '/'); + std::string target, precision, layout; + std::getline(ss, target, '/'); + std::getline(ss, precision, '/'); + std::getline(ss, layout, '/'); + + place->target = static_cast(std::atoi(target.c_str())); + place->precision = static_cast(std::atoi(precision.c_str())); + place->layout = static_cast(std::atoi(layout.c_str())); +} + +std::string KernelBase::SerializeKernelType(const std::string &op_type, + const std::string &alias, + const Place &place) { + std::stringstream ss; + ss << op_type << "/"; + ss << alias << "/"; + // We serialize the place value not the string representation here for + // easier deserialization. 
+ ss << static_cast(place.target) << "/"; + ss << static_cast(place.precision) << "/"; + ss << static_cast(place.layout); + return ss.str(); +} + bool ParamTypeRegistry::KeyCmp::operator()( const ParamTypeRegistry::key_t &a, const ParamTypeRegistry::key_t &b) const { diff --git a/paddle/fluid/lite/core/kernel.h b/paddle/fluid/lite/core/kernel.h index d7b296eec12a27281b84701e1daa7ca09829fc47..0ef46b65870b11077dcda2cd1833b3eb67a562fa 100644 --- a/paddle/fluid/lite/core/kernel.h +++ b/paddle/fluid/lite/core/kernel.h @@ -118,33 +118,11 @@ class KernelBase { static std::string SerializeKernelType(const std::string& op_type, const std::string& alias, - const Place& place) { - std::stringstream ss; - ss << op_type << "/"; - ss << alias << "/"; - // We serialize the place value not the string representation here for - // easier deserialization. - ss << static_cast(place.target) << "/"; - ss << static_cast(place.precision) << "/"; - ss << static_cast(place.layout); - return ss.str(); - } + const Place& place); static void ParseKernelType(const std::string& kernel_type, std::string* op_type, std::string* alias, - Place* place) { - std::stringstream ss(kernel_type); - std::getline(ss, *op_type, '/'); - std::getline(ss, *alias, '/'); - std::string target, precision, layout; - std::getline(ss, target, '/'); - std::getline(ss, precision, '/'); - std::getline(ss, layout, '/'); - - place->target = static_cast(std::stoi(target)); - place->precision = static_cast(std::stoi(precision)); - place->layout = static_cast(std::stoi(layout)); - } + Place* place); virtual ~KernelBase() = default; void Torch() {} diff --git a/paddle/fluid/lite/core/mir/fusion/fc_fuse_pass_test.cc b/paddle/fluid/lite/core/mir/fusion/fc_fuse_pass_test.cc index 44189e3d1ed5e58807bb577a477a5ee68ac11a80..9d2c9fbc7dc9d0e7c591b189308795d3f783e112 100644 --- a/paddle/fluid/lite/core/mir/fusion/fc_fuse_pass_test.cc +++ b/paddle/fluid/lite/core/mir/fusion/fc_fuse_pass_test.cc @@ -28,7 +28,7 @@ namespace lite { namespace mir { TEST(fc_fuse_pass, fuse_test) { - lite::ExecutorLite predictor; + lite::Predictor predictor; #ifndef LITE_WITH_CUDA std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kX86), PRECISION(kFloat)}}); @@ -69,7 +69,7 @@ TEST(fc_fuse_pass, fuse_test) { #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK TEST(fc_fuse_pass, save_model_test) { - lite::ExecutorLite predictor; + lite::Predictor predictor; std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kX86), PRECISION(kFloat)}}); predictor.Build(FLAGS_model_dir, Place{TARGET(kX86), PRECISION(kFloat)}, diff --git a/paddle/fluid/lite/kernels/arm/CMakeLists.txt b/paddle/fluid/lite/kernels/arm/CMakeLists.txt index 337fd846cbddac2fe53da1faf79b0479a215a576..21d3aa564acae69ecf3d50267fe916e6fc5432c6 100644 --- a/paddle/fluid/lite/kernels/arm/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/arm/CMakeLists.txt @@ -51,5 +51,3 @@ set(arm_kernels ) set(arm_kernels "${arm_kernels}" CACHE INTERNAL "arm kernels") - - diff --git a/paddle/fluid/lite/kernels/arm/use_kernels.h b/paddle/fluid/lite/kernels/arm/use_kernels.h deleted file mode 100644 index 1a6583f3f570e688080b1bb1a96217c25ca4bcc9..0000000000000000000000000000000000000000 --- a/paddle/fluid/lite/kernels/arm/use_kernels.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "paddle/fluid/lite/core/op_registry.h" - -USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(concat, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(pool, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(feed, kARM, kAny, kAny, def); -USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def); diff --git a/paddle/fluid/lite/kernels/use_kernels.h b/paddle/fluid/lite/kernels/use_kernels.h index d44069e14e0d6bcaf73c09d41e107d970d8acecb..09395abab523accd0bc4f95c75d0b9b23f1e8999 100644 --- a/paddle/fluid/lite/kernels/use_kernels.h +++ b/paddle/fluid/lite/kernels/use_kernels.h @@ -12,14 +12,33 @@ // See the License for the specific language governing permissions and // limitations under the License. -#pragma once /* * ATTENTION this header file can only include in .cc file. */ +#pragma once +#include "paddle/fluid/lite/core/op_registry.h" + USE_LITE_KERNEL(feed, kHost, kAny, kAny, def); USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def); +#ifdef LITE_WITH_ARM +USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(split, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(dropout, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(concat, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(relu, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(transpose, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(transpose2, kARM, kFloat, kNCHW, def); +#endif + #ifdef LITE_WITH_X86 USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def); USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def); @@ -36,21 +55,6 @@ USE_LITE_KERNEL(depthwise_conv2d, kX86, kFloat, kNCHW, def); USE_LITE_KERNEL(pool2d, kX86, kFloat, kNCHW, def); #endif -#ifdef LITE_WITH_ARM -USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(batch_norm, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(relu, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(concat, kARM, kFloat, kNCHW, def); -USE_LITE_KERNEL(dropout, kARM, kFloat, kNCHW, def); -#endif - #ifdef LITE_WITH_CUDA USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def); USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device); diff --git a/paddle/fluid/lite/kernels/x86/CMakeLists.txt b/paddle/fluid/lite/kernels/x86/CMakeLists.txt index 
f66818b2e9dacd8e8aae3506a2f4f12b1b117cdb..fb3ea29260480738297d5416aab2d346412b3490 100644 --- a/paddle/fluid/lite/kernels/x86/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/x86/CMakeLists.txt @@ -44,10 +44,9 @@ set(x86_kernels softmax_compute_x86 dropout_compute_x86 concat_compute_x86 - conv_compute_x86 - pool_compute_x86 - batch_norm_compute_x86 + conv_compute_x86 + pool_compute_x86 + batch_norm_compute_x86 ) set(x86_kernels "${x86_kernels}" CACHE INTERNAL "x86 kernels") - diff --git a/paddle/fluid/lite/operators/use_ops.h b/paddle/fluid/lite/operators/use_ops.h index 933b3c849a390c335bd914c476c61636c607aa41..316e08ad4784849865b3d7722dfb7d1935d51247 100644 --- a/paddle/fluid/lite/operators/use_ops.h +++ b/paddle/fluid/lite/operators/use_ops.h @@ -13,9 +13,10 @@ // limitations under the License. #pragma once -/* - * ATTENTION this header file can only include in .cc file. - */ + +// ATTENTION This can only include in a .cc file. + +#include "paddle/fluid/lite/core/op_registry.h" USE_LITE_OP(mul); USE_LITE_OP(fc); diff --git a/paddle/fluid/lite/tools/build.sh b/paddle/fluid/lite/tools/build.sh index 4436d91cdfd782ac6cbed9768c85a7bf01bead71..5094cee5b4504105bf899d08ab420d2833022f9a 100755 --- a/paddle/fluid/lite/tools/build.sh +++ b/paddle/fluid/lite/tools/build.sh @@ -85,8 +85,8 @@ function build_test_server { # test_arm_android function test_arm_android { - test_name=$1 - port=$2 + local test_name=$1 + local port=$2 if [[ "${test_name}x" == "x" ]]; then echo "test_name can not be empty" exit 1 @@ -99,12 +99,18 @@ function test_arm_android { echo "test name: ${test_name}" adb_work_dir="/data/local/tmp" - skip_list=("test_model_parser_lite" "test_mobilenetv1_lite" "test_mobilenetv2_lite" "test_resnet50_lite" "test_inceptionv4_lite") + skip_list=("test_model_parser_lite" "test_mobilenetv1_lite" "test_mobilenetv2_lite" "test_resnet50_lite" "test_inceptionv4_lite" "test_light_api") for skip_name in ${skip_list[@]} ; do [[ $skip_name =~ (^|[[:space:]])$test_name($|[[:space:]]) ]] && echo "skip $test_name" && return done - testpath=$(find ./paddle/fluid -name ${test_name}) + local testpath=$(find ./paddle/fluid -name ${test_name}) + + # if [[ "$test_name" == "test_light_api" ]]; then + # local model_path=$(find . -name "lite_naive_model") + # arm_push_necessary_file $port $model_path $adb_work_dir + # fi + adb -s emulator-${port} push ${testpath} ${adb_work_dir} adb -s emulator-${port} shell chmod +x "${adb_work_dir}/${test_name}" adb -s emulator-${port} shell "./${adb_work_dir}/${test_name}" @@ -204,6 +210,7 @@ function test_arm { abi=$2 lang=$3 port=$4 + if [[ ${os} == "armlinux" ]]; then # TODO(hongming): enable test armlinux on armv8, armv7 and armv7hf echo "Skip test arm linux yet. armlinux must in another docker" @@ -221,6 +228,7 @@ function test_arm { return 0 fi + echo "test file: ${TESTS_FILE}" for _test in $(cat $TESTS_FILE); do test_arm_android $_test $port @@ -242,6 +250,14 @@ function prepare_emulator { sleep 1m } +function arm_push_necessary_file { + local port=$1 + local testpath=$2 + local adb_work_dir=$3 + + adb -s emulator-${port} push ${testpath} ${adb_work_dir} +} + # We split the arm unittest into several sub-tasks to parallel and reduce the overall CI timetime. 
# sub-task1 @@ -286,20 +302,22 @@ function build_test_arm_subtask_armlinux { prepare_emulator $port_armv8 $port_armv7 + cur=$PWD + # job 5 - build_arm "armlinux" "armv8" - test_arm "armlinux" "armv8" - cd - + build_arm "armlinux" "armv8" "gcc" $port_armv8 + test_arm "armlinux" "armv8" "gcc" $port_armv8 + cd $cur # job 6 - build_arm "armlinux" "armv7" - test_arm "armlinux" "armv7" - cd - + build_arm "armlinux" "armv7" "gcc" $port_armv8 + test_arm "armlinux" "armv7" "gcc" $port_armv8 + cd $cur # job 7 - build_arm "armlinux" "armv7hf" - test_arm "armlinux" "armv7hf" - cd - + build_arm "armlinux" "armv7hf" "gcc" $port_armv8 + test_arm "armlinux" "armv7hf" "gcc" $port_armv8 + cd $cur adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done echo "Done" diff --git a/paddle/fluid/lite/utils/io.h b/paddle/fluid/lite/utils/io.h index 4dba6f984292235d3f947477b09152bc37c2adb9..4e64ee1d4e4b016fadf40167fb96557e96061fba 100644 --- a/paddle/fluid/lite/utils/io.h +++ b/paddle/fluid/lite/utils/io.h @@ -14,15 +14,18 @@ #pragma once -#include +#ifndef LITE_WITH_ARM +#include +#endif #include #include #include "paddle/fluid/lite/utils/cp_logging.h" +#include "paddle/fluid/lite/utils/string.h" namespace paddle { namespace lite { -static bool IsFileExists(const std::string &path) { +static bool IsFileExists(const std::string& path) { std::ifstream file(path); bool res = file.is_open(); if (res) { @@ -31,5 +34,13 @@ static bool IsFileExists(const std::string &path) { return res; } +// ARM mobile does not support mkdir in C++ +#ifndef LITE_WITH_ARM +static void MkDirRecur(const std::string& path) { + CHECK_EQ(system(string_format("mkdir -p %s", path.c_str()).c_str()), 0) + << "Can't mkdir " << path; +} +#endif + } // namespace lite } // namespace paddle diff --git a/paddle/fluid/lite/utils/string.h b/paddle/fluid/lite/utils/string.h index 31b131276bfa220f85a9a7606d504b6d330425b2..5e918bf5f841b3f8d18ccf9ff94721534ec6a698 100644 --- a/paddle/fluid/lite/utils/string.h +++ b/paddle/fluid/lite/utils/string.h @@ -74,5 +74,15 @@ static std::string Repr(const std::vector& v) { return "{" + Join(tmp, ",") + "}"; } +static std::vector Split(const std::string& s, char delim) { + std::stringstream ss(s); + std::string line; + std::vector res; + while (std::getline(ss, line, delim)) { + res.push_back(line); + } + return res; +} + } // namespace lite } // namespace paddle
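Taken together, the API changes above rename `ExecutorLite` to `Predictor` and give `LightPredictor` a constructor that loads the optimized model directly. The sketch below is pieced together from the calls visible in `cxx_api_test.cc`, `fc_fuse_pass_test.cc`, and `light_api_test.cc` in this patch; the model paths, the exact `Place` list, and the tensor-filling helpers (`mutable_data<float>()`, the `int64_t` shape type) are illustrative assumptions, not part of the patch itself.

```cpp
// Minimal usage sketch of the post-patch heavy (Predictor) and light
// (LightPredictor) APIs. Paths and places are placeholders.
#include <vector>

#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/api/light_api.h"

namespace paddle {
namespace lite {

void RunDemo() {
  // 1. Heavy API: load a raw model, optimize it, and persist the result
  //    (SaveModel is the method moved out-of-line into cxx_api.cc above).
  Predictor predictor;
  std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                   Place{TARGET(kX86), PRECISION(kFloat)}});
  predictor.Build("lite_naive_model",  // hypothetical model directory
                  Place{TARGET(kX86), PRECISION(kFloat)}, valid_places);
  predictor.SaveModel("lite_naive_model_opt");

  // 2. Light API: the constructor now takes the optimized model directory,
  //    replacing the separate Build() call removed by this patch.
  LightPredictor light_predictor("lite_naive_model_opt");
  auto* input = light_predictor.GetInput(0);
  input->Resize(DDim(std::vector<int64_t>({100, 100})));
  auto* data = input->mutable_data<float>();  // assumed Tensor helper
  for (int i = 0; i < 100 * 100; ++i) data[i] = 1.f;

  light_predictor.Run();
  const Tensor* output = light_predictor.GetOutput(0);
  (void)output;
}

}  // namespace lite
}  // namespace paddle
```

This mirrors the flow the tests exercise: the heavy predictor runs the MIR optimizations and writes an optimized model, which the lightweight predictor then loads on mobile without depending on MIR at all.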