From b22226929fda661a3b1bc18eeb8799bdc7392301 Mon Sep 17 00:00:00 2001
From: Chunwei
Date: Fri, 21 Jun 2019 12:03:37 +0000
Subject: [PATCH] refactor api and recover CI cache

---
 .gitlab-ci.yml                                | 32 +++------
 paddle/fluid/lite/CMakeLists.txt              |  5 +-
 paddle/fluid/lite/api/CMakeLists.txt          | 45 ++++++------
 paddle/fluid/lite/api/apis_test.cc            |  8 +--
 paddle/fluid/lite/api/cxx_api.cc              | 44 ++++++++++--
 paddle/fluid/lite/api/cxx_api.h               | 70 ++++++++-----------
 paddle/fluid/lite/api/cxx_api_bin.cc          |  2 +-
 paddle/fluid/lite/api/cxx_api_test.cc         |  2 +-
 paddle/fluid/lite/api/inceptionv4_test.cc     |  2 +-
 paddle/fluid/lite/api/light_api.cc            | 64 +++++++++++++++++
 paddle/fluid/lite/api/light_api.h             | 63 +++--------------
 paddle/fluid/lite/api/light_api_test.cc       |  6 +-
 paddle/fluid/lite/api/lite_api_test_helper.cc |  2 +-
 paddle/fluid/lite/api/mobilenetv1_test.cc     |  2 +-
 paddle/fluid/lite/api/mobilenetv2_test.cc     |  2 +-
 paddle/fluid/lite/api/resnet50_test.cc        |  2 +-
 paddle/fluid/lite/core/CMakeLists.txt         |  2 +-
 paddle/fluid/lite/core/kernel.cc              | 31 ++++++++
 paddle/fluid/lite/core/kernel.h               | 26 +------
 .../lite/core/mir/fusion/fc_fuse_pass_test.cc |  4 +-
 paddle/fluid/lite/kernels/arm/CMakeLists.txt  |  2 -
 paddle/fluid/lite/kernels/arm/use_kernels.h   | 25 -------
 paddle/fluid/lite/kernels/use_kernels.h       | 36 +++++-----
 paddle/fluid/lite/kernels/x86/CMakeLists.txt  |  7 +-
 paddle/fluid/lite/operators/use_ops.h         |  7 +-
 paddle/fluid/lite/tools/build.sh              | 44 ++++++++----
 paddle/fluid/lite/utils/io.h                  | 15 +++-
 paddle/fluid/lite/utils/string.h              | 10 +++
 28 files changed, 305 insertions(+), 255 deletions(-)
 delete mode 100644 paddle/fluid/lite/kernels/arm/use_kernels.h

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index f656e065a..7771b2487 100755
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -78,6 +78,7 @@ build:mobile_android:
     paths:
       - $MOBILE_LITE_CACHE0
       - $MOBILE_LITE_CACHE1
+      - $MOBILE_LITE_CACHE2
      - ~/.ccache
       - $CI_PROJECT_DIR/build_mobile_ccache
   script:
@@ -98,6 +99,7 @@ build:mobile_armlinux:
     paths:
       - $MOBILE_LITE_CACHE0
       - $MOBILE_LITE_CACHE1
+      - $MOBILE_LITE_CACHE2
       - ~/.ccache
       - $CI_PROJECT_DIR/build_mobile_ccache2
   script:
@@ -107,24 +109,13 @@ build:mobile_armlinux:
   dependencies:
     - build:server
 
-  cache:
-    key: mobile_thirdparty
-    paths:
-      - $MOBILE_LITE_CACHE0
-      - $MOBILE_LITE_CACHE1
-      - ~/.ccache
 build:mobile_model_mobilenetv1:
   tags:
     - lite
   stage: build_mobile
   image: $MOBILE_LITE_DOCKER_IMAGE
-  cache:
-    key: mobile_thirdparty
-    paths:
-      - $MOBILE_LITE_CACHE0
-      - $MOBILE_LITE_CACHE1
-      - ~/.ccache
+
   script:
     - export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_model_mobilenetv1
    - ./paddle/fluid/lite/tools/build.sh build_test_arm_model_mobilenetv1
@@ -137,6 +128,7 @@ build:mobile_model_mobilenetv1:
     paths:
       - $MOBILE_LITE_CACHE0
       - $MOBILE_LITE_CACHE1
+      - $MOBILE_LITE_CACHE2
       - ~/.ccache
       - $CI_PROJECT_DIR/build_mobile_model_mobilenetv1
 
@@ -145,12 +137,7 @@ build:mobile_model_mobilenetv2:
     - lite
   stage: build_mobile
   image: $MOBILE_LITE_DOCKER_IMAGE
-  cache:
-    key: mobile_thirdparty
-    paths:
-      - $MOBILE_LITE_CACHE0
-      - $MOBILE_LITE_CACHE1
-      - ~/.ccache
+
   script:
     - export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_model_mobilenetv2
     - ./paddle/fluid/lite/tools/build.sh build_test_arm_model_mobilenetv2
@@ -163,6 +150,7 @@ build:mobile_model_mobilenetv2:
     paths:
       - $MOBILE_LITE_CACHE0
       - $MOBILE_LITE_CACHE1
+      - $MOBILE_LITE_CACHE2
       - ~/.ccache
       - $CI_PROJECT_DIR/build_mobile_model_mobilenetv2
 
@@ -171,12 +159,7 @@ build:mobile_model_resnet50:
     - lite
   stage: build_mobile
   image: $MOBILE_LITE_DOCKER_IMAGE
-  cache:
-    key: mobile_thirdparty
-    paths:
-      - $MOBILE_LITE_CACHE0
-      - $MOBILE_LITE_CACHE1
-      - ~/.ccache
+
   script:
     - export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_model_resnet50
     - ./paddle/fluid/lite/tools/build.sh build_test_arm_model_resnet50
@@ -189,6 +172,7 @@ build:mobile_model_resnet50:
     paths:
       - $MOBILE_LITE_CACHE0
       - $MOBILE_LITE_CACHE1
+      - $MOBILE_LITE_CACHE2
       - ~/.ccache
       - $CI_PROJECT_DIR/build_mobile_model_resnet50

diff --git a/paddle/fluid/lite/CMakeLists.txt b/paddle/fluid/lite/CMakeLists.txt
index e2a8984b4..c43f055ce 100644
--- a/paddle/fluid/lite/CMakeLists.txt
+++ b/paddle/fluid/lite/CMakeLists.txt
@@ -24,8 +24,7 @@ function(lite_download_and_uncompress INSTALL_DIR URL FILENAME)
       ${EXTERNAL_PROJECT_NAME}
       ${EXTERNAL_PROJECT_LOG_ARGS}
       PREFIX                ${INSTALL_DIR}
-      DOWNLOAD_COMMAND      wget --no-check-certificate -q -O ${INSTALL_DIR}/${FILENAME} ${URL}/${FILENAME} &&
-                            ${CMAKE_COMMAND} -E tar xzf ${INSTALL_DIR}/${FILENAME}
+      DOWNLOAD_COMMAND      wget --no-check-certificate -q -O ${INSTALL_DIR}/${FILENAME} ${URL}/${FILENAME} && ${CMAKE_COMMAND} -E tar xzf ${INSTALL_DIR}/${FILENAME}
       DOWNLOAD_DIR          ${INSTALL_DIR}
       DOWNLOAD_NO_PROGRESS  1
       CONFIGURE_COMMAND     ""
@@ -143,6 +142,8 @@ function(lite_cc_binary TARGET)
       HVY_DEPS ${args_HVY_DEPS}
       )
   cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
+  # collect the targets that need to be compiled for lite
+  add_dependencies(lite_compile_deps ${TARGET})
 endfunction()
 
 # Add a unit-test name to file for latter offline manual test.
diff --git a/paddle/fluid/lite/api/CMakeLists.txt b/paddle/fluid/lite/api/CMakeLists.txt
index 3cac3eeba..4440acd61 100644
--- a/paddle/fluid/lite/api/CMakeLists.txt
+++ b/paddle/fluid/lite/api/CMakeLists.txt
@@ -12,7 +12,6 @@ lite_cc_library(lite_api_test_helper SRCS lite_api_test_helper.cc
     CUDA_DEPS kernels_cuda
     X86_DEPS ${x86_kernels}
     )
-lite_cc_library(cxx_api_lite SRCS cxx_api.cc DEPS lite_api_test_helper)
 
 set(light_api_deps
     scope_lite target_wrapper_host model_parser_lite program_lite)
@@ -21,27 +20,34 @@ if(LITE_WITH_CUDA)
     set(light_api_deps ${light_api_deps} target_wrapper_cuda)
 endif()
 
-lite_cc_library(light_api_lite SRCS light_api.cc
-    DEPS ${light_api_deps} ${ops_lite} ${host_kernels}
-    )
-
 message(STATUS "get ops ${ops_lite}")
 message(STATUS "get Host kernels ${host_kernels}")
 message(STATUS "get ARM kernels ${arm_kernels}")
 
+lite_cc_library(cxx_api_lite SRCS cxx_api.cc DEPS ${cxx_api_lite_deps} ${ops_lite} ${host_kernels} program_lite)
+
+lite_cc_library(light_api_lite SRCS light_api.cc
+    DEPS scope_lite target_wrapper_host model_parser_lite
+         ${light_api_deps} ${ops_lite} ${host_kernels} program_lite
+    CUDA_DEPS target_wrapper_cuda
+    X86_DEPS ${x86_kernels} operator
+    ARM_DEPS ${arm_kernels}
+    )
+
 include(ExternalProject)
 set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING
     "A path setting inference demo download directories.")
 
 if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
     lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc
-       DEPS cxx_api_lite mir_passes
+       DEPS cxx_api_lite mir_passes lite_api_test_helper
        ${ops_lite} ${host_kernels} ${x86_kernels}
        ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
             --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
     add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz)
 endif()
+
 if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
     set(lite_model_test_DEPS cxx_api_lite mir_passes ${ops_lite} ${host_kernels} ${arm_kernels})
@@ -68,25 +74,20 @@ endif()
 
 # These tests needs CLI arguments, and is not supported in ARM CI.
 # TODO(Superjomn) support latter.
-if(NOT LITE_ON_MOBILE)
-    lite_cc_test(test_light_api SRCS light_api_test.cc
-        DEPS light_api_lite mir_passes
-        X86_DEPS ${x86_kernels}
-        ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt
-        SERIAL)
+lite_cc_test(test_light_api SRCS light_api_test.cc
+    DEPS light_api_lite program_lite mir_passes
+    ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt
+    SERIAL)
 
+if(NOT LITE_ON_MOBILE)
     lite_cc_test(test_apis_lite SRCS apis_test.cc
-        DEPS cxx_api_lite light_api_lite ${ops_lite} mir_passes
-        X86_DEPS ${x86_kernels}
+        DEPS cxx_api_lite light_api_lite ${ops_lite}
+        X86_DEPS ${x86_kernels} operator
         ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
              --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
 endif()
 
-lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
-    DEPS
-    cxx_api_lite
-    model_parser_lite
-    target_wrapper_host
-    mir_passes
-    ${ops_lite} ${host_kernels}
-    ARM_DEPS ${arm_kernels})
+#lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
+        #X86_DEPS operator
+        #DEPS light_api_lite model_parser_lite target_wrapper_host mir_passes
+        #ARM_DEPS ${arm_kernels})
diff --git a/paddle/fluid/lite/api/apis_test.cc b/paddle/fluid/lite/api/apis_test.cc
index 7dd6a1193..0b8e9550a 100644
--- a/paddle/fluid/lite/api/apis_test.cc
+++ b/paddle/fluid/lite/api/apis_test.cc
@@ -39,7 +39,7 @@ void SetConstInput(lite::Tensor* x) {
   }
 }
 
-bool CompareTensors(const std::string& name, const ExecutorLite& cxx_api,
+bool CompareTensors(const std::string& name, const Predictor& cxx_api,
                     const LightPredictor& light_api) {
   const auto* a = cxx_api.GetTensor(name);
   const auto* b = light_api.GetTensor(name);
@@ -48,8 +48,8 @@ bool CompareTensors(const std::string& name, const ExecutorLite& cxx_api,
 
 #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 TEST(CXXApi_LightApi, save_and_load_model) {
-  lite::ExecutorLite cxx_api;
-  lite::LightPredictor light_api;
+  lite::Predictor cxx_api;
+  lite::LightPredictor light_api(FLAGS_optimized_model);
 
   // CXXAPi
   {
@@ -69,8 +69,6 @@ TEST(CXXApi_LightApi, save_and_load_model) {
 
   // LightApi
   {
-    light_api.Build(FLAGS_optimized_model);
-
     auto* x = light_api.GetInput(0);
     SetConstInput(x);
 
diff --git a/paddle/fluid/lite/api/cxx_api.cc b/paddle/fluid/lite/api/cxx_api.cc
index 1ea8be2c0..7c6ffccfa 100644
--- a/paddle/fluid/lite/api/cxx_api.cc
+++ b/paddle/fluid/lite/api/cxx_api.cc
@@ -17,19 +17,49 @@
 #include <memory>
 #include <string>
 #include <vector>
-#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
-#include "paddle/fluid/platform/port.h"
-#endif
+#include "paddle/fluid/lite/utils/io.h"
 
 namespace paddle {
 namespace lite {
 
-#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
-void ExecutorLite::SaveModel(const std::string &dir) {
-  MkDirRecursively(dir.c_str());
+void Predictor::SaveModel(const std::string &dir) {
+#ifndef LITE_WITH_ARM
+  LOG(INFO) << "Save model to " << dir;
+  MkDirRecur(dir);
   program_->PersistModel(dir, program_desc_);
-}
+#else
+  LOG(INFO) << "Save model to ./";
+  program_->PersistModel("./", program_desc_);
 #endif
+}
+
+lite::Tensor *Predictor::GetInput(size_t offset) {
+  auto *_feed_list = program_->exec_scope()->FindVar("feed");
+  CHECK(_feed_list) << "no feed variable in exec_scope";
+  auto *feed_list = _feed_list->GetMutable<std::vector<lite::Tensor>>();
+  if (offset >= feed_list->size()) {
+    feed_list->resize(offset + 1);
+  }
+  return &feed_list->at(offset);
+}
+
+const lite::Tensor *Predictor::GetOutput(size_t offset) {
+  auto *_fetch_list = program_->exec_scope()->FindVar("fetch");
+  CHECK(_fetch_list) << "no fetch variable in exec_scope";
+  auto &fetch_list = *_fetch_list->GetMutable<std::vector<lite::Tensor>>();
+  CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow";
+  return &fetch_list.at(offset);
+}
+
+void Predictor::Build(const std::string &model_path, const Place &prefer_place,
+                      const std::vector<Place> &valid_places) {
+  LoadModel(model_path, scope_.get(), &program_desc_);
+  Build(program_desc_, prefer_place, valid_places);
+}
+
+const framework::proto::ProgramDesc &Predictor::program_desc() const {
+  return program_desc_;
+}
 
 }  // namespace lite
 }  // namespace paddle
diff --git a/paddle/fluid/lite/api/cxx_api.h b/paddle/fluid/lite/api/cxx_api.h
index 915a469a5..e7b74a04d 100644
--- a/paddle/fluid/lite/api/cxx_api.h
+++ b/paddle/fluid/lite/api/cxx_api.h
@@ -26,20 +26,20 @@
 namespace paddle {
 namespace lite {
 
-struct Config {};
-
-class ExecutorLite {
+/*
+ * Predictor for inference: given a model, it optimizes and then executes it.
+ */
+class Predictor {
  public:
-  ExecutorLite() { scope_ = std::make_shared<Scope>(); }
-  explicit ExecutorLite(const std::shared_ptr<Scope>& root_scope) {
-    scope_ = root_scope;
-  }
+  // Create an empty predictor.
+  Predictor() { scope_ = std::make_shared<Scope>(); }
+  // Create a predictor with the weight variable scope set.
+  explicit Predictor(const std::shared_ptr<Scope>& root_scope)
+      : scope_(root_scope) {}
 
+  // Build from a model, with places set for hardware config.
   void Build(const std::string& model_path, const Place& prefer_place,
-             const std::vector<Place>& valid_places) {
-    LoadModel(model_path, scope_.get(), &program_desc_);
-    Build(program_desc_, prefer_place, valid_places);
-  }
+             const std::vector<Place>& valid_places);
 
   void Build(const framework::proto::ProgramDesc& desc,
              const Place& prefer_place,
@@ -55,40 +55,24 @@ class ExecutorLite {
     program_ = optimizer_.GenRuntimeProgram();
   }
 
-// This method is disabled in mobile, or unnecessary dependencies required.
-#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
-  void SaveModel(const std::string& dir);
-#endif
+  // Run the predictor for a single batch of data.
+  void Run() { program_->Run(); }
 
-  // Get offset-th col of feed.
-  lite::Tensor* GetInput(size_t offset) {
-    auto* _feed_list = program_->exec_scope()->FindVar("feed");
-    CHECK(_feed_list) << "no feed variable in exec_scope";
-    auto* feed_list = _feed_list->GetMutable<std::vector<lite::Tensor>>();
-    if (offset >= feed_list->size()) {
-      feed_list->resize(offset + 1);
-    }
-    return &feed_list->at(offset);
-  }
+  // Get offset-th col of feed inputs.
+  lite::Tensor* GetInput(size_t offset);
 
-  const lite::Tensor* GetOutput(size_t offset) {
-    auto* _fetch_list = program_->exec_scope()->FindVar("fetch");
-    CHECK(_fetch_list) << "no fatch variable in exec_scope";
-    auto& fetch_list = *_fetch_list->GetMutable<std::vector<lite::Tensor>>();
-    CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow";
-    return &fetch_list.at(offset);
-  }
+  // Get offset-th col of fetch results.
+  const lite::Tensor* GetOutput(size_t offset);
 
+  // Return the program desc for debug.
+  const framework::proto::ProgramDesc& program_desc() const;
 
   const lite::Tensor* GetTensor(const std::string& name) const {
     auto* var = program_->exec_scope()->FindVar(name);
     return &var->Get<lite::Tensor>();
   }
 
-  void Run() { program_->Run(); }
-
-  const framework::proto::ProgramDesc& program_desc() const {
-    return program_desc_;
-  }
+  // This method is disabled on mobile, as it requires unnecessary dependencies.
+  void SaveModel(const std::string& dir);
 
  private:
   Optimizer optimizer_;
@@ -97,6 +81,7 @@ class ExecutorLite {
   std::unique_ptr<RuntimeProgram> program_;
 };
 
+#ifdef LITE_WITH_X86
 /*
  * An executor for training.
 *
@@ -120,13 +105,13 @@ class CXXTrainer {
       : scope_(root_scope),
         preferred_place_(preferred_place),
         valid_places_(valid_places),
-        main_program_executor_(ExecutorLite(scope_)) {}
+        main_program_executor_(Predictor(scope_)) {}
 
   // Build the RuntimeProgram cache for the main program. The cache will run
   // multiple times for the epoches.
   // NOTE Just support to execute the 0-th block currently.
-  ExecutorLite& BuildMainProgramExecutor(
-      const framework::proto::ProgramDesc& desc, int block_id = 0) {
+  Predictor& BuildMainProgramExecutor(const framework::proto::ProgramDesc& desc,
+                                      int block_id = 0) {
     main_program_executor_.Build(desc, preferred_place_, valid_places_);
     return main_program_executor_;
   }
@@ -134,7 +119,7 @@ class CXXTrainer {
   // Run the startup program. It just executes once, no cache needed.
   void RunStartupProgram(const framework::proto::ProgramDesc& desc,
                          int block_id = 0) {
-    ExecutorLite exe(scope_);
+    Predictor exe(scope_);
     exe.Build(desc, preferred_place_, valid_places_);
     exe.Run();
   }
@@ -146,8 +131,9 @@ class CXXTrainer {
   std::vector<Place> valid_places_;
 
   // The training program.
-  ExecutorLite main_program_executor_;
+  Predictor main_program_executor_;
 };
+#endif
 
 }  // namespace lite
 }  // namespace paddle
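For context, a minimal sketch of driving the renamed Predictor end to end, pieced together from the tests this patch touches (cxx_api_test.cc and light_api_test.cc). The model directory is a placeholder; the places and the 100x100 input shape are the ones the tests use. This is illustration only, not part of the patch:

    #include "paddle/fluid/lite/api/cxx_api.h"

    namespace paddle {
    namespace lite {

    void PredictorSketch() {
      Predictor predictor;
      // Same places the x86 tests above pass in.
      std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                       Place{TARGET(kX86), PRECISION(kFloat)}});
      predictor.Build("path/to/model_dir",  // placeholder path
                      Place{TARGET(kX86), PRECISION(kFloat)}, valid_places);

      auto* input = predictor.GetInput(0);  // grows the feed list on demand
      input->Resize(DDim(std::vector<int64_t>({100, 100})));
      auto* data = input->mutable_data<float>();
      for (int i = 0; i < 100 * 100; i++) data[i] = 1.f;

      predictor.Run();
      const auto* output = predictor.GetOutput(0);  // CHECKs the offset
      LOG(INFO) << "output dims: " << output->dims();

      predictor.SaveModel("path/to/optimized_model");  // placeholder path
    }

    }  // namespace lite
    }  // namespace paddle
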
diff --git a/paddle/fluid/lite/api/cxx_api_bin.cc b/paddle/fluid/lite/api/cxx_api_bin.cc
index 58cf5dd78..36f6ed45a 100644
--- a/paddle/fluid/lite/api/cxx_api_bin.cc
+++ b/paddle/fluid/lite/api/cxx_api_bin.cc
@@ -34,7 +34,7 @@ void Run(const char* model_dir, int repeat, int thread_num) {
   DeviceInfo::Init();
   DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, thread_num);
 #endif
-  lite::ExecutorLite predictor;
+  lite::Predictor predictor;
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kARM), PRECISION(kFloat)}});
 
diff --git a/paddle/fluid/lite/api/cxx_api_test.cc b/paddle/fluid/lite/api/cxx_api_test.cc
index 093f8b730..a1a028a54 100644
--- a/paddle/fluid/lite/api/cxx_api_test.cc
+++ b/paddle/fluid/lite/api/cxx_api_test.cc
@@ -42,7 +42,7 @@ TEST(CXXApi, test) {
 }
 
 TEST(CXXApi, save_model) {
-  lite::ExecutorLite predictor;
+  lite::Predictor predictor;
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kX86), PRECISION(kFloat)}});
   predictor.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)},
diff --git a/paddle/fluid/lite/api/inceptionv4_test.cc b/paddle/fluid/lite/api/inceptionv4_test.cc
index b0f0aaf3c..7908a8110 100644
--- a/paddle/fluid/lite/api/inceptionv4_test.cc
+++ b/paddle/fluid/lite/api/inceptionv4_test.cc
@@ -30,7 +30,7 @@ namespace lite {
 #ifdef LITE_WITH_ARM
 TEST(InceptionV4, test) {
   DeviceInfo::Init();
-  lite::ExecutorLite predictor;
+  lite::Predictor predictor;
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kARM), PRECISION(kFloat)}});
 
#include "paddle/fluid/lite/api/light_api.h" + +namespace paddle { +namespace lite { + +void LightPredictor::Build(const std::string& model_dir) { + framework::proto::ProgramDesc desc; + LoadModel(model_dir, scope_.get(), &desc); + BuildRuntimeProgram(desc); +} + +Tensor* LightPredictor::GetInput(size_t offset) { + auto* _feed_list = program_->exec_scope()->FindVar("feed"); + CHECK(_feed_list) << "no feed variable in exec_scope"; + auto* feed_list = _feed_list->GetMutable>(); + if (offset >= feed_list->size()) { + feed_list->resize(offset + 1); + } + return &feed_list->at(offset); +} + +const Tensor* LightPredictor::GetOutput(size_t offset) { + auto* _fetch_list = program_->exec_scope()->FindVar("fetch"); + CHECK(_fetch_list) << "no fatch variable in exec_scope"; + auto& fetch_list = *_fetch_list->GetMutable>(); + CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow"; + return &fetch_list.at(offset); +} + +void LightPredictor::BuildRuntimeProgram( + const framework::proto::ProgramDesc& prog) { + std::vector insts; + // 1. Create op first + Program program(prog, scope_, {}); + + // 2. Create Instructs + + // Create the kernels of the target places, and filter out the specific + // kernel with the target alias. + for (auto& op : program.ops()) { + auto kernel_type = op->op_info()->GetAttr(kKernelTypeAttr); + std::string op_type, alias; + Place place; + KernelBase::ParseKernelType(kernel_type, &op_type, &alias, &place); + auto kernels = op->CreateKernels({place}); + // filter out a kernel + auto it = std::find_if( + kernels.begin(), kernels.end(), + [&](std::unique_ptr& it) { return it->alias() == alias; }); + CHECK(it != kernels.end()); + (*it)->SetContext(ContextScheduler::Global().NewContext((*it)->target())); + insts.emplace_back(op, std::move(*it)); + } + program_.reset(new RuntimeProgram(std::move(insts))); + CHECK(program.exec_scope()); + program_->set_exec_scope(program.exec_scope()); +} + +LightPredictor::LightPredictor(const std::string& model_dir) { + scope_ = std::make_shared(); + Build(model_dir); +} + +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/api/light_api.h b/paddle/fluid/lite/api/light_api.h index 508590938..bf1d7e95a 100644 --- a/paddle/fluid/lite/api/light_api.h +++ b/paddle/fluid/lite/api/light_api.h @@ -32,36 +32,21 @@ namespace paddle { namespace lite { +/* + * The light weight predictor, mainly for mobile. It loads an optimized model, + * and will not depend on the MIR or perform latter optimization. + */ class LightPredictor { public: - LightPredictor() { scope_ = std::make_shared(); } - - void Build(const std::string& model_dir) { - framework::proto::ProgramDesc desc; - LoadModel(model_dir, scope_.get(), &desc); - BuildRuntimeProgram(desc); - } + explicit LightPredictor(const std::string& model_dir); void Run() { program_->Run(); } - // Get offset-th col of feed. - Tensor* GetInput(size_t offset) { - auto* _feed_list = program_->exec_scope()->FindVar("feed"); - CHECK(_feed_list) << "no feed variable in exec_scope"; - auto* feed_list = _feed_list->GetMutable>(); - if (offset >= feed_list->size()) { - feed_list->resize(offset + 1); - } - return &feed_list->at(offset); - } + // Get offset-th col of feed inputs. 
diff --git a/paddle/fluid/lite/api/light_api_test.cc b/paddle/fluid/lite/api/light_api_test.cc
index faf53b817..d7e58fbe5 100644
--- a/paddle/fluid/lite/api/light_api_test.cc
+++ b/paddle/fluid/lite/api/light_api_test.cc
@@ -25,8 +25,10 @@ namespace paddle {
 namespace lite {
 
 TEST(LightAPI, load) {
-  LightPredictor predictor;
-  predictor.Build(FLAGS_optimized_model);
+  if (FLAGS_optimized_model.empty()) {
+    FLAGS_optimized_model = "lite_naive_model";
+  }
+  LightPredictor predictor(FLAGS_optimized_model);
 
   auto* input_tensor = predictor.GetInput(0);
   input_tensor->Resize(DDim(std::vector<int64_t>({100, 100})));
diff --git a/paddle/fluid/lite/api/lite_api_test_helper.cc b/paddle/fluid/lite/api/lite_api_test_helper.cc
index b82541723..3c0835bc4 100644
--- a/paddle/fluid/lite/api/lite_api_test_helper.cc
+++ b/paddle/fluid/lite/api/lite_api_test_helper.cc
@@ -22,7 +22,7 @@ namespace paddle {
 namespace lite {
 
 const lite::Tensor* RunHvyModel() {
-  lite::ExecutorLite predictor;
+  lite::Predictor predictor;
 #ifndef LITE_WITH_CUDA
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kX86), PRECISION(kFloat)}});
diff --git a/paddle/fluid/lite/api/mobilenetv1_test.cc b/paddle/fluid/lite/api/mobilenetv1_test.cc
index 527b387a4..94935e869 100644
--- a/paddle/fluid/lite/api/mobilenetv1_test.cc
+++ b/paddle/fluid/lite/api/mobilenetv1_test.cc
@@ -30,7 +30,7 @@ namespace lite {
 #ifdef LITE_WITH_ARM
 TEST(MobileNetV1, test) {
   DeviceInfo::Init();
-  lite::ExecutorLite predictor;
+  lite::Predictor predictor;
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kARM), PRECISION(kFloat)}});
 
diff --git a/paddle/fluid/lite/api/mobilenetv2_test.cc b/paddle/fluid/lite/api/mobilenetv2_test.cc
index 8a1ccdf4d..0d615f61f 100644
--- a/paddle/fluid/lite/api/mobilenetv2_test.cc
+++ b/paddle/fluid/lite/api/mobilenetv2_test.cc
@@ -30,7 +30,7 @@ namespace lite {
 #ifdef LITE_WITH_ARM
 TEST(MobileNetV2, test) {
   DeviceInfo::Init();
-  lite::ExecutorLite predictor;
+  lite::Predictor predictor;
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kARM), PRECISION(kFloat)}});
 
diff --git a/paddle/fluid/lite/api/resnet50_test.cc b/paddle/fluid/lite/api/resnet50_test.cc
index c4c214d6c..cb63ad83c 100644
--- a/paddle/fluid/lite/api/resnet50_test.cc
+++ b/paddle/fluid/lite/api/resnet50_test.cc
@@ -30,7 +30,7 @@ namespace lite {
 #ifdef LITE_WITH_ARM
 TEST(ResNet50, test) {
   DeviceInfo::Init();
-  lite::ExecutorLite predictor;
+  lite::Predictor predictor;
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kARM), PRECISION(kFloat)}});
 
diff --git a/paddle/fluid/lite/core/CMakeLists.txt b/paddle/fluid/lite/core/CMakeLists.txt
index 1e95668cd..f6d48c2be 100644
--- a/paddle/fluid/lite/core/CMakeLists.txt
+++ b/paddle/fluid/lite/core/CMakeLists.txt
@@ -25,7 +25,7 @@ cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite)
 cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite})
 cc_library(cpu_info_lite SRCS cpu_info.cc)
 lite_cc_library(context_lite SRCS context.cc DEPS ${tensor_lite} any_lite cpu_info_lite eigen3)
-cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite
+cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite kernel_lite
    cpp_op_desc_lite ${tensor_lite})
 cc_library(types_lite SRCS types.cc)
 cc_library(type_system SRCS type_system.cc DEPS ${tensor_lite} target_wrapper_lite)
diff --git a/paddle/fluid/lite/core/kernel.cc b/paddle/fluid/lite/core/kernel.cc
index 44b00f53d..0dae13942 100644
--- a/paddle/fluid/lite/core/kernel.cc
+++ b/paddle/fluid/lite/core/kernel.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "paddle/fluid/lite/core/kernel.h"
+#include <cstdlib>
 
 namespace paddle {
 namespace lite {
@@ -49,6 +50,36 @@ std::string KernelBase::GenParamTypeKey() const {
   return ss.str();
 }
 
+void KernelBase::ParseKernelType(const std::string &kernel_type,
+                                 std::string *op_type, std::string *alias,
+                                 Place *place) {
+  std::stringstream ss(kernel_type);
+  std::getline(ss, *op_type, '/');
+  std::getline(ss, *alias, '/');
+  std::string target, precision, layout;
+  std::getline(ss, target, '/');
+  std::getline(ss, precision, '/');
+  std::getline(ss, layout, '/');
+
+  place->target = static_cast<TargetType>(std::atoi(target.c_str()));
+  place->precision = static_cast<PrecisionType>(std::atoi(precision.c_str()));
+  place->layout = static_cast<DataLayoutType>(std::atoi(layout.c_str()));
+}
+
+std::string KernelBase::SerializeKernelType(const std::string &op_type,
+                                            const std::string &alias,
+                                            const Place &place) {
+  std::stringstream ss;
+  ss << op_type << "/";
+  ss << alias << "/";
+  // We serialize the place value not the string representation here for
+  // easier deserialization.
+  ss << static_cast<int>(place.target) << "/";
+  ss << static_cast<int>(place.precision) << "/";
+  ss << static_cast<int>(place.layout);
+  return ss.str();
+}
+
 bool ParamTypeRegistry::KeyCmp::operator()(
     const ParamTypeRegistry::key_t &a,
     const ParamTypeRegistry::key_t &b) const {
diff --git a/paddle/fluid/lite/core/kernel.h b/paddle/fluid/lite/core/kernel.h
index d7b296eec..0ef46b658 100644
--- a/paddle/fluid/lite/core/kernel.h
+++ b/paddle/fluid/lite/core/kernel.h
@@ -118,33 +118,11 @@ class KernelBase {
 
   static std::string SerializeKernelType(const std::string& op_type,
                                          const std::string& alias,
-                                         const Place& place) {
-    std::stringstream ss;
-    ss << op_type << "/";
-    ss << alias << "/";
-    // We serialize the place value not the string representation here for
-    // easier deserialization.
-    ss << static_cast<int>(place.target) << "/";
-    ss << static_cast<int>(place.precision) << "/";
-    ss << static_cast<int>(place.layout);
-    return ss.str();
-  }
+                                         const Place& place);
 
   static void ParseKernelType(const std::string& kernel_type,
                               std::string* op_type, std::string* alias,
-                              Place* place) {
-    std::stringstream ss(kernel_type);
-    std::getline(ss, *op_type, '/');
-    std::getline(ss, *alias, '/');
-    std::string target, precision, layout;
-    std::getline(ss, target, '/');
-    std::getline(ss, precision, '/');
-    std::getline(ss, layout, '/');
-
-    place->target = static_cast<TargetType>(std::stoi(target));
-    place->precision = static_cast<PrecisionType>(std::stoi(precision));
-    place->layout = static_cast<DataLayoutType>(std::stoi(layout));
-  }
+                              Place* place);
 
   virtual ~KernelBase() = default;
   void Torch() {}
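The kernel-type string that ties the two predictors together is slash-delimited: op_type/alias/target/precision/layout, with the three place fields stored as raw enum integers. A self-contained sketch of the round trip — plain ints stand in for the lite TargetType/PrecisionType/DataLayoutType enums, so this compiles without the framework:

    #include <cstdlib>
    #include <iostream>
    #include <sstream>
    #include <string>

    // Mimics KernelBase::SerializeKernelType: enum values go out as ints.
    std::string Serialize(const std::string& op_type, const std::string& alias,
                          int target, int precision, int layout) {
      std::stringstream ss;
      ss << op_type << "/" << alias << "/" << target << "/" << precision << "/"
         << layout;
      return ss.str();
    }

    // Mimics KernelBase::ParseKernelType: split on '/' and atoi the tail.
    void Parse(const std::string& kernel_type, std::string* op_type,
               std::string* alias, int* target, int* precision, int* layout) {
      std::stringstream ss(kernel_type);
      std::string field;
      std::getline(ss, *op_type, '/');
      std::getline(ss, *alias, '/');
      std::getline(ss, field, '/');
      *target = std::atoi(field.c_str());
      std::getline(ss, field, '/');
      *precision = std::atoi(field.c_str());
      std::getline(ss, field, '/');
      *layout = std::atoi(field.c_str());
    }

    int main() {
      std::string key = Serialize("fc", "def", 1, 1, 1);
      std::cout << key << "\n";  // prints: fc/def/1/1/1

      std::string op, alias;
      int target, precision, layout;
      Parse(key, &op, &alias, &target, &precision, &layout);
      std::cout << op << " " << alias << " " << target << "\n";
      return 0;
    }
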
diff --git a/paddle/fluid/lite/core/mir/fusion/fc_fuse_pass_test.cc b/paddle/fluid/lite/core/mir/fusion/fc_fuse_pass_test.cc
index 44189e3d1..9d2c9fbc7 100644
--- a/paddle/fluid/lite/core/mir/fusion/fc_fuse_pass_test.cc
+++ b/paddle/fluid/lite/core/mir/fusion/fc_fuse_pass_test.cc
@@ -28,7 +28,7 @@ namespace lite {
 namespace mir {
 
 TEST(fc_fuse_pass, fuse_test) {
-  lite::ExecutorLite predictor;
+  lite::Predictor predictor;
 #ifndef LITE_WITH_CUDA
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kX86), PRECISION(kFloat)}});
@@ -69,7 +69,7 @@ TEST(fc_fuse_pass, fuse_test) {
 
 #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 TEST(fc_fuse_pass, save_model_test) {
-  lite::ExecutorLite predictor;
+  lite::Predictor predictor;
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kX86), PRECISION(kFloat)}});
   predictor.Build(FLAGS_model_dir, Place{TARGET(kX86), PRECISION(kFloat)},
diff --git a/paddle/fluid/lite/kernels/arm/CMakeLists.txt b/paddle/fluid/lite/kernels/arm/CMakeLists.txt
index 337fd846c..21d3aa564 100644
--- a/paddle/fluid/lite/kernels/arm/CMakeLists.txt
+++ b/paddle/fluid/lite/kernels/arm/CMakeLists.txt
@@ -51,5 +51,3 @@ set(arm_kernels
     )
 
 set(arm_kernels "${arm_kernels}" CACHE INTERNAL "arm kernels")
-
-
diff --git a/paddle/fluid/lite/kernels/arm/use_kernels.h b/paddle/fluid/lite/kernels/arm/use_kernels.h
deleted file mode 100644
index 1a6583f3f..000000000
--- a/paddle/fluid/lite/kernels/arm/use_kernels.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-#include "paddle/fluid/lite/core/op_registry.h"
-
-USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(concat, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(pool, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
-USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);
diff --git a/paddle/fluid/lite/kernels/use_kernels.h b/paddle/fluid/lite/kernels/use_kernels.h
index d44069e14..09395abab 100644
--- a/paddle/fluid/lite/kernels/use_kernels.h
+++ b/paddle/fluid/lite/kernels/use_kernels.h
@@ -12,14 +12,33 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#pragma once
 /*
  * ATTENTION this header file can only include in .cc file.
 */
 
+#pragma once
+#include "paddle/fluid/lite/core/op_registry.h"
+
 USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
 USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
 
+#ifdef LITE_WITH_ARM
+USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(split, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(dropout, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(concat, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(relu, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(transpose, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(transpose2, kARM, kFloat, kNCHW, def);
+#endif
+
 #ifdef LITE_WITH_X86
 USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def);
 USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
@@ -36,21 +55,6 @@ USE_LITE_KERNEL(depthwise_conv2d, kX86, kFloat, kNCHW, def);
 USE_LITE_KERNEL(pool2d, kX86, kFloat, kNCHW, def);
 #endif
 
-#ifdef LITE_WITH_ARM
-USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(batch_norm, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(relu, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(concat, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(dropout, kARM, kFloat, kNCHW, def);
-#endif
-
 #ifdef LITE_WITH_CUDA
 USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
 USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device);
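These USE_LITE_KERNEL/USE_LITE_OP declarations force-link the statically registered kernels and ops, which is why the per-backend arm/use_kernels.h above can be deleted in favor of this consolidated header. A consumer opts in from a .cc file (never a header, as the ATTENTION note says); sketch of a hypothetical binary's main file:

    // main.cc of a lite binary (illustrative): populate the static
    // registries before any predictor is constructed.
    #include "paddle/fluid/lite/kernels/use_kernels.h"  // USE_LITE_KERNEL(...)
    #include "paddle/fluid/lite/operators/use_ops.h"    // USE_LITE_OP(...)
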
diff --git a/paddle/fluid/lite/kernels/x86/CMakeLists.txt b/paddle/fluid/lite/kernels/x86/CMakeLists.txt
index f66818b2e..fb3ea2926 100644
--- a/paddle/fluid/lite/kernels/x86/CMakeLists.txt
+++ b/paddle/fluid/lite/kernels/x86/CMakeLists.txt
@@ -44,10 +44,9 @@ set(x86_kernels
     softmax_compute_x86
     dropout_compute_x86
     concat_compute_x86
-    conv_compute_x86
-    pool_compute_x86
-    batch_norm_compute_x86
+    conv_compute_x86
+    pool_compute_x86
+    batch_norm_compute_x86
     )
 
 set(x86_kernels "${x86_kernels}" CACHE INTERNAL "x86 kernels")
-
diff --git a/paddle/fluid/lite/operators/use_ops.h b/paddle/fluid/lite/operators/use_ops.h
index 933b3c849..316e08ad4 100644
--- a/paddle/fluid/lite/operators/use_ops.h
+++ b/paddle/fluid/lite/operators/use_ops.h
@@ -13,9 +13,10 @@
 // limitations under the License.
 
 #pragma once
-/*
- * ATTENTION this header file can only include in .cc file.
- */
+
+// ATTENTION This can only be included in a .cc file.
+
+#include "paddle/fluid/lite/core/op_registry.h"
 
 USE_LITE_OP(mul);
 USE_LITE_OP(fc);
diff --git a/paddle/fluid/lite/tools/build.sh b/paddle/fluid/lite/tools/build.sh
index 4436d91cd..5094cee5b 100755
--- a/paddle/fluid/lite/tools/build.sh
+++ b/paddle/fluid/lite/tools/build.sh
@@ -85,8 +85,8 @@ function build_test_server {
 
 # test_arm_android
 function test_arm_android {
-    test_name=$1
-    port=$2
+    local test_name=$1
+    local port=$2
     if [[ "${test_name}x" == "x" ]]; then
         echo "test_name can not be empty"
         exit 1
@@ -99,12 +99,18 @@ function test_arm_android {
     echo "test name: ${test_name}"
     adb_work_dir="/data/local/tmp"
 
-    skip_list=("test_model_parser_lite" "test_mobilenetv1_lite" "test_mobilenetv2_lite" "test_resnet50_lite" "test_inceptionv4_lite")
+    skip_list=("test_model_parser_lite" "test_mobilenetv1_lite" "test_mobilenetv2_lite" "test_resnet50_lite" "test_inceptionv4_lite" "test_light_api")
     for skip_name in ${skip_list[@]} ; do
        [[ $skip_name =~ (^|[[:space:]])$test_name($|[[:space:]]) ]] && echo "skip $test_name" && return
     done
 
-    testpath=$(find ./paddle/fluid -name ${test_name})
+    local testpath=$(find ./paddle/fluid -name ${test_name})
+
+    # if [[ "$test_name" == "test_light_api" ]]; then
+    #     local model_path=$(find . -name "lite_naive_model")
+    #     arm_push_necessary_file $port $model_path $adb_work_dir
+    # fi
+
     adb -s emulator-${port} push ${testpath} ${adb_work_dir}
     adb -s emulator-${port} shell chmod +x "${adb_work_dir}/${test_name}"
     adb -s emulator-${port} shell "./${adb_work_dir}/${test_name}"
@@ -204,6 +210,7 @@ function test_arm {
     abi=$2
     lang=$3
     port=$4
+
     if [[ ${os} == "armlinux" ]]; then
         # TODO(hongming): enable test armlinux on armv8, armv7 and armv7hf
         echo "Skip test arm linux yet. armlinux must in another docker"
@@ -221,6 +228,7 @@ function test_arm {
         return 0
     fi
 
+
     echo "test file: ${TESTS_FILE}"
     for _test in $(cat $TESTS_FILE); do
         test_arm_android $_test $port
@@ -242,6 +250,14 @@ function prepare_emulator {
     sleep 1m
 }
 
+function arm_push_necessary_file {
+    local port=$1
+    local testpath=$2
+    local adb_work_dir=$3
+
+    adb -s emulator-${port} push ${testpath} ${adb_work_dir}
+}
+
 
 # We split the arm unittest into several sub-tasks to parallel and reduce the overall CI timetime.
 # sub-task1
@@ -286,20 +302,22 @@ function build_test_arm_subtask_armlinux {
 
     prepare_emulator $port_armv8 $port_armv7
 
+    cur=$PWD
+
     # job 5
-    build_arm "armlinux" "armv8"
-    test_arm "armlinux" "armv8"
-    cd -
+    build_arm "armlinux" "armv8" "gcc" $port_armv8
+    test_arm "armlinux" "armv8" "gcc" $port_armv8
+    cd $cur
 
     # job 6
-    build_arm "armlinux" "armv7"
-    test_arm "armlinux" "armv7"
-    cd -
+    build_arm "armlinux" "armv7" "gcc" $port_armv8
+    test_arm "armlinux" "armv7" "gcc" $port_armv8
+    cd $cur
 
     # job 7
-    build_arm "armlinux" "armv7hf"
-    test_arm "armlinux" "armv7hf"
-    cd -
+    build_arm "armlinux" "armv7hf" "gcc" $port_armv8
+    test_arm "armlinux" "armv7hf" "gcc" $port_armv8
+    cd $cur
 
     adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done
     echo "Done"
diff --git a/paddle/fluid/lite/utils/io.h b/paddle/fluid/lite/utils/io.h
index 4dba6f984..4e64ee1d4 100644
--- a/paddle/fluid/lite/utils/io.h
+++ b/paddle/fluid/lite/utils/io.h
@@ -14,15 +14,18 @@
 
 #pragma once
-#include <cstdlib>
+#ifndef LITE_WITH_ARM
+#include <cstdlib>
+#endif
 #include <fstream>
 #include <string>
 #include "paddle/fluid/lite/utils/cp_logging.h"
+#include "paddle/fluid/lite/utils/string.h"
 
 namespace paddle {
 namespace lite {
 
-static bool IsFileExists(const std::string &path) {
+static bool IsFileExists(const std::string& path) {
   std::ifstream file(path);
   bool res = file.is_open();
   if (res) {
@@ -31,5 +34,13 @@ static bool IsFileExists(const std::string& path) {
   return res;
 }
 
+// ARM mobile does not support mkdir in C++
+#ifndef LITE_WITH_ARM
+static void MkDirRecur(const std::string& path) {
+  CHECK_EQ(system(string_format("mkdir -p %s", path.c_str()).c_str()), 0)
+      << "Can't mkdir " << path;
+}
+#endif
+
 }  // namespace lite
 }  // namespace paddle
diff --git a/paddle/fluid/lite/utils/string.h b/paddle/fluid/lite/utils/string.h
index 31b131276..5e918bf5f 100644
--- a/paddle/fluid/lite/utils/string.h
+++ b/paddle/fluid/lite/utils/string.h
@@ -74,5 +74,15 @@ static std::string Repr(const std::vector<std::string>& v) {
   return "{" + Join(tmp, ",") + "}";
 }
 
+static std::vector<std::string> Split(const std::string& s, char delim) {
+  std::stringstream ss(s);
+  std::string line;
+  std::vector<std::string> res;
+  while (std::getline(ss, line, delim)) {
+    res.push_back(line);
+  }
+  return res;
+}
+
 }  // namespace lite
 }  // namespace paddle
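The new Split helper mirrors the getline loop ParseKernelType uses. A standalone check of its behavior — the function body is copied from the patch, while the wrapper main is illustrative only (note that std::getline drops a trailing empty field):

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    static std::vector<std::string> Split(const std::string& s, char delim) {
      std::stringstream ss(s);
      std::string line;
      std::vector<std::string> res;
      while (std::getline(ss, line, delim)) {
        res.push_back(line);
      }
      return res;
    }

    int main() {
      // Same format SerializeKernelType produces.
      for (const auto& piece : Split("fc/def/1/1/1", '/')) {
        std::cout << piece << "\n";  // fc, def, 1, 1, 1 on separate lines
      }
      return 0;
    }
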
-- 
GitLab