From b8572aa388128ce8049762ffb52dadca35c24760 Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Tue, 11 Jun 2019 21:56:24 +0800 Subject: [PATCH] Lite/fix mobile combile2 (#18004) --- CMakeLists.txt | 2 +- paddle/fluid/lite/CMakeLists.txt | 13 +++-- paddle/fluid/lite/core/CMakeLists.txt | 4 +- paddle/fluid/lite/core/mir/CMakeLists.txt | 55 ++++++++++--------- .../lite/core/mir/static_kernel_pick_pass.cc | 2 + .../mir/variable_place_inference_pass_test.cc | 27 +++++++++ paddle/fluid/lite/core/program.h | 6 ++ paddle/fluid/lite/core/program_fake_utils.h | 20 +++---- paddle/fluid/lite/gen_code/CMakeLists.txt | 17 +++--- paddle/fluid/lite/gen_code/gen_code_test.cc | 6 ++ paddle/fluid/lite/kernels/host/CMakeLists.txt | 2 +- paddle/fluid/lite/tools/build.sh | 30 ++++++---- 12 files changed, 119 insertions(+), 65 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ef4a4c35..036a5faf2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,7 +43,7 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) if(NOT DEFINED TARGET_ARCH_ABI) set(ARCH_ABI "arm64-v8a" CACHE STRING "Choose android platform") endif() - + include(cross_compiling/host) include(cross_compiling/armlinux) include(cross_compiling/android) diff --git a/paddle/fluid/lite/CMakeLists.txt b/paddle/fluid/lite/CMakeLists.txt index 2c263cc4f..ac9ff84da 100644 --- a/paddle/fluid/lite/CMakeLists.txt +++ b/paddle/fluid/lite/CMakeLists.txt @@ -79,6 +79,10 @@ function (lite_deps TARGET) endfunction() +# Add names for lite libraries for latter compile. We use this name list to avoid compiling +# the whole fluid project to accelerate the compile speed. +set(offline_lib_registry_file "${CMAKE_BINARY_DIR}/lite_libs.txt") +file(WRITE ${offline_lib_registry_file} "") # clean # cc_library with branch support. # The branches: # X86_DEPS: works only when LITE_WITH_X86 is ON. @@ -106,6 +110,9 @@ function(lite_cc_library TARGET) ) cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS}) + + # register a library name. + file(APPEND ${offline_lib_registry_file} "${TARGET}\n") endfunction() function(lite_cc_binary TARGET) @@ -131,10 +138,6 @@ endfunction() # Add a unit-test name to file for latter offline manual test. set(offline_test_registry_file "${CMAKE_BINARY_DIR}/lite_tests.txt") file(WRITE ${offline_test_registry_file} "") # clean -function (register_test_offline TARGET) - file(APPEND ${offline_test_registry_file} "${TARGET}\n") -endfunction() - # Test lite modules. function(lite_cc_test TARGET) set(options "") @@ -155,7 +158,7 @@ function(lite_cc_test TARGET) HVY_DEPS ${args_HVY_DEPS} ) _lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ARGS ${args_ARGS}) - register_test_offline("${TARGET}") + file(APPEND ${offline_test_registry_file} "${TARGET}\n") endfunction() add_subdirectory(core) diff --git a/paddle/fluid/lite/core/CMakeLists.txt b/paddle/fluid/lite/core/CMakeLists.txt index a71420b8c..e5aef8d84 100644 --- a/paddle/fluid/lite/core/CMakeLists.txt +++ b/paddle/fluid/lite/core/CMakeLists.txt @@ -19,7 +19,7 @@ endif() proto_library(framework_proto_lite SRCS framework.proto) -cc_library(kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite) +cc_library(kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite ${tensor_lite}) cc_library(variable_lite SRCS variable.cc) cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite) cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite}) @@ -30,7 +30,7 @@ cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapp cc_library(types_lite SRCS types.cc) cc_library(type_system SRCS type_system.cc DEPS ${tensor_lite} target_wrapper_lite) -cc_library(program_lite SRCS program.cc DEPS op_lite kernel_lite compatible_pb_lite model_parser_lite) +lite_cc_library(program_lite SRCS program.cc DEPS op_lite kernel_lite compatible_pb_lite model_parser_lite HVY_DEPS framework_proto) cc_library(optimizer_lite SRCS optimizer.cc DEPS mir_pass_manager model_parser_lite program_lite) add_subdirectory(mir) diff --git a/paddle/fluid/lite/core/mir/CMakeLists.txt b/paddle/fluid/lite/core/mir/CMakeLists.txt index 26dc50ab7..84cba88d1 100644 --- a/paddle/fluid/lite/core/mir/CMakeLists.txt +++ b/paddle/fluid/lite/core/mir/CMakeLists.txt @@ -28,31 +28,34 @@ cc_test(test_ssa_graph SRCS ssa_graph_test.cc DEPS mir_pass_manager program_fake_utils ) -set(test_variable_place_infrence_pass_DEPS - mul_op_lite - feed_op_lite - fetch_op_lite - io_copy_op_lite - ${host_kernels} - mir_passes - mir_pass_manager - optimizer_lite - program_fake_utils - target_wrapper_host - ) -if (LITE_WITH_CUDA) - set(test_variable_place_infrence_pass_DEPS - ${test_variable_place_infrence_pass_DEPS} target_wrapper_cuda - kernels_cuda - ) -endif() -cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc DEPS - ${test_variable_place_infrence_pass_DEPS}) +# lite_cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc +# DEPS +# mul_op_lite +# feed_op_lite +# fetch_op_lite +# io_copy_op_lite +# ${host_kernels} +# mir_passes +# mir_pass_manager +# optimizer_lite +# program_fake_utils +# target_wrapper_host +# PROFILE_DEPS basic_profiler_lite +# CUDA_DEPS target_wrapper_cuda kernels_cuda +# ARM_DEPS mul_compute_arm +# X86_DEPS mul_compute_x86 +# ) + + +lite_cc_library(pattern_matcher_lite SRCS pattern_matcher.cc DEPS mir_node mir_ssa_graph op_lite) +lite_cc_test(test_pattern_matcher_lite SRCS pattern_matcher_test.cc DEPS pattern_matcher_lite) -cc_library(pattern_matcher_lite SRCS pattern_matcher.cc DEPS mir_node mir_ssa_graph op_lite) -cc_test(test_pattern_matcher_lite SRCS pattern_matcher_test.cc DEPS pattern_matcher_lite) +lite_cc_library(pattern_matcher_high_api SRCS pattern_matcher_high_api.cc DEPS pattern_matcher_lite) -cc_library(pattern_matcher_high_api SRCS pattern_matcher_high_api.cc DEPS pattern_matcher_lite) -cc_test(test_pattern_matcher_high_api SRCS pattern_matcher_high_api_test.cc DEPS - pattern_matcher_high_api proto_desc mir_pass_manager fc_op_lite mul_op_lite elementwise_ops_lite - mir_passes compatible_pb_lite program_lite ${ops_lite}) +# TODO(wz) replace framework/proto to lite proto. +if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + # it depends on the fluid/framework/proto, that is too heavy for mobile execution. + lite_cc_test(test_pattern_matcher_high_api SRCS pattern_matcher_high_api_test.cc DEPS + pattern_matcher_high_api proto_desc mir_pass_manager fc_op_lite mul_op_lite elementwise_ops_lite + mir_passes compatible_pb_lite program_lite ${ops_lite}) +endif() diff --git a/paddle/fluid/lite/core/mir/static_kernel_pick_pass.cc b/paddle/fluid/lite/core/mir/static_kernel_pick_pass.cc index a35996644..9d48c123a 100644 --- a/paddle/fluid/lite/core/mir/static_kernel_pick_pass.cc +++ b/paddle/fluid/lite/core/mir/static_kernel_pick_pass.cc @@ -37,6 +37,8 @@ void StaticKernelPickPass::Apply(const std::unique_ptr& graph) { if (!node.IsStmt()) continue; auto& instruct = node.AsStmt(); std::vector>> scored; + CHECK(!instruct.valid_kernels.empty()) << "No kernels found for " + << instruct.op_type; for (auto&& kernel : instruct.valid_kernels) { size_t score = KernelGrade(*kernel); scored.emplace_back(score, std::move(kernel)); diff --git a/paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc b/paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc index 9c33ff698..d6b8561c3 100644 --- a/paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc +++ b/paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc @@ -42,6 +42,12 @@ TEST(variable_place_inference_pass, test) { Place{ TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW), }, + Place{ + TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW), + }, + Place{ + TARGET(kX86), PRECISION(kAny), DATALAYOUT(kAny), + }, }); Program program(*desc->Proto(), scope, places); @@ -58,7 +64,15 @@ TEST(variable_place_inference_pass, test) { }); Place prefered_place{ +#ifdef PADDLE_WITH_CUDA TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW), +#else +#ifdef PADDLE_WITH_ARM + TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW), +#else // X86 + TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW), +#endif // ARM +#endif }; optimizer.KernelPickPreferPlace(prefered_place); optimizer.Run(std::move(program), places, factor, passes); @@ -72,3 +86,16 @@ USE_LITE_OP(mul); USE_LITE_OP(feed); USE_LITE_OP(fetch); USE_LITE_OP(io_copy); + +#ifdef LITE_WITH_X86 +USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def); +#endif + +#ifdef LITE_WITH_ARM +USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def); +#endif + +#ifdef LITE_WITH_CUDA +USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device); +USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host); +#endif diff --git a/paddle/fluid/lite/core/program.h b/paddle/fluid/lite/core/program.h index 450a625bc..4f2f65d3f 100644 --- a/paddle/fluid/lite/core/program.h +++ b/paddle/fluid/lite/core/program.h @@ -55,8 +55,14 @@ struct Program { const std::list& weights() const { return weights_; } const std::list& tmp_vars() const { return tmp_vars_; } + std::list* mutable_weights() { return &weights_; } + std::list* mutable_tmp_vars() { return &tmp_vars_; } + const std::list>& ops() const { return ops_; } + std::list>* mutable_ops() { return &ops_; } + lite::Scope* exec_scope() { return exec_scope_; } + lite::Scope* scope() { return scope_.get(); } private: // Build from a program and scope. diff --git a/paddle/fluid/lite/core/program_fake_utils.h b/paddle/fluid/lite/core/program_fake_utils.h index a18fa9f17..b36e47bf1 100644 --- a/paddle/fluid/lite/core/program_fake_utils.h +++ b/paddle/fluid/lite/core/program_fake_utils.h @@ -33,11 +33,11 @@ Program FakeProgram() { std::string w1 = "w" + std::to_string(id); std::string b1 = "b" + std::to_string(id); std::string out1 = "out" + std::to_string(id); - auto w1v = program.scope_->Var(w1)->GetMutable(); - auto b1v = program.scope_->Var(b1)->GetMutable(); - auto out1v = program.scope_->Var(out1)->GetMutable(); + auto w1v = program.scope()->Var(w1)->GetMutable(); + auto b1v = program.scope()->Var(b1)->GetMutable(); + auto out1v = program.scope()->Var(out1)->GetMutable(); - lite::OpDesc desc; + cpp::OpDesc desc; desc.SetInput("Input", {x}); desc.SetInput("W", {w1}); desc.SetInput("Bias", {b1}); @@ -46,12 +46,12 @@ Program FakeProgram() { desc.SetAttr("in_num_col_dims", 1); // add to input - program.tmp_vars_.push_back(w1); - program.tmp_vars_.push_back(b1); + program.mutable_tmp_vars()->push_back(w1); + program.mutable_tmp_vars()->push_back(b1); auto fc_op = LiteOpRegistry::Global().Create("fc"); - fc_op->Attach(desc, program.scope_.get()); - program.ops_.emplace_back(std::move(fc_op)); + fc_op->Attach(desc, program.scope()); + program.mutable_ops()->emplace_back(std::move(fc_op)); w1v->Resize(DDimHvy(std::vector({100, 100}))); b1v->Resize(DDimHvy(std::vector({100, 1}))); @@ -64,8 +64,8 @@ Program FakeProgram() { // out1, w2, b2 -fc-> out2 std::string x = "x"; - program.tmp_vars_.push_back(x); - auto* xv = program.scope_->Var(x)->GetMutable(); + program.mutable_tmp_vars()->push_back(x); + auto* xv = program.scope()->Var(x)->GetMutable(); xv->Resize(DDimHvy(std::vector({100, 100}))); for (int i = 0; i < 3; i++) { diff --git a/paddle/fluid/lite/gen_code/CMakeLists.txt b/paddle/fluid/lite/gen_code/CMakeLists.txt index f7c38c176..bacfc3e98 100644 --- a/paddle/fluid/lite/gen_code/CMakeLists.txt +++ b/paddle/fluid/lite/gen_code/CMakeLists.txt @@ -1,17 +1,18 @@ lite_cc_library(gen_code_lite SRCS gen_code.cc - DEPS program_lite op_lite scope + DEPS program_lite op_lite scope_lite cpp_op_desc_lite HVY_DEPS operator) lite_cc_library(paddle_infer_gencode SRCS paddle_infer.cc DEPS program_lite utils_lite) -lite_cc_test(test_gen_code_lite SRCS gen_code_test.cc DEPS gen_code_lite ${tensor_lite} - mul_op_lite - compatible_pb_lite - model_parser_lite - X86_DEPS mul_compute_x86 - ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) - if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + lite_cc_test(test_gen_code_lite SRCS gen_code_test.cc DEPS gen_code_lite ${tensor_lite} + mul_op_lite + compatible_pb_lite + model_parser_lite + X86_DEPS mul_compute_x86 + ARM_DEPS mul_compute_arm + ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) + lite_cc_library(__generated_code__ SRCS ${CMAKE_BINARY_DIR}/paddle/fluid/lite/gen_code/__generated_code__.cc DEPS scope_lite op_lite kernel_lite paddle_infer_gencode diff --git a/paddle/fluid/lite/gen_code/gen_code_test.cc b/paddle/fluid/lite/gen_code/gen_code_test.cc index 96ef56e85..c27b775c0 100644 --- a/paddle/fluid/lite/gen_code/gen_code_test.cc +++ b/paddle/fluid/lite/gen_code/gen_code_test.cc @@ -136,4 +136,10 @@ TEST(gen_code, optimized_program) { } // namespace paddle USE_LITE_OP(mul); +#ifdef LITE_WITH_X86 USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def); +#endif + +#ifdef LITE_WITH_ARM +USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def); +#endif diff --git a/paddle/fluid/lite/kernels/host/CMakeLists.txt b/paddle/fluid/lite/kernels/host/CMakeLists.txt index 03c0023cb..a71a8e13a 100644 --- a/paddle/fluid/lite/kernels/host/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/host/CMakeLists.txt @@ -12,4 +12,4 @@ set(host_kernels reshape_compute_host ) -set(host_kernels "${host_kernels}" CACHE INTERNAL "host kernels") +set(host_kernels "${host_kernels}" CACHE GLOBAL "host kernels") diff --git a/paddle/fluid/lite/tools/build.sh b/paddle/fluid/lite/tools/build.sh index 5afbc003c..2a31f8d1f 100755 --- a/paddle/fluid/lite/tools/build.sh +++ b/paddle/fluid/lite/tools/build.sh @@ -2,6 +2,7 @@ set -ex TESTS_FILE="./lite_tests.txt" +LIBS_FILE="./lite_libs.txt" readonly common_flags="-DWITH_LITE=ON -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF -DWITH_PYTHON=OFF -DWITH_TESTING=ON -DLITE_WITH_ARM=OFF" @@ -42,18 +43,21 @@ function cmake_arm { -DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2 } -# function build { -# file=$1 -# for _test in $(cat $file); do -# make $_test -j$(expr $(nproc) - 2) -# done -# } +function build { + file=$1 + for _test in $(cat $file); do + make $_test -j$(expr $(nproc) - 2) + done +} # It will eagerly test all lite related unittests. function test_lite { local file=$1 echo "file: ${file}" + for _test in $(cat $file); do + # We move the build phase here to make the 'gen_code' test compiles after the + # corresponding test is executed and the C++ code generates. make $_test -j$(expr $(nproc) - 2) ctest -R $_test -V done @@ -98,8 +102,10 @@ function build_test_server { cd ./build export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/paddle/build/third_party/install/mklml/lib" cmake_x86_for_CI - #build $TESTS_FILE + # compile the tests and execute them. test_lite $TESTS_FILE + # build the remaining libraries to check compiling error. + build $LIBS_FILE } # Build the code and run lite server tests. This is executed in the CI system. @@ -129,7 +135,6 @@ function build_test_arm { build_dir=build.lite.${os}.${abi} mkdir -p $build_dir cd $build_dir - cmake_arm ${os} ${abi} build $TESTS_FILE @@ -177,10 +182,11 @@ function main { TESTS_FILE="${i#*=}" shift ;; - # build) - # build $TESTS_FILE - # shift - # ;; + build) + build $TESTS_FILE + build $LIBS_FILE + shift + ;; cmake_x86) cmake_x86 shift -- GitLab