未验证 提交 2cee3058 编写于 作者: Y Yan Chunwei 提交者: GitHub

Lite/fix mobile combile2 (#18004)

上级 cbf59cbb
...@@ -43,7 +43,7 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) ...@@ -43,7 +43,7 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
if(NOT DEFINED TARGET_ARCH_ABI) if(NOT DEFINED TARGET_ARCH_ABI)
set(ARCH_ABI "arm64-v8a" CACHE STRING "Choose android platform") set(ARCH_ABI "arm64-v8a" CACHE STRING "Choose android platform")
endif() endif()
include(cross_compiling/host) include(cross_compiling/host)
include(cross_compiling/armlinux) include(cross_compiling/armlinux)
include(cross_compiling/android) include(cross_compiling/android)
......
...@@ -79,6 +79,10 @@ function (lite_deps TARGET) ...@@ -79,6 +79,10 @@ function (lite_deps TARGET)
endfunction() endfunction()
# Add names for lite libraries for latter compile. We use this name list to avoid compiling
# the whole fluid project to accelerate the compile speed.
set(offline_lib_registry_file "${CMAKE_BINARY_DIR}/lite_libs.txt")
file(WRITE ${offline_lib_registry_file} "") # clean
# cc_library with branch support. # cc_library with branch support.
# The branches: # The branches:
# X86_DEPS: works only when LITE_WITH_X86 is ON. # X86_DEPS: works only when LITE_WITH_X86 is ON.
...@@ -106,6 +110,9 @@ function(lite_cc_library TARGET) ...@@ -106,6 +110,9 @@ function(lite_cc_library TARGET)
) )
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS}) cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
# register a library name.
file(APPEND ${offline_lib_registry_file} "${TARGET}\n")
endfunction() endfunction()
function(lite_cc_binary TARGET) function(lite_cc_binary TARGET)
...@@ -131,10 +138,6 @@ endfunction() ...@@ -131,10 +138,6 @@ endfunction()
# Add a unit-test name to file for latter offline manual test. # Add a unit-test name to file for latter offline manual test.
set(offline_test_registry_file "${CMAKE_BINARY_DIR}/lite_tests.txt") set(offline_test_registry_file "${CMAKE_BINARY_DIR}/lite_tests.txt")
file(WRITE ${offline_test_registry_file} "") # clean file(WRITE ${offline_test_registry_file} "") # clean
function (register_test_offline TARGET)
file(APPEND ${offline_test_registry_file} "${TARGET}\n")
endfunction()
# Test lite modules. # Test lite modules.
function(lite_cc_test TARGET) function(lite_cc_test TARGET)
set(options "") set(options "")
...@@ -155,7 +158,7 @@ function(lite_cc_test TARGET) ...@@ -155,7 +158,7 @@ function(lite_cc_test TARGET)
HVY_DEPS ${args_HVY_DEPS} HVY_DEPS ${args_HVY_DEPS}
) )
_lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ARGS ${args_ARGS}) _lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ARGS ${args_ARGS})
register_test_offline("${TARGET}") file(APPEND ${offline_test_registry_file} "${TARGET}\n")
endfunction() endfunction()
add_subdirectory(core) add_subdirectory(core)
......
...@@ -19,7 +19,7 @@ endif() ...@@ -19,7 +19,7 @@ endif()
proto_library(framework_proto_lite SRCS framework.proto) proto_library(framework_proto_lite SRCS framework.proto)
cc_library(kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite) cc_library(kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite ${tensor_lite})
cc_library(variable_lite SRCS variable.cc) cc_library(variable_lite SRCS variable.cc)
cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite) cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite)
cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite}) cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite})
...@@ -30,7 +30,7 @@ cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapp ...@@ -30,7 +30,7 @@ cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapp
cc_library(types_lite SRCS types.cc) cc_library(types_lite SRCS types.cc)
cc_library(type_system SRCS type_system.cc DEPS ${tensor_lite} target_wrapper_lite) cc_library(type_system SRCS type_system.cc DEPS ${tensor_lite} target_wrapper_lite)
cc_library(program_lite SRCS program.cc DEPS op_lite kernel_lite compatible_pb_lite model_parser_lite) lite_cc_library(program_lite SRCS program.cc DEPS op_lite kernel_lite compatible_pb_lite model_parser_lite HVY_DEPS framework_proto)
cc_library(optimizer_lite SRCS optimizer.cc DEPS mir_pass_manager model_parser_lite program_lite) cc_library(optimizer_lite SRCS optimizer.cc DEPS mir_pass_manager model_parser_lite program_lite)
add_subdirectory(mir) add_subdirectory(mir)
......
...@@ -28,31 +28,34 @@ cc_test(test_ssa_graph SRCS ssa_graph_test.cc DEPS ...@@ -28,31 +28,34 @@ cc_test(test_ssa_graph SRCS ssa_graph_test.cc DEPS
mir_pass_manager mir_pass_manager
program_fake_utils program_fake_utils
) )
set(test_variable_place_infrence_pass_DEPS # lite_cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc
mul_op_lite # DEPS
feed_op_lite # mul_op_lite
fetch_op_lite # feed_op_lite
io_copy_op_lite # fetch_op_lite
${host_kernels} # io_copy_op_lite
mir_passes # ${host_kernels}
mir_pass_manager # mir_passes
optimizer_lite # mir_pass_manager
program_fake_utils # optimizer_lite
target_wrapper_host # program_fake_utils
) # target_wrapper_host
if (LITE_WITH_CUDA) # PROFILE_DEPS basic_profiler_lite
set(test_variable_place_infrence_pass_DEPS # CUDA_DEPS target_wrapper_cuda kernels_cuda
${test_variable_place_infrence_pass_DEPS} target_wrapper_cuda # ARM_DEPS mul_compute_arm
kernels_cuda # X86_DEPS mul_compute_x86
) # )
endif()
cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc DEPS
${test_variable_place_infrence_pass_DEPS}) lite_cc_library(pattern_matcher_lite SRCS pattern_matcher.cc DEPS mir_node mir_ssa_graph op_lite)
lite_cc_test(test_pattern_matcher_lite SRCS pattern_matcher_test.cc DEPS pattern_matcher_lite)
cc_library(pattern_matcher_lite SRCS pattern_matcher.cc DEPS mir_node mir_ssa_graph op_lite) lite_cc_library(pattern_matcher_high_api SRCS pattern_matcher_high_api.cc DEPS pattern_matcher_lite)
cc_test(test_pattern_matcher_lite SRCS pattern_matcher_test.cc DEPS pattern_matcher_lite)
cc_library(pattern_matcher_high_api SRCS pattern_matcher_high_api.cc DEPS pattern_matcher_lite) # TODO(wz) replace framework/proto to lite proto.
cc_test(test_pattern_matcher_high_api SRCS pattern_matcher_high_api_test.cc DEPS if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
pattern_matcher_high_api proto_desc mir_pass_manager fc_op_lite mul_op_lite elementwise_ops_lite # it depends on the fluid/framework/proto, that is too heavy for mobile execution.
mir_passes compatible_pb_lite program_lite ${ops_lite}) lite_cc_test(test_pattern_matcher_high_api SRCS pattern_matcher_high_api_test.cc DEPS
pattern_matcher_high_api proto_desc mir_pass_manager fc_op_lite mul_op_lite elementwise_ops_lite
mir_passes compatible_pb_lite program_lite ${ops_lite})
endif()
...@@ -37,6 +37,8 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) { ...@@ -37,6 +37,8 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
if (!node.IsStmt()) continue; if (!node.IsStmt()) continue;
auto& instruct = node.AsStmt(); auto& instruct = node.AsStmt();
std::vector<std::pair<size_t, std::unique_ptr<KernelBase>>> scored; std::vector<std::pair<size_t, std::unique_ptr<KernelBase>>> scored;
CHECK(!instruct.valid_kernels.empty()) << "No kernels found for "
<< instruct.op_type;
for (auto&& kernel : instruct.valid_kernels) { for (auto&& kernel : instruct.valid_kernels) {
size_t score = KernelGrade(*kernel); size_t score = KernelGrade(*kernel);
scored.emplace_back(score, std::move(kernel)); scored.emplace_back(score, std::move(kernel));
......
...@@ -42,6 +42,12 @@ TEST(variable_place_inference_pass, test) { ...@@ -42,6 +42,12 @@ TEST(variable_place_inference_pass, test) {
Place{ Place{
TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW), TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW),
}, },
Place{
TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW),
},
Place{
TARGET(kX86), PRECISION(kAny), DATALAYOUT(kAny),
},
}); });
Program program(*desc->Proto(), scope, places); Program program(*desc->Proto(), scope, places);
...@@ -58,7 +64,15 @@ TEST(variable_place_inference_pass, test) { ...@@ -58,7 +64,15 @@ TEST(variable_place_inference_pass, test) {
}); });
Place prefered_place{ Place prefered_place{
#ifdef PADDLE_WITH_CUDA
TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW), TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW),
#else
#ifdef PADDLE_WITH_ARM
TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW),
#else // X86
TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW),
#endif // ARM
#endif
}; };
optimizer.KernelPickPreferPlace(prefered_place); optimizer.KernelPickPreferPlace(prefered_place);
optimizer.Run(std::move(program), places, factor, passes); optimizer.Run(std::move(program), places, factor, passes);
...@@ -72,3 +86,16 @@ USE_LITE_OP(mul); ...@@ -72,3 +86,16 @@ USE_LITE_OP(mul);
USE_LITE_OP(feed); USE_LITE_OP(feed);
USE_LITE_OP(fetch); USE_LITE_OP(fetch);
USE_LITE_OP(io_copy); USE_LITE_OP(io_copy);
#ifdef LITE_WITH_X86
USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
#endif
#ifdef LITE_WITH_ARM
USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
#endif
#ifdef LITE_WITH_CUDA
USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device);
USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host);
#endif
...@@ -55,8 +55,14 @@ struct Program { ...@@ -55,8 +55,14 @@ struct Program {
const std::list<std::string>& weights() const { return weights_; } const std::list<std::string>& weights() const { return weights_; }
const std::list<std::string>& tmp_vars() const { return tmp_vars_; } const std::list<std::string>& tmp_vars() const { return tmp_vars_; }
std::list<std::string>* mutable_weights() { return &weights_; }
std::list<std::string>* mutable_tmp_vars() { return &tmp_vars_; }
const std::list<std::shared_ptr<OpLite>>& ops() const { return ops_; } const std::list<std::shared_ptr<OpLite>>& ops() const { return ops_; }
std::list<std::shared_ptr<OpLite>>* mutable_ops() { return &ops_; }
lite::Scope* exec_scope() { return exec_scope_; } lite::Scope* exec_scope() { return exec_scope_; }
lite::Scope* scope() { return scope_.get(); }
private: private:
// Build from a program and scope. // Build from a program and scope.
......
...@@ -33,11 +33,11 @@ Program FakeProgram() { ...@@ -33,11 +33,11 @@ Program FakeProgram() {
std::string w1 = "w" + std::to_string(id); std::string w1 = "w" + std::to_string(id);
std::string b1 = "b" + std::to_string(id); std::string b1 = "b" + std::to_string(id);
std::string out1 = "out" + std::to_string(id); std::string out1 = "out" + std::to_string(id);
auto w1v = program.scope_->Var(w1)->GetMutable<lite::Tensor>(); auto w1v = program.scope()->Var(w1)->GetMutable<lite::Tensor>();
auto b1v = program.scope_->Var(b1)->GetMutable<lite::Tensor>(); auto b1v = program.scope()->Var(b1)->GetMutable<lite::Tensor>();
auto out1v = program.scope_->Var(out1)->GetMutable<lite::Tensor>(); auto out1v = program.scope()->Var(out1)->GetMutable<lite::Tensor>();
lite::OpDesc desc; cpp::OpDesc desc;
desc.SetInput("Input", {x}); desc.SetInput("Input", {x});
desc.SetInput("W", {w1}); desc.SetInput("W", {w1});
desc.SetInput("Bias", {b1}); desc.SetInput("Bias", {b1});
...@@ -46,12 +46,12 @@ Program FakeProgram() { ...@@ -46,12 +46,12 @@ Program FakeProgram() {
desc.SetAttr("in_num_col_dims", 1); desc.SetAttr("in_num_col_dims", 1);
// add to input // add to input
program.tmp_vars_.push_back(w1); program.mutable_tmp_vars()->push_back(w1);
program.tmp_vars_.push_back(b1); program.mutable_tmp_vars()->push_back(b1);
auto fc_op = LiteOpRegistry::Global().Create("fc"); auto fc_op = LiteOpRegistry::Global().Create("fc");
fc_op->Attach(desc, program.scope_.get()); fc_op->Attach(desc, program.scope());
program.ops_.emplace_back(std::move(fc_op)); program.mutable_ops()->emplace_back(std::move(fc_op));
w1v->Resize(DDimHvy(std::vector<int64_t>({100, 100}))); w1v->Resize(DDimHvy(std::vector<int64_t>({100, 100})));
b1v->Resize(DDimHvy(std::vector<int64_t>({100, 1}))); b1v->Resize(DDimHvy(std::vector<int64_t>({100, 1})));
...@@ -64,8 +64,8 @@ Program FakeProgram() { ...@@ -64,8 +64,8 @@ Program FakeProgram() {
// out1, w2, b2 -fc-> out2 // out1, w2, b2 -fc-> out2
std::string x = "x"; std::string x = "x";
program.tmp_vars_.push_back(x); program.mutable_tmp_vars()->push_back(x);
auto* xv = program.scope_->Var(x)->GetMutable<lite::Tensor>(); auto* xv = program.scope()->Var(x)->GetMutable<lite::Tensor>();
xv->Resize(DDimHvy(std::vector<int64_t>({100, 100}))); xv->Resize(DDimHvy(std::vector<int64_t>({100, 100})));
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
......
lite_cc_library(gen_code_lite SRCS gen_code.cc lite_cc_library(gen_code_lite SRCS gen_code.cc
DEPS program_lite op_lite scope DEPS program_lite op_lite scope_lite
cpp_op_desc_lite cpp_op_desc_lite
HVY_DEPS operator) HVY_DEPS operator)
lite_cc_library(paddle_infer_gencode SRCS paddle_infer.cc DEPS program_lite utils_lite) lite_cc_library(paddle_infer_gencode SRCS paddle_infer.cc DEPS program_lite utils_lite)
lite_cc_test(test_gen_code_lite SRCS gen_code_test.cc DEPS gen_code_lite ${tensor_lite}
mul_op_lite
compatible_pb_lite
model_parser_lite
X86_DEPS mul_compute_x86
ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
lite_cc_test(test_gen_code_lite SRCS gen_code_test.cc DEPS gen_code_lite ${tensor_lite}
mul_op_lite
compatible_pb_lite
model_parser_lite
X86_DEPS mul_compute_x86
ARM_DEPS mul_compute_arm
ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
lite_cc_library(__generated_code__ lite_cc_library(__generated_code__
SRCS ${CMAKE_BINARY_DIR}/paddle/fluid/lite/gen_code/__generated_code__.cc SRCS ${CMAKE_BINARY_DIR}/paddle/fluid/lite/gen_code/__generated_code__.cc
DEPS scope_lite op_lite kernel_lite paddle_infer_gencode DEPS scope_lite op_lite kernel_lite paddle_infer_gencode
......
...@@ -136,4 +136,10 @@ TEST(gen_code, optimized_program) { ...@@ -136,4 +136,10 @@ TEST(gen_code, optimized_program) {
} // namespace paddle } // namespace paddle
USE_LITE_OP(mul); USE_LITE_OP(mul);
#ifdef LITE_WITH_X86
USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def); USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
#endif
#ifdef LITE_WITH_ARM
USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
#endif
...@@ -12,4 +12,4 @@ set(host_kernels ...@@ -12,4 +12,4 @@ set(host_kernels
reshape_compute_host reshape_compute_host
) )
set(host_kernels "${host_kernels}" CACHE INTERNAL "host kernels") set(host_kernels "${host_kernels}" CACHE GLOBAL "host kernels")
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
set -ex set -ex
TESTS_FILE="./lite_tests.txt" TESTS_FILE="./lite_tests.txt"
LIBS_FILE="./lite_libs.txt"
readonly common_flags="-DWITH_LITE=ON -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF -DWITH_PYTHON=OFF -DWITH_TESTING=ON -DLITE_WITH_ARM=OFF" readonly common_flags="-DWITH_LITE=ON -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF -DWITH_PYTHON=OFF -DWITH_TESTING=ON -DLITE_WITH_ARM=OFF"
...@@ -42,18 +43,21 @@ function cmake_arm { ...@@ -42,18 +43,21 @@ function cmake_arm {
-DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2 -DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2
} }
# function build { function build {
# file=$1 file=$1
# for _test in $(cat $file); do for _test in $(cat $file); do
# make $_test -j$(expr $(nproc) - 2) make $_test -j$(expr $(nproc) - 2)
# done done
# } }
# It will eagerly test all lite related unittests. # It will eagerly test all lite related unittests.
function test_lite { function test_lite {
local file=$1 local file=$1
echo "file: ${file}" echo "file: ${file}"
for _test in $(cat $file); do for _test in $(cat $file); do
# We move the build phase here to make the 'gen_code' test compiles after the
# corresponding test is executed and the C++ code generates.
make $_test -j$(expr $(nproc) - 2) make $_test -j$(expr $(nproc) - 2)
ctest -R $_test -V ctest -R $_test -V
done done
...@@ -98,8 +102,10 @@ function build_test_server { ...@@ -98,8 +102,10 @@ function build_test_server {
cd ./build cd ./build
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/paddle/build/third_party/install/mklml/lib" export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/paddle/build/third_party/install/mklml/lib"
cmake_x86_for_CI cmake_x86_for_CI
#build $TESTS_FILE # compile the tests and execute them.
test_lite $TESTS_FILE test_lite $TESTS_FILE
# build the remaining libraries to check compiling error.
build $LIBS_FILE
} }
# Build the code and run lite server tests. This is executed in the CI system. # Build the code and run lite server tests. This is executed in the CI system.
...@@ -129,7 +135,6 @@ function build_test_arm { ...@@ -129,7 +135,6 @@ function build_test_arm {
build_dir=build.lite.${os}.${abi} build_dir=build.lite.${os}.${abi}
mkdir -p $build_dir mkdir -p $build_dir
cd $build_dir cd $build_dir
cmake_arm ${os} ${abi} cmake_arm ${os} ${abi}
build $TESTS_FILE build $TESTS_FILE
...@@ -177,10 +182,11 @@ function main { ...@@ -177,10 +182,11 @@ function main {
TESTS_FILE="${i#*=}" TESTS_FILE="${i#*=}"
shift shift
;; ;;
# build) build)
# build $TESTS_FILE build $TESTS_FILE
# shift build $LIBS_FILE
# ;; shift
;;
cmake_x86) cmake_x86)
cmake_x86 cmake_x86
shift shift
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册