提交 b8572aa3 编写于 作者: Y Yan Chunwei 提交者: GitHub

Lite/fix mobile combile2 (#18004)

上级 fb600267
......@@ -43,7 +43,7 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
# When TARGET_ARCH_ABI is not defined, create the ARCH_ABI cache entry with the
# default value "arm64-v8a".
# NOTE(review): the guard tests TARGET_ARCH_ABI while the default is stored in
# ARCH_ABI -- confirm that using two different variable names is intentional.
if(NOT DEFINED TARGET_ARCH_ABI)
set(ARCH_ABI "arm64-v8a" CACHE STRING "Choose android platform")
endif()
include(cross_compiling/host)
include(cross_compiling/armlinux)
include(cross_compiling/android)
......
......@@ -79,6 +79,10 @@ function (lite_deps TARGET)
endfunction()
# Registry of lite library target names, one name per line. The list is used
# later so that only the lite libraries are compiled instead of the whole fluid
# project, which speeds up compilation.
set(offline_lib_registry_file "${CMAKE_BINARY_DIR}/lite_libs.txt")
# Truncate the registry so that every configure run starts from an empty file.
file(WRITE "${offline_lib_registry_file}" "")
# cc_library with branch support.
# The branches:
# X86_DEPS: works only when LITE_WITH_X86 is ON.
......@@ -106,6 +110,9 @@ function(lite_cc_library TARGET)
)
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
# register a library name.
file(APPEND ${offline_lib_registry_file} "${TARGET}\n")
endfunction()
function(lite_cc_binary TARGET)
......@@ -131,10 +138,6 @@ endfunction()
# Registry of unit-test target names, one name per line, kept for later offline
# manual testing.
set(offline_test_registry_file "${CMAKE_BINARY_DIR}/lite_tests.txt")
# Truncate the registry so that every configure run starts from an empty file.
file(WRITE "${offline_test_registry_file}" "")

# Append one test target name to the offline test registry.
#   TARGET: name of the test target to record.
function(register_test_offline TARGET)
  file(APPEND "${offline_test_registry_file}" "${TARGET}\n")
endfunction()
# Test lite modules.
function(lite_cc_test TARGET)
set(options "")
......@@ -155,7 +158,7 @@ function(lite_cc_test TARGET)
HVY_DEPS ${args_HVY_DEPS}
)
_lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ARGS ${args_ARGS})
register_test_offline("${TARGET}")
file(APPEND ${offline_test_registry_file} "${TARGET}\n")
endfunction()
add_subdirectory(core)
......
......@@ -19,7 +19,7 @@ endif()
proto_library(framework_proto_lite SRCS framework.proto)
cc_library(kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite)
cc_library(kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite ${tensor_lite})
cc_library(variable_lite SRCS variable.cc)
cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite)
cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite})
......@@ -30,7 +30,7 @@ cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapp
cc_library(types_lite SRCS types.cc)
cc_library(type_system SRCS type_system.cc DEPS ${tensor_lite} target_wrapper_lite)
cc_library(program_lite SRCS program.cc DEPS op_lite kernel_lite compatible_pb_lite model_parser_lite)
lite_cc_library(program_lite SRCS program.cc DEPS op_lite kernel_lite compatible_pb_lite model_parser_lite HVY_DEPS framework_proto)
cc_library(optimizer_lite SRCS optimizer.cc DEPS mir_pass_manager model_parser_lite program_lite)
add_subdirectory(mir)
......
......@@ -28,31 +28,34 @@ cc_test(test_ssa_graph SRCS ssa_graph_test.cc DEPS
mir_pass_manager
program_fake_utils
)
set(test_variable_place_infrence_pass_DEPS
mul_op_lite
feed_op_lite
fetch_op_lite
io_copy_op_lite
${host_kernels}
mir_passes
mir_pass_manager
optimizer_lite
program_fake_utils
target_wrapper_host
)
if (LITE_WITH_CUDA)
set(test_variable_place_infrence_pass_DEPS
${test_variable_place_infrence_pass_DEPS} target_wrapper_cuda
kernels_cuda
)
endif()
cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc DEPS
${test_variable_place_infrence_pass_DEPS})
# lite_cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc
# DEPS
# mul_op_lite
# feed_op_lite
# fetch_op_lite
# io_copy_op_lite
# ${host_kernels}
# mir_passes
# mir_pass_manager
# optimizer_lite
# program_fake_utils
# target_wrapper_host
# PROFILE_DEPS basic_profiler_lite
# CUDA_DEPS target_wrapper_cuda kernels_cuda
# ARM_DEPS mul_compute_arm
# X86_DEPS mul_compute_x86
# )
lite_cc_library(pattern_matcher_lite SRCS pattern_matcher.cc DEPS mir_node mir_ssa_graph op_lite)
lite_cc_test(test_pattern_matcher_lite SRCS pattern_matcher_test.cc DEPS pattern_matcher_lite)
cc_library(pattern_matcher_lite SRCS pattern_matcher.cc DEPS mir_node mir_ssa_graph op_lite)
cc_test(test_pattern_matcher_lite SRCS pattern_matcher_test.cc DEPS pattern_matcher_lite)
lite_cc_library(pattern_matcher_high_api SRCS pattern_matcher_high_api.cc DEPS pattern_matcher_lite)
cc_library(pattern_matcher_high_api SRCS pattern_matcher_high_api.cc DEPS pattern_matcher_lite)
cc_test(test_pattern_matcher_high_api SRCS pattern_matcher_high_api_test.cc DEPS
pattern_matcher_high_api proto_desc mir_pass_manager fc_op_lite mul_op_lite elementwise_ops_lite
mir_passes compatible_pb_lite program_lite ${ops_lite})
# TODO(wz): replace framework/proto with the lite proto.
# NOTE(review): the comment inside this branch says the test is too heavy for
# mobile execution, yet the guard registers it when
# LITE_WITH_LIGHT_WEIGHT_FRAMEWORK is ON -- confirm whether a `NOT` is missing.
if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
# This test depends on fluid/framework/proto, which is too heavy for mobile execution.
lite_cc_test(test_pattern_matcher_high_api SRCS pattern_matcher_high_api_test.cc DEPS
pattern_matcher_high_api proto_desc mir_pass_manager fc_op_lite mul_op_lite elementwise_ops_lite
mir_passes compatible_pb_lite program_lite ${ops_lite})
endif()
......@@ -37,6 +37,8 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
if (!node.IsStmt()) continue;
auto& instruct = node.AsStmt();
std::vector<std::pair<size_t, std::unique_ptr<KernelBase>>> scored;
CHECK(!instruct.valid_kernels.empty()) << "No kernels found for "
<< instruct.op_type;
for (auto&& kernel : instruct.valid_kernels) {
size_t score = KernelGrade(*kernel);
scored.emplace_back(score, std::move(kernel));
......
......@@ -42,6 +42,12 @@ TEST(variable_place_inference_pass, test) {
Place{
TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW),
},
Place{
TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW),
},
Place{
TARGET(kX86), PRECISION(kAny), DATALAYOUT(kAny),
},
});
Program program(*desc->Proto(), scope, places);
......@@ -58,7 +64,15 @@ TEST(variable_place_inference_pass, test) {
});
Place prefered_place{
#ifdef PADDLE_WITH_CUDA
TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW),
#else
#ifdef PADDLE_WITH_ARM
TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW),
#else // X86
TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW),
#endif // ARM
#endif
};
optimizer.KernelPickPreferPlace(prefered_place);
optimizer.Run(std::move(program), places, factor, passes);
......@@ -72,3 +86,16 @@ USE_LITE_OP(mul);
USE_LITE_OP(feed);
USE_LITE_OP(fetch);
USE_LITE_OP(io_copy);
// Each kernel reference below is wrapped in the LITE_WITH_* guard of its
// target, so it is only compiled when that target is enabled.
// NOTE(review): presumably USE_LITE_KERNEL pulls the named kernel's
// registration into this test binary -- confirm against the macro definition.
#ifdef LITE_WITH_X86
USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
#endif
#ifdef LITE_WITH_ARM
USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
#endif
#ifdef LITE_WITH_CUDA
// Both io_copy aliases (host_to_device and device_to_host) are referenced.
USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device);
USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host);
#endif
#endif
......@@ -55,8 +55,14 @@ struct Program {
// Read-only access to the weights_ list.
const std::list<std::string>& weights() const { return weights_; }
// Read-only access to the tmp_vars_ list.
const std::list<std::string>& tmp_vars() const { return tmp_vars_; }
// Mutable access to the weights_ list; returns a pointer to the member itself.
std::list<std::string>* mutable_weights() { return &weights_; }
// Mutable access to the tmp_vars_ list; returns a pointer to the member itself.
std::list<std::string>* mutable_tmp_vars() { return &tmp_vars_; }
// Read-only access to the ops_ list.
const std::list<std::shared_ptr<OpLite>>& ops() const { return ops_; }
// Mutable access to the ops_ list; returns a pointer to the member itself.
std::list<std::shared_ptr<OpLite>>* mutable_ops() { return &ops_; }
// Returns the exec_scope_ pointer as stored.
lite::Scope* exec_scope() { return exec_scope_; }
// Returns the raw pointer obtained from scope_.get().
lite::Scope* scope() { return scope_.get(); }
private:
// Build from a program and scope.
......
......@@ -33,11 +33,11 @@ Program FakeProgram() {
std::string w1 = "w" + std::to_string(id);
std::string b1 = "b" + std::to_string(id);
std::string out1 = "out" + std::to_string(id);
auto w1v = program.scope_->Var(w1)->GetMutable<lite::Tensor>();
auto b1v = program.scope_->Var(b1)->GetMutable<lite::Tensor>();
auto out1v = program.scope_->Var(out1)->GetMutable<lite::Tensor>();
auto w1v = program.scope()->Var(w1)->GetMutable<lite::Tensor>();
auto b1v = program.scope()->Var(b1)->GetMutable<lite::Tensor>();
auto out1v = program.scope()->Var(out1)->GetMutable<lite::Tensor>();
lite::OpDesc desc;
cpp::OpDesc desc;
desc.SetInput("Input", {x});
desc.SetInput("W", {w1});
desc.SetInput("Bias", {b1});
......@@ -46,12 +46,12 @@ Program FakeProgram() {
desc.SetAttr("in_num_col_dims", 1);
// add to input
program.tmp_vars_.push_back(w1);
program.tmp_vars_.push_back(b1);
program.mutable_tmp_vars()->push_back(w1);
program.mutable_tmp_vars()->push_back(b1);
auto fc_op = LiteOpRegistry::Global().Create("fc");
fc_op->Attach(desc, program.scope_.get());
program.ops_.emplace_back(std::move(fc_op));
fc_op->Attach(desc, program.scope());
program.mutable_ops()->emplace_back(std::move(fc_op));
w1v->Resize(DDimHvy(std::vector<int64_t>({100, 100})));
b1v->Resize(DDimHvy(std::vector<int64_t>({100, 1})));
......@@ -64,8 +64,8 @@ Program FakeProgram() {
// out1, w2, b2 -fc-> out2
std::string x = "x";
program.tmp_vars_.push_back(x);
auto* xv = program.scope_->Var(x)->GetMutable<lite::Tensor>();
program.mutable_tmp_vars()->push_back(x);
auto* xv = program.scope()->Var(x)->GetMutable<lite::Tensor>();
xv->Resize(DDimHvy(std::vector<int64_t>({100, 100})));
for (int i = 0; i < 3; i++) {
......
lite_cc_library(gen_code_lite SRCS gen_code.cc
DEPS program_lite op_lite scope
DEPS program_lite op_lite scope_lite
cpp_op_desc_lite
HVY_DEPS operator)
lite_cc_library(paddle_infer_gencode SRCS paddle_infer.cc DEPS program_lite utils_lite)
lite_cc_test(test_gen_code_lite SRCS gen_code_test.cc DEPS gen_code_lite ${tensor_lite}
mul_op_lite
compatible_pb_lite
model_parser_lite
X86_DEPS mul_compute_x86
ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
lite_cc_test(test_gen_code_lite SRCS gen_code_test.cc DEPS gen_code_lite ${tensor_lite}
mul_op_lite
compatible_pb_lite
model_parser_lite
X86_DEPS mul_compute_x86
ARM_DEPS mul_compute_arm
ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
lite_cc_library(__generated_code__
SRCS ${CMAKE_BINARY_DIR}/paddle/fluid/lite/gen_code/__generated_code__.cc
DEPS scope_lite op_lite kernel_lite paddle_infer_gencode
......
......@@ -136,4 +136,10 @@ TEST(gen_code, optimized_program) {
} // namespace paddle
USE_LITE_OP(mul);
#ifdef LITE_WITH_X86
USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
#endif
#ifdef LITE_WITH_ARM
USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
#endif
......@@ -12,4 +12,4 @@ set(host_kernels
reshape_compute_host
)
# Publish the host kernel list as a cache entry.
# INTERNAL is used because it is a valid cache type and implies FORCE, so the
# entry is refreshed on every configure. "GLOBAL" is not a cache type accepted
# by set(... CACHE <type> ...); the valid types are BOOL, FILEPATH, PATH,
# STRING and INTERNAL.
set(host_kernels "${host_kernels}" CACHE INTERNAL "host kernels")
......@@ -2,6 +2,7 @@
set -ex
TESTS_FILE="./lite_tests.txt"
LIBS_FILE="./lite_libs.txt"
readonly common_flags="-DWITH_LITE=ON -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF -DWITH_PYTHON=OFF -DWITH_TESTING=ON -DLITE_WITH_ARM=OFF"
......@@ -42,18 +43,21 @@ function cmake_arm {
-DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2
}
# function build {
# file=$1
# for _test in $(cat $file); do
# make $_test -j$(expr $(nproc) - 2)
# done
# }
# Build every make target listed in a registry file.
#   $1: path to a file containing whitespace-separated make target names.
# Runs make with (nproc - 2) parallel jobs, clamped to at least 1: on hosts
# with one or two CPUs the unclamped value would be 0 or negative, which make
# rejects as an invalid -j argument.
function build {
    local file=$1
    local jobs=$(( $(nproc) - 2 ))
    if [ "$jobs" -lt 1 ]; then
        jobs=1
    fi
    local _test
    # Word splitting of the file content is intentional: one target per token.
    for _test in $(cat "$file"); do
        make "$_test" -j"$jobs"
    done
}
# It will eagerly test all lite related unittests.
function test_lite {
local file=$1
echo "file: ${file}"
for _test in $(cat $file); do
# The build phase is done here, one test at a time, so that the 'gen_code' test
# is compiled only after the test that generates its C++ code has been executed.
make $_test -j$(expr $(nproc) - 2)
ctest -R $_test -V
done
......@@ -98,8 +102,10 @@ function build_test_server {
cd ./build
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/paddle/build/third_party/install/mklml/lib"
cmake_x86_for_CI
#build $TESTS_FILE
# compile the tests and execute them.
test_lite $TESTS_FILE
# build the remaining libraries to check compiling error.
build $LIBS_FILE
}
# Build the code and run lite server tests. This is executed in the CI system.
......@@ -129,7 +135,6 @@ function build_test_arm {
build_dir=build.lite.${os}.${abi}
mkdir -p $build_dir
cd $build_dir
cmake_arm ${os} ${abi}
build $TESTS_FILE
......@@ -177,10 +182,11 @@ function main {
TESTS_FILE="${i#*=}"
shift
;;
# build)
# build $TESTS_FILE
# shift
# ;;
build)
build $TESTS_FILE
build $LIBS_FILE
shift
;;
cmake_x86)
cmake_x86
shift
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册