From 2cee3058c00a1f27d16747252a8a6c3a38a21cfd Mon Sep 17 00:00:00 2001
From: Yan Chunwei <yanchunwei@outlook.com>
Date: Tue, 11 Jun 2019 21:56:24 +0800
Subject: [PATCH] Lite/fix mobile combile2 (#18004)

---
 CMakeLists.txt                                |  2 +-
 paddle/fluid/lite/CMakeLists.txt              | 13 +++--
 paddle/fluid/lite/core/CMakeLists.txt         |  4 +-
 paddle/fluid/lite/core/mir/CMakeLists.txt     | 55 ++++++++++---------
 .../lite/core/mir/static_kernel_pick_pass.cc  |  2 +
 .../mir/variable_place_inference_pass_test.cc | 27 +++++++++
 paddle/fluid/lite/core/program.h              |  6 ++
 paddle/fluid/lite/core/program_fake_utils.h   | 20 +++----
 paddle/fluid/lite/gen_code/CMakeLists.txt     | 17 +++---
 paddle/fluid/lite/gen_code/gen_code_test.cc   |  6 ++
 paddle/fluid/lite/kernels/host/CMakeLists.txt |  2 +-
 paddle/fluid/lite/tools/build.sh              | 30 ++++++----
 12 files changed, 119 insertions(+), 65 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4ef4a4c351e..036a5faf24f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -43,7 +43,7 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
     if(NOT DEFINED TARGET_ARCH_ABI)
         set(ARCH_ABI "arm64-v8a" CACHE STRING "Choose android platform")
     endif()
-    
+
     include(cross_compiling/host)
     include(cross_compiling/armlinux)
     include(cross_compiling/android)
diff --git a/paddle/fluid/lite/CMakeLists.txt b/paddle/fluid/lite/CMakeLists.txt
index 2c263cc4f68..ac9ff84da44 100644
--- a/paddle/fluid/lite/CMakeLists.txt
+++ b/paddle/fluid/lite/CMakeLists.txt
@@ -79,6 +79,10 @@ function (lite_deps TARGET)
 
 endfunction()
 
+# Record the names of the lite libraries for later compilation. This name list lets us avoid
+# compiling the whole fluid project, which speeds up the build.
+set(offline_lib_registry_file "${CMAKE_BINARY_DIR}/lite_libs.txt")
+file(WRITE ${offline_lib_registry_file} "") # clean
 # cc_library with branch support.
 # The branches:
 #  X86_DEPS: works only when LITE_WITH_X86 is ON.
@@ -106,6 +110,9 @@ function(lite_cc_library TARGET)
             )
 
     cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
+
+    # register a library name.
+    file(APPEND ${offline_lib_registry_file} "${TARGET}\n")
 endfunction()
 
 function(lite_cc_binary TARGET)
@@ -131,10 +138,6 @@ endfunction()
 # Add a unit-test name to file for latter offline manual test.
 set(offline_test_registry_file "${CMAKE_BINARY_DIR}/lite_tests.txt")
 file(WRITE ${offline_test_registry_file} "") # clean
-function (register_test_offline TARGET)
-  file(APPEND ${offline_test_registry_file} "${TARGET}\n")
-endfunction()
-
 # Test lite modules.
 function(lite_cc_test TARGET)
     set(options "")
@@ -155,7 +158,7 @@ function(lite_cc_test TARGET)
             HVY_DEPS ${args_HVY_DEPS}
             )
     _lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ARGS ${args_ARGS})
-    register_test_offline("${TARGET}")
+    file(APPEND ${offline_test_registry_file} "${TARGET}\n")
 endfunction()
 
 add_subdirectory(core)
diff --git a/paddle/fluid/lite/core/CMakeLists.txt b/paddle/fluid/lite/core/CMakeLists.txt
index a71420b8c11..e5aef8d84fa 100644
--- a/paddle/fluid/lite/core/CMakeLists.txt
+++ b/paddle/fluid/lite/core/CMakeLists.txt
@@ -19,7 +19,7 @@ endif()
 
 proto_library(framework_proto_lite SRCS framework.proto)
 
-cc_library(kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite)
+cc_library(kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite ${tensor_lite})
 cc_library(variable_lite SRCS variable.cc)
 cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite)
 cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite})
@@ -30,7 +30,7 @@ cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapp
 cc_library(types_lite SRCS types.cc)
 cc_library(type_system SRCS type_system.cc DEPS ${tensor_lite} target_wrapper_lite)
 
-cc_library(program_lite SRCS program.cc DEPS op_lite kernel_lite compatible_pb_lite model_parser_lite)
+lite_cc_library(program_lite SRCS program.cc DEPS op_lite kernel_lite compatible_pb_lite model_parser_lite HVY_DEPS framework_proto)
 cc_library(optimizer_lite SRCS optimizer.cc DEPS mir_pass_manager model_parser_lite program_lite)
 
 add_subdirectory(mir)
diff --git a/paddle/fluid/lite/core/mir/CMakeLists.txt b/paddle/fluid/lite/core/mir/CMakeLists.txt
index 26dc50ab73e..84cba88d11d 100644
--- a/paddle/fluid/lite/core/mir/CMakeLists.txt
+++ b/paddle/fluid/lite/core/mir/CMakeLists.txt
@@ -28,31 +28,34 @@ cc_test(test_ssa_graph SRCS ssa_graph_test.cc DEPS
         mir_pass_manager
         program_fake_utils
         )
-set(test_variable_place_infrence_pass_DEPS
-        mul_op_lite
-        feed_op_lite
-        fetch_op_lite
-        io_copy_op_lite
-        ${host_kernels}
-        mir_passes
-        mir_pass_manager
-        optimizer_lite
-        program_fake_utils
-        target_wrapper_host
-        )
-if (LITE_WITH_CUDA)
-    set(test_variable_place_infrence_pass_DEPS
-            ${test_variable_place_infrence_pass_DEPS} target_wrapper_cuda
-            kernels_cuda
-            )
-endif()
-cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc DEPS
-        ${test_variable_place_infrence_pass_DEPS})
+# lite_cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc
+#   DEPS
+#       mul_op_lite
+#       feed_op_lite
+#       fetch_op_lite
+#       io_copy_op_lite
+#       ${host_kernels}
+#       mir_passes
+#       mir_pass_manager
+#       optimizer_lite
+#       program_fake_utils
+#       target_wrapper_host
+#   PROFILE_DEPS basic_profiler_lite
+#   CUDA_DEPS target_wrapper_cuda kernels_cuda
+#   ARM_DEPS mul_compute_arm
+#   X86_DEPS mul_compute_x86
+# )
+
+
+lite_cc_library(pattern_matcher_lite SRCS pattern_matcher.cc DEPS mir_node mir_ssa_graph op_lite)
+lite_cc_test(test_pattern_matcher_lite SRCS pattern_matcher_test.cc DEPS pattern_matcher_lite)
 
-cc_library(pattern_matcher_lite SRCS pattern_matcher.cc DEPS mir_node mir_ssa_graph op_lite)
-cc_test(test_pattern_matcher_lite SRCS pattern_matcher_test.cc DEPS pattern_matcher_lite)
+lite_cc_library(pattern_matcher_high_api SRCS pattern_matcher_high_api.cc DEPS pattern_matcher_lite)
 
-cc_library(pattern_matcher_high_api SRCS pattern_matcher_high_api.cc DEPS pattern_matcher_lite)
-cc_test(test_pattern_matcher_high_api SRCS pattern_matcher_high_api_test.cc DEPS
-    pattern_matcher_high_api proto_desc mir_pass_manager fc_op_lite mul_op_lite elementwise_ops_lite
-    mir_passes compatible_pb_lite program_lite ${ops_lite})
+# TODO(wz) replace framework/proto with the lite proto.
+if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
+    # This test depends on fluid/framework/proto, which is too heavy for mobile execution, so skip it there.
+    lite_cc_test(test_pattern_matcher_high_api SRCS pattern_matcher_high_api_test.cc DEPS
+        pattern_matcher_high_api proto_desc mir_pass_manager fc_op_lite mul_op_lite elementwise_ops_lite
+        mir_passes compatible_pb_lite program_lite ${ops_lite})
+endif()
diff --git a/paddle/fluid/lite/core/mir/static_kernel_pick_pass.cc b/paddle/fluid/lite/core/mir/static_kernel_pick_pass.cc
index a3599664438..9d48c123a0c 100644
--- a/paddle/fluid/lite/core/mir/static_kernel_pick_pass.cc
+++ b/paddle/fluid/lite/core/mir/static_kernel_pick_pass.cc
@@ -37,6 +37,8 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
     if (!node.IsStmt()) continue;
     auto& instruct = node.AsStmt();
     std::vector<std::pair<size_t, std::unique_ptr<KernelBase>>> scored;
+    CHECK(!instruct.valid_kernels.empty()) << "No kernels found for "
+                                           << instruct.op_type;
     for (auto&& kernel : instruct.valid_kernels) {
       size_t score = KernelGrade(*kernel);
       scored.emplace_back(score, std::move(kernel));
diff --git a/paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc b/paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc
index 9c33ff698ac..d6b8561c378 100644
--- a/paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc
+++ b/paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc
@@ -42,6 +42,12 @@ TEST(variable_place_inference_pass, test) {
       Place{
           TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW),
       },
+      Place{
+          TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW),
+      },
+      Place{
+          TARGET(kX86), PRECISION(kAny), DATALAYOUT(kAny),
+      },
   });
 
   Program program(*desc->Proto(), scope, places);
@@ -58,7 +64,15 @@ TEST(variable_place_inference_pass, test) {
   });
 
   Place prefered_place{
+#ifdef LITE_WITH_CUDA  // same LITE_WITH_* macros that gate the USE_LITE_KERNEL lines in this file
       TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW),
+#else
+#ifdef LITE_WITH_ARM
+      TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW),
+#else   // X86
+      TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW),
+#endif  // LITE_WITH_ARM
+#endif  // LITE_WITH_CUDA
   };
   optimizer.KernelPickPreferPlace(prefered_place);
   optimizer.Run(std::move(program), places, factor, passes);
@@ -72,3 +86,16 @@ USE_LITE_OP(mul);
 USE_LITE_OP(feed);
 USE_LITE_OP(fetch);
 USE_LITE_OP(io_copy);
+
+#ifdef LITE_WITH_X86
+USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
+#endif
+
+#ifdef LITE_WITH_ARM
+USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
+#endif
+
+#ifdef LITE_WITH_CUDA
+USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device);
+USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host);
+#endif
diff --git a/paddle/fluid/lite/core/program.h b/paddle/fluid/lite/core/program.h
index 450a625bc24..4f2f65d3fa7 100644
--- a/paddle/fluid/lite/core/program.h
+++ b/paddle/fluid/lite/core/program.h
@@ -55,8 +55,14 @@ struct Program {
 
   const std::list<std::string>& weights() const { return weights_; }
   const std::list<std::string>& tmp_vars() const { return tmp_vars_; }
+  std::list<std::string>* mutable_weights() { return &weights_; }
+  std::list<std::string>* mutable_tmp_vars() { return &tmp_vars_; }
+
   const std::list<std::shared_ptr<OpLite>>& ops() const { return ops_; }
+  std::list<std::shared_ptr<OpLite>>* mutable_ops() { return &ops_; }
+
   lite::Scope* exec_scope() { return exec_scope_; }
+  lite::Scope* scope() { return scope_.get(); }
 
  private:
   // Build from a program and scope.
diff --git a/paddle/fluid/lite/core/program_fake_utils.h b/paddle/fluid/lite/core/program_fake_utils.h
index a18fa9f17f4..b36e47bf1f2 100644
--- a/paddle/fluid/lite/core/program_fake_utils.h
+++ b/paddle/fluid/lite/core/program_fake_utils.h
@@ -33,11 +33,11 @@ Program FakeProgram() {
     std::string w1 = "w" + std::to_string(id);
     std::string b1 = "b" + std::to_string(id);
     std::string out1 = "out" + std::to_string(id);
-    auto w1v = program.scope_->Var(w1)->GetMutable<lite::Tensor>();
-    auto b1v = program.scope_->Var(b1)->GetMutable<lite::Tensor>();
-    auto out1v = program.scope_->Var(out1)->GetMutable<lite::Tensor>();
+    auto w1v = program.scope()->Var(w1)->GetMutable<lite::Tensor>();
+    auto b1v = program.scope()->Var(b1)->GetMutable<lite::Tensor>();
+    auto out1v = program.scope()->Var(out1)->GetMutable<lite::Tensor>();
 
-    lite::OpDesc desc;
+    cpp::OpDesc desc;
     desc.SetInput("Input", {x});
     desc.SetInput("W", {w1});
     desc.SetInput("Bias", {b1});
@@ -46,12 +46,12 @@ Program FakeProgram() {
     desc.SetAttr("in_num_col_dims", 1);
 
     // add to input
-    program.tmp_vars_.push_back(w1);
-    program.tmp_vars_.push_back(b1);
+    program.mutable_tmp_vars()->push_back(w1);
+    program.mutable_tmp_vars()->push_back(b1);
 
     auto fc_op = LiteOpRegistry::Global().Create("fc");
-    fc_op->Attach(desc, program.scope_.get());
-    program.ops_.emplace_back(std::move(fc_op));
+    fc_op->Attach(desc, program.scope());
+    program.mutable_ops()->emplace_back(std::move(fc_op));
 
     w1v->Resize(DDimHvy(std::vector<int64_t>({100, 100})));
     b1v->Resize(DDimHvy(std::vector<int64_t>({100, 1})));
@@ -64,8 +64,8 @@ Program FakeProgram() {
   // out1, w2, b2 -fc-> out2
 
   std::string x = "x";
-  program.tmp_vars_.push_back(x);
-  auto* xv = program.scope_->Var(x)->GetMutable<lite::Tensor>();
+  program.mutable_tmp_vars()->push_back(x);
+  auto* xv = program.scope()->Var(x)->GetMutable<lite::Tensor>();
   xv->Resize(DDimHvy(std::vector<int64_t>({100, 100})));
 
   for (int i = 0; i < 3; i++) {
diff --git a/paddle/fluid/lite/gen_code/CMakeLists.txt b/paddle/fluid/lite/gen_code/CMakeLists.txt
index f7c38c176e1..bacfc3e988e 100644
--- a/paddle/fluid/lite/gen_code/CMakeLists.txt
+++ b/paddle/fluid/lite/gen_code/CMakeLists.txt
@@ -1,17 +1,18 @@
 lite_cc_library(gen_code_lite SRCS gen_code.cc
-        DEPS program_lite op_lite scope
+        DEPS program_lite op_lite scope_lite
         cpp_op_desc_lite
         HVY_DEPS operator)
 lite_cc_library(paddle_infer_gencode SRCS paddle_infer.cc DEPS program_lite utils_lite)
 
-lite_cc_test(test_gen_code_lite SRCS gen_code_test.cc DEPS gen_code_lite ${tensor_lite}
-        mul_op_lite
-        compatible_pb_lite
-        model_parser_lite
-        X86_DEPS mul_compute_x86
-        ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
-
 if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
+    lite_cc_test(test_gen_code_lite SRCS gen_code_test.cc DEPS gen_code_lite ${tensor_lite}
+            mul_op_lite
+            compatible_pb_lite
+            model_parser_lite
+            X86_DEPS mul_compute_x86
+            ARM_DEPS mul_compute_arm
+            ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
+
     lite_cc_library(__generated_code__
         SRCS ${CMAKE_BINARY_DIR}/paddle/fluid/lite/gen_code/__generated_code__.cc
         DEPS scope_lite op_lite kernel_lite paddle_infer_gencode
diff --git a/paddle/fluid/lite/gen_code/gen_code_test.cc b/paddle/fluid/lite/gen_code/gen_code_test.cc
index 96ef56e857e..c27b775c061 100644
--- a/paddle/fluid/lite/gen_code/gen_code_test.cc
+++ b/paddle/fluid/lite/gen_code/gen_code_test.cc
@@ -136,4 +136,10 @@ TEST(gen_code, optimized_program) {
 }  // namespace paddle
 
 USE_LITE_OP(mul);
+#ifdef LITE_WITH_X86
 USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
+#endif
+
+#ifdef LITE_WITH_ARM
+USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
+#endif
diff --git a/paddle/fluid/lite/kernels/host/CMakeLists.txt b/paddle/fluid/lite/kernels/host/CMakeLists.txt
index 03c0023cb41..a71a8e13ab8 100644
--- a/paddle/fluid/lite/kernels/host/CMakeLists.txt
+++ b/paddle/fluid/lite/kernels/host/CMakeLists.txt
@@ -12,4 +12,4 @@ set(host_kernels
     reshape_compute_host
     )
 
-set(host_kernels "${host_kernels}" CACHE INTERNAL "host kernels")
+set(host_kernels "${host_kernels}" CACHE INTERNAL "host kernels")  # INTERNAL implies FORCE, so the list is refreshed on every configure
diff --git a/paddle/fluid/lite/tools/build.sh b/paddle/fluid/lite/tools/build.sh
index 5afbc003cf7..2a31f8d1ff9 100755
--- a/paddle/fluid/lite/tools/build.sh
+++ b/paddle/fluid/lite/tools/build.sh
@@ -2,6 +2,7 @@
 set -ex
 
 TESTS_FILE="./lite_tests.txt"
+LIBS_FILE="./lite_libs.txt"
 
 readonly common_flags="-DWITH_LITE=ON -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF -DWITH_PYTHON=OFF -DWITH_TESTING=ON -DLITE_WITH_ARM=OFF"
 
@@ -42,18 +43,21 @@ function cmake_arm {
         -DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2
 }
 
-# function build {
-#     file=$1
-#     for _test in $(cat $file); do
-#         make $_test -j$(expr $(nproc) - 2)
-#     done
-# }
+function build {  # run make for every target named in the list file passed as $1
+    local file=$1
+    for _test in $(cat "$file"); do
+        make $_test -j$(( $(nproc) > 2 ? $(nproc) - 2 : 1 ))  # keep two cores free, but never pass -j0 or less
+    done
+}
 
 # It will eagerly test all lite related unittests.
 function test_lite {
     local file=$1
     echo "file: ${file}"
+
     for _test in $(cat $file); do
+        # The build phase is moved here so that the 'gen_code' test is compiled only after the
+        # corresponding test has been executed and has generated the C++ code.
         make $_test -j$(expr $(nproc) - 2)
         ctest -R $_test -V
     done
@@ -98,8 +102,10 @@ function build_test_server {
     cd ./build
     export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/paddle/build/third_party/install/mklml/lib"
     cmake_x86_for_CI
-    #build $TESTS_FILE
+    # compile the tests and execute them.
     test_lite $TESTS_FILE
+    # Build the remaining libraries to catch compile errors.
+    build $LIBS_FILE
 }
 
 # Build the code and run lite server tests. This is executed in the CI system.
@@ -129,7 +135,6 @@ function build_test_arm {
             build_dir=build.lite.${os}.${abi}
             mkdir -p $build_dir
             cd $build_dir
-
             cmake_arm ${os} ${abi}
             build $TESTS_FILE
 
@@ -177,10 +182,11 @@ function main {
                 TESTS_FILE="${i#*=}"
                 shift
                 ;;
-            # build)
-            #     build $TESTS_FILE
-            #     shift
-            #     ;;
+            build)
+                build $TESTS_FILE
+                build $LIBS_FILE
+                shift
+                ;;
             cmake_x86)
                 cmake_x86
                 shift
-- 
GitLab