From 914219cc5fd22a4b2e18895e44ded3ab802c9d34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?=
 <39303645+Shixiaowei02@users.noreply.github.com>
Date: Tue, 8 Sep 2020 20:46:43 +0800
Subject: [PATCH] platform portability of tls, test=develop (#4261)

* platform portability of tls, test=develop

* update build_ios.sh, test=develop

* add static keyword for tls, test=develop

* rename the alias of tls, test=develop
---
 lite/backends/mlu/target_wrapper.cc     | 15 +++++++++------
 lite/backends/mlu/target_wrapper.h      | 13 +++++++------
 lite/backends/x86/jit/helper.h          |  3 ++-
 lite/backends/x86/jit/kernel_pool.h     |  3 ++-
 lite/backends/xpu/target_wrapper.cc     |  3 ++-
 lite/backends/xpu/target_wrapper.h      |  3 ++-
 lite/core/context.cc                    |  5 +++--
 lite/core/context.h                     |  5 +++--
 lite/core/device_info.cc                | 13 +++++++------
 lite/core/device_info.h                 | 13 +++++++------
 lite/core/mir/mlu_postprocess_pass.cc   |  3 ++-
 lite/core/workspace.h                   |  9 ++++++---
 lite/kernels/mlu/bridges/graph.h        |  3 ++-
 lite/kernels/mlu/bridges/test_helper.cc |  3 ++-
 lite/tools/build.sh                     |  7 +++++++
 lite/tools/build_ios.sh                 |  7 +++++++
 lite/utils/macros.h                     | 11 +++++++++++
 17 files changed, 81 insertions(+), 38 deletions(-)
diff --git a/lite/backends/mlu/target_wrapper.cc b/lite/backends/mlu/target_wrapper.cc
index b98854946d..7317dd2fb8 100644
--- a/lite/backends/mlu/target_wrapper.cc
+++ b/lite/backends/mlu/target_wrapper.cc
@@ -18,6 +18,7 @@
 #include <utility>
 
 #include "lite/backends/mlu/mlu_utils.h"
+#include "lite/utils/macros.h"
 
 namespace paddle {
 namespace lite {
@@ -37,12 +38,14 @@ void cnrtMemcpyDtoH(void* dst, const void* src, size_t size) {
 
 }  // namespace mlu
 
-thread_local cnmlCoreVersion_t TargetWrapperMlu::mlu_core_version_{CNML_MLU270};
-thread_local int TargetWrapperMlu::mlu_core_number_{1};
-thread_local bool TargetWrapperMlu::use_first_conv_{false};
-thread_local std::vector<float> TargetWrapperMlu::mean_vec_;
-thread_local std::vector<float> TargetWrapperMlu::std_vec_;
-thread_local DataLayoutType TargetWrapperMlu::input_layout_{DATALAYOUT(kNCHW)};
+LITE_THREAD_LOCAL cnmlCoreVersion_t TargetWrapperMlu::mlu_core_version_{
+    CNML_MLU270};
+LITE_THREAD_LOCAL int TargetWrapperMlu::mlu_core_number_{1};
+LITE_THREAD_LOCAL bool TargetWrapperMlu::use_first_conv_{false};
+LITE_THREAD_LOCAL std::vector<float> TargetWrapperMlu::mean_vec_;
+LITE_THREAD_LOCAL std::vector<float> TargetWrapperMlu::std_vec_;
+LITE_THREAD_LOCAL DataLayoutType TargetWrapperMlu::input_layout_{
+    DATALAYOUT(kNCHW)};
 
 size_t TargetWrapperMlu::num_devices() {
   uint32_t dev_count = 0;
diff --git a/lite/backends/mlu/target_wrapper.h b/lite/backends/mlu/target_wrapper.h
index 2566ae153e..7cb46edc3e 100644
--- a/lite/backends/mlu/target_wrapper.h
+++ b/lite/backends/mlu/target_wrapper.h
@@ -17,6 +17,7 @@
 #include <vector>
 #include "lite/backends/mlu/mlu_utils.h"
 #include "lite/core/target_wrapper.h"
+#include "lite/utils/macros.h"
 
 namespace paddle {
 namespace lite {
@@ -58,12 +59,12 @@ class TargetWrapper<TARGET(kMLU)> {
   static DataLayoutType InputLayout();
 
  private:
-  static thread_local cnmlCoreVersion_t mlu_core_version_;
-  static thread_local int mlu_core_number_;
-  static thread_local bool use_first_conv_;
-  static thread_local std::vector<float> mean_vec_;
-  static thread_local std::vector<float> std_vec_;
-  static thread_local DataLayoutType input_layout_;
+  static LITE_THREAD_LOCAL cnmlCoreVersion_t mlu_core_version_;
+  static LITE_THREAD_LOCAL int mlu_core_number_;
+  static LITE_THREAD_LOCAL bool use_first_conv_;
+  static LITE_THREAD_LOCAL std::vector<float> mean_vec_;
+  static LITE_THREAD_LOCAL std::vector<float> std_vec_;
+  static LITE_THREAD_LOCAL DataLayoutType input_layout_;
 };
 
 }  // namespace lite
diff --git a/lite/backends/x86/jit/helper.h b/lite/backends/x86/jit/helper.h
index 57a3611bb6..4f1411ec25 100644
--- a/lite/backends/x86/jit/helper.h
+++ b/lite/backends/x86/jit/helper.h
@@ -24,6 +24,7 @@
 #include "lite/backends/x86/jit/kernel_key.h"
 #include "lite/backends/x86/jit/kernel_pool.h"
 #include "lite/utils/cp_logging.h"
+#include "lite/utils/macros.h"
 
 namespace paddle {
 namespace lite {
@@ -178,7 +179,7 @@ class KernelFuncs {
  public:
   KernelFuncs() = default;
   static KernelFuncs& Cache() {
-    static thread_local KernelFuncs<KernelTuple, PlaceType> g_func_cache;
+    static LITE_THREAD_LOCAL KernelFuncs<KernelTuple, PlaceType> g_func_cache;
     return g_func_cache;
   }
 
diff --git a/lite/backends/x86/jit/kernel_pool.h b/lite/backends/x86/jit/kernel_pool.h
index dc0b1bbf2e..41ec078c8a 100644
--- a/lite/backends/x86/jit/kernel_pool.h
+++ b/lite/backends/x86/jit/kernel_pool.h
@@ -22,6 +22,7 @@
 #include "lite/backends/x86/jit/gen_base.h"
 #include "lite/backends/x86/jit/kernel_base.h"
 #include "lite/backends/x86/jit/kernel_key.h"
+#include "lite/utils/macros.h"
 
 namespace paddle {
 namespace lite {
@@ -35,7 +36,7 @@ class JitCodePool {
  public:
   JitCodePool() = default;
   static JitCodePool& Instance() {
-    static thread_local JitCodePool<KT> g_jit_codes;
+    static LITE_THREAD_LOCAL JitCodePool<KT> g_jit_codes;
     return g_jit_codes;
   }
 
diff --git a/lite/backends/xpu/target_wrapper.cc b/lite/backends/xpu/target_wrapper.cc
index a322418ccd..a3d8729410 100644
--- a/lite/backends/xpu/target_wrapper.cc
+++ b/lite/backends/xpu/target_wrapper.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "lite/backends/xpu/target_wrapper.h"
+#include "lite/utils/macros.h"
 
 namespace paddle {
 namespace lite {
@@ -55,7 +56,7 @@ XPUScratchPadGuard TargetWrapperXPU::MallocScratchPad(size_t size,
 
 std::string TargetWrapperXPU::multi_encoder_precision;  // NOLINT
 int TargetWrapperXPU::workspace_l3_size_per_thread{0};
-thread_local xdnn::Context* TargetWrapperXPU::tls_raw_ctx_{nullptr};
+LITE_THREAD_LOCAL xdnn::Context* TargetWrapperXPU::tls_raw_ctx_{nullptr};
 
 }  // namespace lite
 }  // namespace paddle
diff --git a/lite/backends/xpu/target_wrapper.h b/lite/backends/xpu/target_wrapper.h
index 070184a130..1a888b126a 100644
--- a/lite/backends/xpu/target_wrapper.h
+++ b/lite/backends/xpu/target_wrapper.h
@@ -18,6 +18,7 @@
 #include "lite/backends/xpu/xpu_header_sitter.h"  // xpu_free
 #include "lite/core/target_wrapper.h"             // TargetWrapper
 #include "lite/utils/cp_logging.h"                // CHECK_EQ
+#include "lite/utils/macros.h"
 
 #define XPU_CALL(func)                                        \
   {                                                           \
@@ -99,7 +100,7 @@ class TargetWrapper<TARGET(kXPU)> {
   static int workspace_l3_size_per_thread;
 
  private:
-  static thread_local xdnn::Context* tls_raw_ctx_;
+  static LITE_THREAD_LOCAL xdnn::Context* tls_raw_ctx_;
 };
 
 }  // namespace lite
diff --git a/lite/core/context.cc b/lite/core/context.cc
index abb44945ec..c39b5ce426 100644
--- a/lite/core/context.cc
+++ b/lite/core/context.cc
@@ -13,15 +13,16 @@
 // limitations under the License.
 
 #include "lite/core/context.h"
+#include "lite/utils/macros.h"
 
 namespace paddle {
 namespace lite {
 
 #ifdef LITE_WITH_HUAWEI_ASCEND_NPU
-thread_local std::string
+LITE_THREAD_LOCAL std::string
     Context<TargetType::kHuaweiAscendNPU>::subgraph_model_cache_dir_{
         ""};  // NOLINT
-thread_local int
+LITE_THREAD_LOCAL int
     Context<TargetType::kHuaweiAscendNPU>::huawei_ascend_device_id_{
         0};  // NOLINT
 #endif
diff --git a/lite/core/context.h b/lite/core/context.h
index 5567eadbf8..84742bf478 100644
--- a/lite/core/context.h
+++ b/lite/core/context.h
@@ -44,6 +44,7 @@
 #include "lite/core/tensor.h"
 #include "lite/utils/all.h"
 #include "lite/utils/env.h"
+#include "lite/utils/macros.h"
 
 namespace paddle {
 namespace lite {
@@ -128,8 +129,8 @@ class Context<TargetType::kHuaweiAscendNPU> {
   static int HuaweiAscendDeviceID() { return huawei_ascend_device_id_; }
 
  private:
-  static thread_local std::string subgraph_model_cache_dir_;
-  static thread_local int huawei_ascend_device_id_;
+  static LITE_THREAD_LOCAL std::string subgraph_model_cache_dir_;
+  static LITE_THREAD_LOCAL int huawei_ascend_device_id_;
 };
 #endif
 
diff --git a/lite/core/device_info.cc b/lite/core/device_info.cc
index 6d404cee97..cd135f85b3 100644
--- a/lite/core/device_info.cc
+++ b/lite/core/device_info.cc
@@ -54,17 +54,18 @@
 #include <algorithm>
 #include <limits>
 #include "lite/core/device_info.h"
+#include "lite/utils/macros.h"
 
 namespace paddle {
 namespace lite {
 
 #if ((defined LITE_WITH_ARM) || (defined LITE_WITH_MLU))
-thread_local lite_api::PowerMode DeviceInfo::mode_;
-thread_local ARMArch DeviceInfo::arch_;
-thread_local int DeviceInfo::mem_size_;
-thread_local std::vector<int> DeviceInfo::active_ids_;
-thread_local TensorLite DeviceInfo::workspace_;
-thread_local int64_t DeviceInfo::count_ = 0;
+LITE_THREAD_LOCAL lite_api::PowerMode DeviceInfo::mode_;
+LITE_THREAD_LOCAL ARMArch DeviceInfo::arch_;
+LITE_THREAD_LOCAL int DeviceInfo::mem_size_;
+LITE_THREAD_LOCAL std::vector<int> DeviceInfo::active_ids_;
+LITE_THREAD_LOCAL TensorLite DeviceInfo::workspace_;
+LITE_THREAD_LOCAL int64_t DeviceInfo::count_ = 0;
 
 #ifdef TARGET_IOS
 const int DEFAULT_L1_CACHE_SIZE = 64 * 1024;
diff --git a/lite/core/device_info.h b/lite/core/device_info.h
index f3f10c2d57..c95f285e14 100644
--- a/lite/core/device_info.h
+++ b/lite/core/device_info.h
@@ -22,6 +22,7 @@
 #ifdef LITE_WITH_MLU
 #include "lite/backends/mlu/mlu_utils.h"
 #endif
+#include "lite/utils/macros.h"
 
 namespace paddle {
 namespace lite {
@@ -99,12 +100,12 @@ class DeviceInfo {
   // LITE_POWER_HIGH stands for using big cores,
   // LITE_POWER_LOW stands for using small core,
   // LITE_POWER_FULL stands for using all cores
-  static thread_local lite_api::PowerMode mode_;
-  static thread_local ARMArch arch_;
-  static thread_local int mem_size_;
-  static thread_local std::vector<int> active_ids_;
-  static thread_local TensorLite workspace_;
-  static thread_local int64_t count_;
+  static LITE_THREAD_LOCAL lite_api::PowerMode mode_;
+  static LITE_THREAD_LOCAL ARMArch arch_;
+  static LITE_THREAD_LOCAL int mem_size_;
+  static LITE_THREAD_LOCAL std::vector<int> active_ids_;
+  static LITE_THREAD_LOCAL TensorLite workspace_;
+  static LITE_THREAD_LOCAL int64_t count_;
 
   void SetDotInfo(int argc, ...);
   void SetFP16Info(int argc, ...);
diff --git a/lite/core/mir/mlu_postprocess_pass.cc b/lite/core/mir/mlu_postprocess_pass.cc
index e09220d083..f7cd7663e4 100644
--- a/lite/core/mir/mlu_postprocess_pass.cc
+++ b/lite/core/mir/mlu_postprocess_pass.cc
@@ -23,12 +23,13 @@
 #include "lite/core/mir/pass_registry.h"
 #include "lite/core/mir/subgraph/subgraph_detector.h"
 #include "lite/operators/subgraph_op.h"
+#include "lite/utils/macros.h"
 
 namespace paddle {
 namespace lite {
 namespace mir {
 
-static thread_local int g_stream_id = 0;
+static LITE_THREAD_LOCAL int g_stream_id = 0;
 
 Node* MLUPostprocessPass::InsertCastBefore(const std::string& op_type,
                                            const std::string& cast_arg_name,
diff --git a/lite/core/workspace.h b/lite/core/workspace.h
index 54efb6699a..9a294049e0 100644
--- a/lite/core/workspace.h
+++ b/lite/core/workspace.h
@@ -50,7 +50,8 @@ class WorkSpace {
   }
 
   static WorkSpace& Global_Host() {
-    thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kHost)));
+    static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x(
+        new WorkSpace(TARGET(kHost)));
     return *x;
   }
 
@@ -64,14 +65,16 @@ class WorkSpace {
 
 #if defined(LITE_WITH_CUDA)
   static WorkSpace& Global_CUDA() {
-    thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kCUDA)));
+    static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x(
+        new WorkSpace(TARGET(kCUDA)));
     return *x;
   }
 #endif
 
 #if defined(LITE_WITH_MLU)
   static WorkSpace& Global_MLU() {
-    thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kMLU)));
+    static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x(
+        new WorkSpace(TARGET(kMLU)));
     return *x;
   }
 #endif
diff --git a/lite/kernels/mlu/bridges/graph.h b/lite/kernels/mlu/bridges/graph.h
index 07c6b20efb..ef1e924791 100644
--- a/lite/kernels/mlu/bridges/graph.h
+++ b/lite/kernels/mlu/bridges/graph.h
@@ -24,6 +24,7 @@
 #include "lite/core/tensor.h"
 #include "lite/kernels/mlu/bridges/tensor.h"
 #include "lite/utils/env.h"
+#include "lite/utils/macros.h"
 
 #define PRINT_HW_TIME false
 
@@ -135,7 +136,7 @@ class Graph {
 
 #define MEASURE_HWTIME_END(que)                                                \
   do {                                                                         \
-    thread_local float hw_time;                                                \
+    static LITE_THREAD_LOCAL float hw_time;                                    \
     CNRT_CALL(cnrtPlaceNotifier(notifier_end_, que));                          \
     CNRT_CALL(cnrtSyncQueue(que));                                             \
     CNRT_CALL(cnrtNotifierDuration(notifier_start_, notifier_end_, &hw_time)); \
diff --git a/lite/kernels/mlu/bridges/test_helper.cc b/lite/kernels/mlu/bridges/test_helper.cc
index 36eeb473f6..6d4f6cf9b7 100644
--- a/lite/kernels/mlu/bridges/test_helper.cc
+++ b/lite/kernels/mlu/bridges/test_helper.cc
@@ -19,6 +19,7 @@
 #include "lite/kernels/mlu/bridges/utility.h"
 #include "lite/kernels/mlu/subgraph_compute.h"
 #include "lite/kernels/npu/bridges/registry.h"
+#include "lite/utils/macros.h"
 namespace paddle {
 namespace lite {
 namespace subgraph {
@@ -29,7 +30,7 @@ void PrepareInput(Graph* graph,
                   const std::string& input_name,
                   Tensor* input_tensor,
                   cnmlDataOrder_t order) {
-  thread_local Tensor temp_input;
+  static LITE_THREAD_LOCAL Tensor temp_input;
   temp_input.Resize(input_tensor->dims().Vectorize());
   temp_input.CopyDataFrom(*input_tensor);
   using data_type = typename MLUTypeTraits<Dtype>::type;
diff --git a/lite/tools/build.sh b/lite/tools/build.sh
index 6fc38180e7..bbfa81be2d 100755
--- a/lite/tools/build.sh
+++ b/lite/tools/build.sh
@@ -38,6 +38,7 @@ WITH_HUAWEI_ASCEND_NPU=OFF # Huawei Ascend Builder/Runtime Libs on X86 host
 HUAWEI_ASCEND_NPU_DDK_ROOT="/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux_gcc4.8.5"
 PYTHON_EXECUTABLE_OPTION=""
 ENABLE_FLATBUFFERS_DESC_VIEW=OFF
+IOS_DEPLOYMENT_TARGET=9.0
 
 readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz
 
@@ -321,6 +322,7 @@ function make_ios {
             -DARM_TARGET_ARCH_ABI=$abi \
             -DLITE_BUILD_EXTRA=$BUILD_EXTRA \
             -DLITE_WITH_CV=$BUILD_CV \
+            -DDEPLOYMENT_TARGET=${IOS_DEPLOYMENT_TARGET} \
             -DARM_TARGET_OS=$os
 
     make publish_inference -j$NUM_PROC
@@ -437,6 +439,7 @@ function print_usage {
     echo -e "--build_java: (OFF|ON); controls whether to publish java api lib (Only ANDROID is supported)"
     echo -e "--build_dir: directory for building"
     echo -e "--enable_flatbuffers_view: (OFF|ON); Use the flatbuffers read-only view to load the model. If ON, the naive buffer will no longer be supported."
+    echo -e "--ios_deployment_target: (default: 9.0); Set the minimum compatible system version for ios deployment."
     echo
     echo -e "argument choices:"
     echo -e "--arm_os:\t android|ios|ios64"
@@ -585,6 +588,10 @@ function main {
                 ENABLE_FLATBUFFERS_DESC_VIEW="${i#*=}"
                 shift
                 ;;
+            --ios_deployment_target=*)
+                IOS_DEPLOYMENT_TARGET="${i#*=}"
+                shift
+                ;;
             tiny_publish)
                 make_tiny_publish_so $ARM_OS $ARM_ABI $ARM_LANG $ANDROID_STL 
                 shift
diff --git a/lite/tools/build_ios.sh b/lite/tools/build_ios.sh
index f4232d0d2c..f8a78e3487 100755
--- a/lite/tools/build_ios.sh
+++ b/lite/tools/build_ios.sh
@@ -19,6 +19,7 @@ workspace=$PWD/$(dirname $0)/../../
 # options of striping lib according to input model.
 OPTMODEL_DIR=""
 WITH_STRIP=OFF
+IOS_DEPLOYMENT_TARGET=9.0
 # num of threads used during compiling..
 readonly NUM_PROC=${LITE_BUILD_THREADS:-4}
 #####################################################################################################
@@ -80,6 +81,7 @@ function make_ios {
             -DARM_TARGET_ARCH_ABI=$arch \
             -DLITE_BUILD_EXTRA=$WITH_EXTRA \
             -DLITE_WITH_CV=$WITH_CV \
+            -DDEPLOYMENT_TARGET=${IOS_DEPLOYMENT_TARGET} \
             -DARM_TARGET_OS=$os
 
     make publish_inference -j$NUM_PROC
@@ -104,6 +106,7 @@ function print_usage {
     echo -e "|     --with_log: (OFF|ON); controls whether to print log information, default is ON                                                   |"
     echo -e "|     --with_exception: (OFF|ON); controls whether to throw the exception when error occurs, default is OFF                            |"
     echo -e "|     --with_extra: (OFF|ON); controls whether to publish extra operators and kernels for (sequence-related model such as OCR or NLP)  |"
+    echo -e "|     --ios_deployment_target: (default: 9.0); Set the minimum compatible system version for ios deployment.                           |"
     echo -e "|                                                                                                                                      |"
     echo -e "|  arguments of striping lib according to input model:(armv8, gcc, c++_static)                                                         |"
     echo -e "|     ./lite/tools/build_android.sh --with_strip=ON --opt_model_dir=YourOptimizedModelDir                                              |"
@@ -151,6 +154,10 @@ function main {
                 WITH_EXCEPTION="${i#*=}"
                 shift
                 ;;
+            --ios_deployment_target=*)
+                IOS_DEPLOYMENT_TARGET="${i#*=}"
+                shift
+                ;;
             help)
                 print_usage
                 exit 0
diff --git a/lite/utils/macros.h b/lite/utils/macros.h
index 0fbe90fa45..5c2f85e92c 100644
--- a/lite/utils/macros.h
+++ b/lite/utils/macros.h
@@ -53,3 +53,14 @@
 #if defined(__FLT_MAX__)
 #define FLT_MAX __FLT_MAX__
 #endif  // __FLT_MAX__
+
+#if (defined __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && \
+    (__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 90000)
+// iOS 8 linker lacks TLS: expand to nothing; state is shared by all threads.
+#define LITE_THREAD_LOCAL
+// MSVC reports __cplusplus as 199711L by default, so also accept VS2015+.
+#elif (__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1900)
+#define LITE_THREAD_LOCAL thread_local
+#else
+#error "C++11 support is required for paddle-lite compilation."
+#endif
-- 
GitLab