diff --git a/lite/backends/x86/jit/helper.h b/lite/backends/x86/jit/helper.h index b21be9466c05f4c41127ba781360a946e1c1b98c..41542783b440fb530d451f6808b6ec811061ad5e 100644 --- a/lite/backends/x86/jit/helper.h +++ b/lite/backends/x86/jit/helper.h @@ -23,6 +23,7 @@ #include "lite/backends/x86/jit/kernel_base.h" #include "lite/backends/x86/jit/kernel_key.h" #include "lite/backends/x86/jit/kernel_pool.h" +#include "lite/utils/macros.h" #include "lite/utils/paddle_enforce.h" namespace paddle { @@ -178,7 +179,7 @@ class KernelFuncs { public: KernelFuncs() = default; static KernelFuncs& Cache() { - static thread_local KernelFuncs g_func_cache; + static LITE_THREAD_LOCAL KernelFuncs g_func_cache; return g_func_cache; } diff --git a/lite/backends/x86/jit/kernel_pool.h b/lite/backends/x86/jit/kernel_pool.h index dc0b1bbf2ecc4b14c6f6acd02d4dad4909c58f73..41ec078c8a619468da8eeeed695cd37d3cbef438 100644 --- a/lite/backends/x86/jit/kernel_pool.h +++ b/lite/backends/x86/jit/kernel_pool.h @@ -22,6 +22,7 @@ #include "lite/backends/x86/jit/gen_base.h" #include "lite/backends/x86/jit/kernel_base.h" #include "lite/backends/x86/jit/kernel_key.h" +#include "lite/utils/macros.h" namespace paddle { namespace lite { @@ -35,7 +36,7 @@ class JitCodePool { public: JitCodePool() = default; static JitCodePool& Instance() { - static thread_local JitCodePool g_jit_codes; + static LITE_THREAD_LOCAL JitCodePool g_jit_codes; return g_jit_codes; } diff --git a/lite/core/context.cc b/lite/core/context.cc index 711c67f8b7f36edcd2d66569d964296d96e8d85c..e9efb4b137a9ec5b9858952ad576a4df21e34fba 100644 --- a/lite/core/context.cc +++ b/lite/core/context.cc @@ -18,7 +18,8 @@ namespace paddle { namespace lite { #ifdef LITE_WITH_XPU -thread_local xdnn::Context* Context<TargetType::kXPU>::_tls_raw_ctx{nullptr}; +LITE_THREAD_LOCAL xdnn::Context* Context<TargetType::kXPU>::_tls_raw_ctx{ + nullptr}; int Context<TargetType::kXPU>::_workspace_l3_size_per_thread{0}; #endif diff --git a/lite/core/context.h b/lite/core/context.h index 
d0c1bd93cc7b93628aedc5f549c84d19c44f4f71..f75f19caf4dc77b178fae6ea03188c73b992fc96 100644 --- a/lite/core/context.h +++ b/lite/core/context.h @@ -14,7 +14,12 @@ #pragma once -#include "lite/utils/any.h" +#include <map> +#include <memory> +#include <set> +#include <string> +#include <unordered_map> +#include <vector> #ifdef LITE_WITH_CUDA #include "lite/backends/cuda/context.h" #endif @@ -31,18 +36,13 @@ #ifdef LITE_WITH_XPU #include "lite/backends/xpu/xpu_header_sitter.h" #endif - -#include <map> -#include <memory> -#include <set> -#include <string> -#include <unordered_map> -#include <vector> #include "lite/core/device_info.h" #include "lite/core/target_wrapper.h" #include "lite/core/tensor.h" #include "lite/utils/all.h" +#include "lite/utils/any.h" #include "lite/utils/env.h" +#include "lite/utils/macros.h" namespace paddle { namespace lite { @@ -181,7 +181,7 @@ class Context { std::string name() const { return "XPUContext"; } private: - static thread_local xdnn::Context* _tls_raw_ctx; + static LITE_THREAD_LOCAL xdnn::Context* _tls_raw_ctx; static int _workspace_l3_size_per_thread; }; #endif diff --git a/lite/core/device_info.cc b/lite/core/device_info.cc index 09da06a4168268c670577c159a2a306a8959d81d..93508f424f64b9674a0de512f6f9c3a284e3bace 100644 --- a/lite/core/device_info.cc +++ b/lite/core/device_info.cc @@ -59,20 +59,20 @@ namespace paddle { namespace lite { #if ((defined LITE_WITH_ARM) || (defined LITE_WITH_MLU)) -thread_local lite_api::PowerMode DeviceInfo::mode_; -thread_local ARMArch DeviceInfo::arch_; -thread_local int DeviceInfo::mem_size_; -thread_local std::vector<int> DeviceInfo::active_ids_; -thread_local TensorLite DeviceInfo::workspace_; -thread_local int64_t DeviceInfo::count_ = 0; +LITE_THREAD_LOCAL lite_api::PowerMode DeviceInfo::mode_; +LITE_THREAD_LOCAL ARMArch DeviceInfo::arch_; +LITE_THREAD_LOCAL int DeviceInfo::mem_size_; +LITE_THREAD_LOCAL std::vector<int> DeviceInfo::active_ids_; +LITE_THREAD_LOCAL TensorLite DeviceInfo::workspace_; +LITE_THREAD_LOCAL int64_t DeviceInfo::count_ = 0; #ifdef LITE_WITH_MLU -thread_local cnmlCoreVersion_t 
DeviceInfo::mlu_core_version_{CNML_MLU270}; -thread_local int DeviceInfo::mlu_core_number_{1}; -thread_local bool DeviceInfo::use_first_conv_{false}; -thread_local std::vector<float> DeviceInfo::mean_vec_; -thread_local std::vector<float> DeviceInfo::std_vec_; -thread_local DataLayoutType DeviceInfo::input_layout_{DATALAYOUT(kNCHW)}; +LITE_THREAD_LOCAL cnmlCoreVersion_t DeviceInfo::mlu_core_version_{CNML_MLU270}; +LITE_THREAD_LOCAL int DeviceInfo::mlu_core_number_{1}; +LITE_THREAD_LOCAL bool DeviceInfo::use_first_conv_{false}; +LITE_THREAD_LOCAL std::vector<float> DeviceInfo::mean_vec_; +LITE_THREAD_LOCAL std::vector<float> DeviceInfo::std_vec_; +LITE_THREAD_LOCAL DataLayoutType DeviceInfo::input_layout_{DATALAYOUT(kNCHW)}; #endif #ifdef TARGET_IOS diff --git a/lite/core/device_info.h b/lite/core/device_info.h index b06eb8d944735971133bb7a29aa0f06075e60626..d35efebf7c9a0703b6d8acc0ad3960890fcfe607 100644 --- a/lite/core/device_info.h +++ b/lite/core/device_info.h @@ -22,6 +22,7 @@ #ifdef LITE_WITH_MLU #include "lite/backends/mlu/mlu_utils.h" #endif +#include "lite/utils/macros.h" namespace paddle { namespace lite { @@ -113,20 +114,20 @@ class DeviceInfo { // LITE_POWER_HIGH stands for using big cores, // LITE_POWER_LOW stands for using small core, // LITE_POWER_FULL stands for using all cores - static thread_local lite_api::PowerMode mode_; - static thread_local ARMArch arch_; - static thread_local int mem_size_; - static thread_local std::vector<int> active_ids_; - static thread_local TensorLite workspace_; - static thread_local int64_t count_; + static LITE_THREAD_LOCAL lite_api::PowerMode mode_; + static LITE_THREAD_LOCAL ARMArch arch_; + static LITE_THREAD_LOCAL int mem_size_; + static LITE_THREAD_LOCAL std::vector<int> active_ids_; + static LITE_THREAD_LOCAL TensorLite workspace_; + static LITE_THREAD_LOCAL int64_t count_; #ifdef LITE_WITH_MLU - static thread_local cnmlCoreVersion_t mlu_core_version_; - static thread_local int mlu_core_number_; - static thread_local bool use_first_conv_; - static 
thread_local std::vector<float> mean_vec_; - static thread_local std::vector<float> std_vec_; - static thread_local DataLayoutType input_layout_; + static LITE_THREAD_LOCAL cnmlCoreVersion_t mlu_core_version_; + static LITE_THREAD_LOCAL int mlu_core_number_; + static LITE_THREAD_LOCAL bool use_first_conv_; + static LITE_THREAD_LOCAL std::vector<float> mean_vec_; + static LITE_THREAD_LOCAL std::vector<float> std_vec_; + static LITE_THREAD_LOCAL DataLayoutType input_layout_; #endif void SetDotInfo(int argc, ...); diff --git a/lite/core/workspace.h b/lite/core/workspace.h index 54efb6699ac6df63286b26843f8d79b7c84949f1..9a294049e049491559e0e73a6caad37a9d41571a 100644 --- a/lite/core/workspace.h +++ b/lite/core/workspace.h @@ -50,7 +50,8 @@ class WorkSpace { } static WorkSpace& Global_Host() { - thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kHost))); + static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x( + new WorkSpace(TARGET(kHost))); return *x; } @@ -64,14 +65,16 @@ #if defined(LITE_WITH_CUDA) static WorkSpace& Global_CUDA() { - thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kCUDA))); + static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x( + new WorkSpace(TARGET(kCUDA))); return *x; } #endif #if defined(LITE_WITH_MLU) static WorkSpace& Global_MLU() { - thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kMLU))); + static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x( + new WorkSpace(TARGET(kMLU))); return *x; } #endif diff --git a/lite/kernels/mlu/bridges/graph.h b/lite/kernels/mlu/bridges/graph.h index b846d15af06c683ad685b04da5588f7ecedd0d38..fd203de20413e1f23181dc7167daa4ff49568e38 100644 --- a/lite/kernels/mlu/bridges/graph.h +++ b/lite/kernels/mlu/bridges/graph.h @@ -22,6 +22,7 @@ #include "lite/core/op_lite.h" #include "lite/core/tensor.h" #include "lite/kernels/mlu/bridges/tensor.h" +#include "lite/utils/macros.h" #define PRINT_HW_TIME false @@ -113,7 +114,7 @@ class Graph { void Compute(cnrtInvokeFuncParam_t forward_param, cnrtQueue_t que) { #if PRINT_HW_TIME - thread_local float 
hw_time; + static LITE_THREAD_LOCAL float hw_time; CNRT_CALL(cnrtPlaceNotifier(notifier_start_, que)); #endif CNML_CALL(cnmlComputeFusionOpForward_V3(fusion_op_, diff --git a/lite/tools/build.sh b/lite/tools/build.sh index c9f5be17cb1f3dea01142e18aeb94dd95f2d522b..5f2c5dfda8258e6c9ab5cf0bd58f44e9919e9743 100755 --- a/lite/tools/build.sh +++ b/lite/tools/build.sh @@ -34,6 +34,7 @@ BUILD_RKNPU=OFF RKNPU_DDK_ROOT="$(pwd)/rknpu/" LITE_WITH_ARM_LANG=OFF PYTHON_EXECUTABLE_OPTION="" +IOS_DEPLOYMENT_TARGET=9.0 readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz @@ -322,6 +323,7 @@ function make_ios { -DARM_TARGET_ARCH_ABI=$abi \ -DLITE_BUILD_EXTRA=$BUILD_EXTRA \ -DLITE_WITH_CV=$BUILD_CV \ + -DDEPLOYMENT_TARGET=${IOS_DEPLOYMENT_TARGET} \ -DARM_TARGET_OS=$os make publish_inference -j$NUM_PROC @@ -426,6 +428,7 @@ function print_usage { echo -e "--build_python: (OFF|ON); controls whether to publish python api lib (ANDROID and IOS is not supported)" echo -e "--build_java: (OFF|ON); controls whether to publish java api lib (Only ANDROID is supported)" echo -e "--build_dir: directory for building" + echo -e "--ios_deployment_target: (default: 9.0); Set the minimum compatible system version for ios deployment." echo echo -e "argument choices:" echo -e "--arm_os:\t android|ios|ios64" @@ -551,6 +554,10 @@ function main { RKNPU_DDK_ROOT="${i#*=}" shift ;; + --ios_deployment_target=*) + IOS_DEPLOYMENT_TARGET="${i#*=}" + shift + ;; tiny_publish) make_tiny_publish_so $ARM_OS $ARM_ABI $ARM_LANG $ANDROID_STL shift diff --git a/lite/tools/build_ios.sh b/lite/tools/build_ios.sh index 2c7eeb466f3d82cf491b6a631d79918fa4fd4cd2..6c57b6498251dceee368df80bc6f14bbd5c72b49 100755 --- a/lite/tools/build_ios.sh +++ b/lite/tools/build_ios.sh @@ -17,6 +17,7 @@ workspace=$PWD/$(dirname $0)/../../ # options of striping lib according to input model. OPTMODEL_DIR="" WITH_STRIP=OFF +IOS_DEPLOYMENT_TARGET=9.0 # num of threads used during compiling.. 
readonly NUM_PROC=${LITE_BUILD_THREADS:-4} ##################################################################################################### @@ -74,6 +75,7 @@ function make_ios { -DARM_TARGET_ARCH_ABI=$arch \ -DLITE_BUILD_EXTRA=$WITH_EXTRA \ -DLITE_WITH_CV=$WITH_CV \ + -DDEPLOYMENT_TARGET=${IOS_DEPLOYMENT_TARGET} \ -DARM_TARGET_OS=$os make publish_inference -j$NUM_PROC @@ -97,6 +99,7 @@ function print_usage { echo -e "| --with_cv: (OFF|ON); controls whether to compile cv functions into lib, default is OFF |" echo -e "| --with_log: (OFF|ON); controls whether to print log information, default is ON |" echo -e "| --with_extra: (OFF|ON); controls whether to publish extra operators and kernels for (sequence-related model such as OCR or NLP) |" + echo -e "| --ios_deployment_target: (default: 9.0); Set the minimum compatible system version for ios deployment. |" echo -e "| |" echo -e "| arguments of striping lib according to input model:(armv8, gcc, c++_static) |" echo -e "| ./lite/tools/build_android.sh --with_strip=ON --opt_model_dir=YourOptimizedModelDir |" @@ -140,6 +143,10 @@ function main { WITH_LOG="${i#*=}" shift ;; + --ios_deployment_target=*) + IOS_DEPLOYMENT_TARGET="${i#*=}" + shift + ;; help) print_usage exit 0 diff --git a/lite/utils/macros.h b/lite/utils/macros.h index 0fbe90fa45b9408f8eab64008da0016510bec59e..5c2f85e92cd7c16f5aabe1b46af90c4584440a8d 100644 --- a/lite/utils/macros.h +++ b/lite/utils/macros.h @@ -53,3 +53,14 @@ #if defined(__FLT_MAX__) #define FLT_MAX __FLT_MAX__ #endif // __FLT_MAX__ + +#if (defined __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && \ + (__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 90000) +// Thread local storage will be ignored because the linker for iOS 8 does not +// support it. +#define LITE_THREAD_LOCAL +#elif __cplusplus >= 201103 +#define LITE_THREAD_LOCAL thread_local +#else +#error "C++11 support is required for paddle-lite compilation." +#endif