diff --git a/lite/backends/x86/jit/helper.h b/lite/backends/x86/jit/helper.h index b21be9466c05f4c41127ba781360a946e1c1b98c..41542783b440fb530d451f6808b6ec811061ad5e 100644 --- a/lite/backends/x86/jit/helper.h +++ b/lite/backends/x86/jit/helper.h @@ -23,6 +23,7 @@ #include "lite/backends/x86/jit/kernel_base.h" #include "lite/backends/x86/jit/kernel_key.h" #include "lite/backends/x86/jit/kernel_pool.h" +#include "lite/utils/macros.h" #include "lite/utils/paddle_enforce.h" namespace paddle { @@ -178,7 +179,7 @@ class KernelFuncs { public: KernelFuncs() = default; static KernelFuncs& Cache() { - static thread_local KernelFuncs g_func_cache; + static LITE_THREAD_LOCAL KernelFuncs g_func_cache; return g_func_cache; } diff --git a/lite/backends/x86/jit/kernel_pool.h b/lite/backends/x86/jit/kernel_pool.h index dc0b1bbf2ecc4b14c6f6acd02d4dad4909c58f73..41ec078c8a619468da8eeeed695cd37d3cbef438 100644 --- a/lite/backends/x86/jit/kernel_pool.h +++ b/lite/backends/x86/jit/kernel_pool.h @@ -22,6 +22,7 @@ #include "lite/backends/x86/jit/gen_base.h" #include "lite/backends/x86/jit/kernel_base.h" #include "lite/backends/x86/jit/kernel_key.h" +#include "lite/utils/macros.h" namespace paddle { namespace lite { @@ -35,7 +36,7 @@ class JitCodePool { public: JitCodePool() = default; static JitCodePool& Instance() { - static thread_local JitCodePool g_jit_codes; + static LITE_THREAD_LOCAL JitCodePool g_jit_codes; return g_jit_codes; } diff --git a/lite/core/context.cc b/lite/core/context.cc index 711c67f8b7f36edcd2d66569d964296d96e8d85c..e9efb4b137a9ec5b9858952ad576a4df21e34fba 100644 --- a/lite/core/context.cc +++ b/lite/core/context.cc @@ -18,7 +18,8 @@ namespace paddle { namespace lite { #ifdef LITE_WITH_XPU -thread_local xdnn::Context* Context<TargetType::kXPU>::_tls_raw_ctx{nullptr}; +LITE_THREAD_LOCAL xdnn::Context* Context<TargetType::kXPU>::_tls_raw_ctx{ + nullptr}; int Context<TargetType::kXPU>::_workspace_l3_size_per_thread{0}; #endif diff --git a/lite/core/context.h b/lite/core/context.h index 
d0c1bd93cc7b93628aedc5f549c84d19c44f4f71..f75f19caf4dc77b178fae6ea03188c73b992fc96 100644 --- a/lite/core/context.h +++ b/lite/core/context.h @@ -14,7 +14,12 @@ #pragma once -#include "lite/utils/any.h" +#include <map> +#include <memory> +#include <set> +#include <string> +#include <unordered_map> +#include <vector> #ifdef LITE_WITH_CUDA #include "lite/backends/cuda/context.h" #endif @@ -31,18 +36,13 @@ #ifdef LITE_WITH_XPU #include "lite/backends/xpu/xpu_header_sitter.h" #endif - -#include <map> -#include <memory> -#include <set> -#include <string> -#include <unordered_map> -#include <vector> #include "lite/core/device_info.h" #include "lite/core/target_wrapper.h" #include "lite/core/tensor.h" #include "lite/utils/all.h" +#include "lite/utils/any.h" #include "lite/utils/env.h" +#include "lite/utils/macros.h" namespace paddle { namespace lite { @@ -181,7 +181,7 @@ class Context { std::string name() const { return "XPUContext"; } private: - static thread_local xdnn::Context* _tls_raw_ctx; + static LITE_THREAD_LOCAL xdnn::Context* _tls_raw_ctx; static int _workspace_l3_size_per_thread; }; #endif diff --git a/lite/core/device_info.cc b/lite/core/device_info.cc index 09da06a4168268c670577c159a2a306a8959d81d..93508f424f64b9674a0de512f6f9c3a284e3bace 100644 --- a/lite/core/device_info.cc +++ b/lite/core/device_info.cc @@ -59,20 +59,20 @@ namespace paddle { namespace lite { #if ((defined LITE_WITH_ARM) || (defined LITE_WITH_MLU)) -thread_local lite_api::PowerMode DeviceInfo::mode_; -thread_local ARMArch DeviceInfo::arch_; -thread_local int DeviceInfo::mem_size_; -thread_local std::vector<int> DeviceInfo::active_ids_; -thread_local TensorLite DeviceInfo::workspace_; -thread_local int64_t DeviceInfo::count_ = 0; +LITE_THREAD_LOCAL lite_api::PowerMode DeviceInfo::mode_; +LITE_THREAD_LOCAL ARMArch DeviceInfo::arch_; +LITE_THREAD_LOCAL int DeviceInfo::mem_size_; +LITE_THREAD_LOCAL std::vector<int> DeviceInfo::active_ids_; +LITE_THREAD_LOCAL TensorLite DeviceInfo::workspace_; +LITE_THREAD_LOCAL int64_t DeviceInfo::count_ = 0; #ifdef LITE_WITH_MLU -thread_local cnmlCoreVersion_t 
DeviceInfo::mlu_core_version_{CNML_MLU270}; -thread_local int DeviceInfo::mlu_core_number_{1}; -thread_local bool DeviceInfo::use_first_conv_{false}; -thread_local std::vector<float> DeviceInfo::mean_vec_; -thread_local std::vector<float> DeviceInfo::std_vec_; -thread_local DataLayoutType DeviceInfo::input_layout_{DATALAYOUT(kNCHW)}; +LITE_THREAD_LOCAL cnmlCoreVersion_t DeviceInfo::mlu_core_version_{CNML_MLU270}; +LITE_THREAD_LOCAL int DeviceInfo::mlu_core_number_{1}; +LITE_THREAD_LOCAL bool DeviceInfo::use_first_conv_{false}; +LITE_THREAD_LOCAL std::vector<float> DeviceInfo::mean_vec_; +LITE_THREAD_LOCAL std::vector<float> DeviceInfo::std_vec_; +LITE_THREAD_LOCAL DataLayoutType DeviceInfo::input_layout_{DATALAYOUT(kNCHW)}; #endif #ifdef TARGET_IOS diff --git a/lite/core/device_info.h b/lite/core/device_info.h index b06eb8d944735971133bb7a29aa0f06075e60626..d35efebf7c9a0703b6d8acc0ad3960890fcfe607 100644 --- a/lite/core/device_info.h +++ b/lite/core/device_info.h @@ -22,6 +22,7 @@ #ifdef LITE_WITH_MLU #include "lite/backends/mlu/mlu_utils.h" #endif +#include "lite/utils/macros.h" namespace paddle { namespace lite { @@ -113,20 +114,20 @@ class DeviceInfo { // LITE_POWER_HIGH stands for using big cores, // LITE_POWER_LOW stands for using small core, // LITE_POWER_FULL stands for using all cores - static thread_local lite_api::PowerMode mode_; - static thread_local ARMArch arch_; - static thread_local int mem_size_; - static thread_local std::vector<int> active_ids_; - static thread_local TensorLite workspace_; - static thread_local int64_t count_; + static LITE_THREAD_LOCAL lite_api::PowerMode mode_; + static LITE_THREAD_LOCAL ARMArch arch_; + static LITE_THREAD_LOCAL int mem_size_; + static LITE_THREAD_LOCAL std::vector<int> active_ids_; + static LITE_THREAD_LOCAL TensorLite workspace_; + static LITE_THREAD_LOCAL int64_t count_; #ifdef LITE_WITH_MLU - static thread_local cnmlCoreVersion_t mlu_core_version_; - static thread_local int mlu_core_number_; - static thread_local bool use_first_conv_; - static 
thread_local std::vector<float> mean_vec_; - static thread_local std::vector<float> std_vec_; - static thread_local DataLayoutType input_layout_; + static LITE_THREAD_LOCAL cnmlCoreVersion_t mlu_core_version_; + static LITE_THREAD_LOCAL int mlu_core_number_; + static LITE_THREAD_LOCAL bool use_first_conv_; + static LITE_THREAD_LOCAL std::vector<float> mean_vec_; + static LITE_THREAD_LOCAL std::vector<float> std_vec_; + static LITE_THREAD_LOCAL DataLayoutType input_layout_; #endif void SetDotInfo(int argc, ...); diff --git a/lite/core/workspace.h b/lite/core/workspace.h index 54efb6699ac6df63286b26843f8d79b7c84949f1..9a294049e049491559e0e73a6caad37a9d41571a 100644 --- a/lite/core/workspace.h +++ b/lite/core/workspace.h @@ -50,7 +50,8 @@ class WorkSpace { } static WorkSpace& Global_Host() { - thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kHost))); + static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x( + new WorkSpace(TARGET(kHost))); return *x; } @@ -64,14 +65,16 @@ #if defined(LITE_WITH_CUDA) static WorkSpace& Global_CUDA() { - thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kCUDA))); + static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x( + new WorkSpace(TARGET(kCUDA))); return *x; } #endif #if defined(LITE_WITH_MLU) static WorkSpace& Global_MLU() { - thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kMLU))); + static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x( + new WorkSpace(TARGET(kMLU))); return *x; } #endif diff --git a/lite/kernels/mlu/bridges/graph.h b/lite/kernels/mlu/bridges/graph.h index b846d15af06c683ad685b04da5588f7ecedd0d38..fd203de20413e1f23181dc7167daa4ff49568e38 100644 --- a/lite/kernels/mlu/bridges/graph.h +++ b/lite/kernels/mlu/bridges/graph.h @@ -22,6 +22,7 @@ #include "lite/core/op_lite.h" #include "lite/core/tensor.h" #include "lite/kernels/mlu/bridges/tensor.h" +#include "lite/utils/macros.h" #define PRINT_HW_TIME false @@ -113,7 +114,7 @@ class Graph { void Compute(cnrtInvokeFuncParam_t forward_param, cnrtQueue_t que) { #if PRINT_HW_TIME - thread_local float 
hw_time; + static LITE_THREAD_LOCAL float hw_time; CNRT_CALL(cnrtPlaceNotifier(notifier_start_, que)); #endif CNML_CALL(cnmlComputeFusionOpForward_V3(fusion_op_, diff --git a/lite/tools/build.sh b/lite/tools/build.sh index c9f5be17cb1f3dea01142e18aeb94dd95f2d522b..5f2c5dfda8258e6c9ab5cf0bd58f44e9919e9743 100755 --- a/lite/tools/build.sh +++ b/lite/tools/build.sh @@ -34,6 +34,7 @@ BUILD_RKNPU=OFF RKNPU_DDK_ROOT="$(pwd)/rknpu/" LITE_WITH_ARM_LANG=OFF PYTHON_EXECUTABLE_OPTION="" +IOS_DEPLOYMENT_TARGET=9.0 readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz @@ -322,6 +323,7 @@ function make_ios { -DARM_TARGET_ARCH_ABI=$abi \ -DLITE_BUILD_EXTRA=$BUILD_EXTRA \ -DLITE_WITH_CV=$BUILD_CV \ + -DDEPLOYMENT_TARGET=${IOS_DEPLOYMENT_TARGET} \ -DARM_TARGET_OS=$os make publish_inference -j$NUM_PROC @@ -426,6 +428,7 @@ function print_usage { echo -e "--build_python: (OFF|ON); controls whether to publish python api lib (ANDROID and IOS is not supported)" echo -e "--build_java: (OFF|ON); controls whether to publish java api lib (Only ANDROID is supported)" echo -e "--build_dir: directory for building" + echo -e "--ios_deployment_target: (default: 9.0); Set the minimum compatible system version for ios deployment." echo echo -e "argument choices:" echo -e "--arm_os:\t android|ios|ios64" @@ -551,6 +554,10 @@ function main { RKNPU_DDK_ROOT="${i#*=}" shift ;; + --ios_deployment_target=*) + IOS_DEPLOYMENT_TARGET="${i#*=}" + shift + ;; tiny_publish) make_tiny_publish_so $ARM_OS $ARM_ABI $ARM_LANG $ANDROID_STL shift diff --git a/lite/tools/build_ios.sh b/lite/tools/build_ios.sh index 2c7eeb466f3d82cf491b6a631d79918fa4fd4cd2..6c57b6498251dceee368df80bc6f14bbd5c72b49 100755 --- a/lite/tools/build_ios.sh +++ b/lite/tools/build_ios.sh @@ -17,6 +17,7 @@ workspace=$PWD/$(dirname $0)/../../ # options of striping lib according to input model. OPTMODEL_DIR="" WITH_STRIP=OFF +IOS_DEPLOYMENT_TARGET=9.0 # num of threads used during compiling.. 
readonly NUM_PROC=${LITE_BUILD_THREADS:-4} ##################################################################################################### @@ -74,6 +75,7 @@ function make_ios { -DARM_TARGET_ARCH_ABI=$arch \ -DLITE_BUILD_EXTRA=$WITH_EXTRA \ -DLITE_WITH_CV=$WITH_CV \ + -DDEPLOYMENT_TARGET=${IOS_DEPLOYMENT_TARGET} \ -DARM_TARGET_OS=$os make publish_inference -j$NUM_PROC @@ -97,6 +99,7 @@ function print_usage { echo -e "| --with_cv: (OFF|ON); controls whether to compile cv functions into lib, default is OFF |" echo -e "| --with_log: (OFF|ON); controls whether to print log information, default is ON |" echo -e "| --with_extra: (OFF|ON); controls whether to publish extra operators and kernels for (sequence-related model such as OCR or NLP) |" + echo -e "| --ios_deployment_target: (default: 9.0); Set the minimum compatible system version for ios deployment. |" echo -e "| |" echo -e "| arguments of striping lib according to input model:(armv8, gcc, c++_static) |" echo -e "| ./lite/tools/build_android.sh --with_strip=ON --opt_model_dir=YourOptimizedModelDir |" @@ -140,6 +143,10 @@ function main { WITH_LOG="${i#*=}" shift ;; + --ios_deployment_target=*) + IOS_DEPLOYMENT_TARGET="${i#*=}" + shift + ;; help) print_usage exit 0 diff --git a/lite/utils/macros.h b/lite/utils/macros.h index 0fbe90fa45b9408f8eab64008da0016510bec59e..5c2f85e92cd7c16f5aabe1b46af90c4584440a8d 100644 --- a/lite/utils/macros.h +++ b/lite/utils/macros.h @@ -53,3 +53,14 @@ #if defined(__FLT_MAX__) #define FLT_MAX __FLT_MAX__ #endif // __FLT_MAX__ + +#if (defined __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && \ + (__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 90000) +// Thread local storage will be ignored because the linker for iOS 8 does not +// support it. +#define LITE_THREAD_LOCAL +#elif __cplusplus >= 201103 +#define LITE_THREAD_LOCAL thread_local +#else +#error "C++11 support is required for paddle-lite compilation." +#endif