diff --git a/lite/backends/mlu/target_wrapper.cc b/lite/backends/mlu/target_wrapper.cc
index b98854946db7eda4f133d773ae0f5ba9e45a77cc..7317dd2fb8127841529cafbd714d4e203ebe93a7 100644
--- a/lite/backends/mlu/target_wrapper.cc
+++ b/lite/backends/mlu/target_wrapper.cc
@@ -18,6 +18,7 @@
 #include
 #include "lite/backends/mlu/mlu_utils.h"
+#include "lite/utils/macros.h"

 namespace paddle {
 namespace lite {
@@ -37,12 +38,14 @@ void cnrtMemcpyDtoH(void* dst, const void* src, size_t size) {
 }  // namespace mlu

-thread_local cnmlCoreVersion_t TargetWrapperMlu::mlu_core_version_{CNML_MLU270};
-thread_local int TargetWrapperMlu::mlu_core_number_{1};
-thread_local bool TargetWrapperMlu::use_first_conv_{false};
-thread_local std::vector<float> TargetWrapperMlu::mean_vec_;
-thread_local std::vector<float> TargetWrapperMlu::std_vec_;
-thread_local DataLayoutType TargetWrapperMlu::input_layout_{DATALAYOUT(kNCHW)};
+LITE_THREAD_LOCAL cnmlCoreVersion_t TargetWrapperMlu::mlu_core_version_{
+    CNML_MLU270};
+LITE_THREAD_LOCAL int TargetWrapperMlu::mlu_core_number_{1};
+LITE_THREAD_LOCAL bool TargetWrapperMlu::use_first_conv_{false};
+LITE_THREAD_LOCAL std::vector<float> TargetWrapperMlu::mean_vec_;
+LITE_THREAD_LOCAL std::vector<float> TargetWrapperMlu::std_vec_;
+LITE_THREAD_LOCAL DataLayoutType TargetWrapperMlu::input_layout_{
+    DATALAYOUT(kNCHW)};

 size_t TargetWrapperMlu::num_devices() {
   uint32_t dev_count = 0;
diff --git a/lite/backends/mlu/target_wrapper.h b/lite/backends/mlu/target_wrapper.h
index 2566ae153e2f9539d1ad5739f208bc5f946a7542..7cb46edc3efb0dfc5f4c042ccf3dbe6c042acb24 100644
--- a/lite/backends/mlu/target_wrapper.h
+++ b/lite/backends/mlu/target_wrapper.h
@@ -17,6 +17,7 @@
 #include
 #include "lite/backends/mlu/mlu_utils.h"
 #include "lite/core/target_wrapper.h"
+#include "lite/utils/macros.h"

 namespace paddle {
 namespace lite {
@@ -58,12 +59,12 @@
   static DataLayoutType InputLayout();

  private:
-  static thread_local cnmlCoreVersion_t mlu_core_version_;
-  static thread_local int mlu_core_number_;
-  static thread_local bool use_first_conv_;
-  static thread_local std::vector<float> mean_vec_;
-  static thread_local std::vector<float> std_vec_;
-  static thread_local DataLayoutType input_layout_;
+  static LITE_THREAD_LOCAL cnmlCoreVersion_t mlu_core_version_;
+  static LITE_THREAD_LOCAL int mlu_core_number_;
+  static LITE_THREAD_LOCAL bool use_first_conv_;
+  static LITE_THREAD_LOCAL std::vector<float> mean_vec_;
+  static LITE_THREAD_LOCAL std::vector<float> std_vec_;
+  static LITE_THREAD_LOCAL DataLayoutType input_layout_;
 };

 }  // namespace lite
diff --git a/lite/backends/x86/jit/helper.h b/lite/backends/x86/jit/helper.h
index 57a3611bb671c6d83ec3212702a57e3fc7d7f35f..4f1411ec256cac9ba890cad0721d2d4784a513e7 100644
--- a/lite/backends/x86/jit/helper.h
+++ b/lite/backends/x86/jit/helper.h
@@ -24,6 +24,7 @@
 #include "lite/backends/x86/jit/kernel_key.h"
 #include "lite/backends/x86/jit/kernel_pool.h"
 #include "lite/utils/cp_logging.h"
+#include "lite/utils/macros.h"

 namespace paddle {
 namespace lite {
@@ -178,7 +179,7 @@ class KernelFuncs {
  public:
   KernelFuncs() = default;
   static KernelFuncs& Cache() {
-    static thread_local KernelFuncs g_func_cache;
+    static LITE_THREAD_LOCAL KernelFuncs g_func_cache;
     return g_func_cache;
   }
diff --git a/lite/backends/x86/jit/kernel_pool.h b/lite/backends/x86/jit/kernel_pool.h
index dc0b1bbf2ecc4b14c6f6acd02d4dad4909c58f73..41ec078c8a619468da8eeeed695cd37d3cbef438 100644
--- a/lite/backends/x86/jit/kernel_pool.h
+++ b/lite/backends/x86/jit/kernel_pool.h
@@ -22,6 +22,7 @@
 #include
 #include "lite/backends/x86/jit/gen_base.h"
 #include "lite/backends/x86/jit/kernel_base.h"
 #include "lite/backends/x86/jit/kernel_key.h"
+#include "lite/utils/macros.h"

 namespace paddle {
 namespace lite {
@@ -35,7 +36,7 @@ class JitCodePool {
  public:
   JitCodePool() = default;
   static JitCodePool& Instance() {
-    static thread_local JitCodePool g_jit_codes;
+    static LITE_THREAD_LOCAL JitCodePool g_jit_codes;
     return g_jit_codes;
   }
diff --git a/lite/backends/xpu/target_wrapper.cc b/lite/backends/xpu/target_wrapper.cc
index a322418ccde20a34dc6c6ba9b47601a9a658f99c..a3d8729410299170964e3ce3b59feb4b970a121b 100644
--- a/lite/backends/xpu/target_wrapper.cc
+++ b/lite/backends/xpu/target_wrapper.cc
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include "lite/backends/xpu/target_wrapper.h"
+#include "lite/utils/macros.h"

 namespace paddle {
 namespace lite {
@@ -55,7 +56,7 @@ XPUScratchPadGuard TargetWrapperXPU::MallocScratchPad(size_t size,

 std::string TargetWrapperXPU::multi_encoder_precision;  // NOLINT
 int TargetWrapperXPU::workspace_l3_size_per_thread{0};
-thread_local xdnn::Context* TargetWrapperXPU::tls_raw_ctx_{nullptr};
+LITE_THREAD_LOCAL xdnn::Context* TargetWrapperXPU::tls_raw_ctx_{nullptr};

 }  // namespace lite
 }  // namespace paddle
diff --git a/lite/backends/xpu/target_wrapper.h b/lite/backends/xpu/target_wrapper.h
index 070184a13088a169fe38f1b8105a0803d9915da1..1a888b126a43783ddae5654de38f5b2e201eaa5e 100644
--- a/lite/backends/xpu/target_wrapper.h
+++ b/lite/backends/xpu/target_wrapper.h
@@ -18,6 +18,7 @@
 #include "lite/backends/xpu/xpu_header_sitter.h"  // xpu_free
 #include "lite/core/target_wrapper.h"             // TargetWrapper
 #include "lite/utils/cp_logging.h"                // CHECK_EQ
+#include "lite/utils/macros.h"

 #define XPU_CALL(func)                                        \
   {                                                           \
@@ -99,7 +100,7 @@ class TargetWrapper {
   static int workspace_l3_size_per_thread;

  private:
-  static thread_local xdnn::Context* tls_raw_ctx_;
+  static LITE_THREAD_LOCAL xdnn::Context* tls_raw_ctx_;
 };

 }  // namespace lite
diff --git a/lite/core/context.cc b/lite/core/context.cc
index abb44945ec66e1a89efc1ccb08ec1df370f2e099..c39b5ce4266f7606a6cab56c91b139a3f7712a65 100644
--- a/lite/core/context.cc
+++ b/lite/core/context.cc
@@ -13,15 +13,16 @@
 // limitations under the License.

 #include "lite/core/context.h"
+#include "lite/utils/macros.h"

 namespace paddle {
 namespace lite {

 #ifdef LITE_WITH_HUAWEI_ASCEND_NPU
-thread_local std::string
+LITE_THREAD_LOCAL std::string
     Context<TargetType::kHuaweiAscendNPU>::subgraph_model_cache_dir_{
         ""};  // NOLINT
-thread_local int
+LITE_THREAD_LOCAL int
     Context<TargetType::kHuaweiAscendNPU>::huawei_ascend_device_id_{
         0};  // NOLINT
 #endif
diff --git a/lite/core/context.h b/lite/core/context.h
index 5567eadbf88137a41f53a7c57eafea6a0ccb1953..84742bf478c26e5609c507925c6d28805cb3a70c 100644
--- a/lite/core/context.h
+++ b/lite/core/context.h
@@ -44,6 +44,7 @@
 #include "lite/core/tensor.h"
 #include "lite/utils/all.h"
 #include "lite/utils/env.h"
+#include "lite/utils/macros.h"

 namespace paddle {
 namespace lite {
@@ -128,8 +129,8 @@ class Context {
   static int HuaweiAscendDeviceID() { return huawei_ascend_device_id_; }

  private:
-  static thread_local std::string subgraph_model_cache_dir_;
-  static thread_local int huawei_ascend_device_id_;
+  static LITE_THREAD_LOCAL std::string subgraph_model_cache_dir_;
+  static LITE_THREAD_LOCAL int huawei_ascend_device_id_;
 };
 #endif
diff --git a/lite/core/device_info.cc b/lite/core/device_info.cc
index 6d404cee9718a94d2646728c8f2d79576ceb7860..cd135f85b3b55641ae1996b2d3b933e1da7870dc 100644
--- a/lite/core/device_info.cc
+++ b/lite/core/device_info.cc
@@ -54,17 +54,18 @@
 #include
 #include
 #include
 #include "lite/core/device_info.h"
+#include "lite/utils/macros.h"

 namespace paddle {
 namespace lite {

 #if ((defined LITE_WITH_ARM) || (defined LITE_WITH_MLU))
-thread_local lite_api::PowerMode DeviceInfo::mode_;
-thread_local ARMArch DeviceInfo::arch_;
-thread_local int DeviceInfo::mem_size_;
-thread_local std::vector<int> DeviceInfo::active_ids_;
-thread_local TensorLite DeviceInfo::workspace_;
-thread_local int64_t DeviceInfo::count_ = 0;
+LITE_THREAD_LOCAL lite_api::PowerMode DeviceInfo::mode_;
+LITE_THREAD_LOCAL ARMArch DeviceInfo::arch_;
+LITE_THREAD_LOCAL int DeviceInfo::mem_size_;
+LITE_THREAD_LOCAL std::vector<int> DeviceInfo::active_ids_;
+LITE_THREAD_LOCAL TensorLite DeviceInfo::workspace_;
+LITE_THREAD_LOCAL int64_t DeviceInfo::count_ = 0;

 #ifdef TARGET_IOS
 const int DEFAULT_L1_CACHE_SIZE = 64 * 1024;
diff --git a/lite/core/device_info.h b/lite/core/device_info.h
index f3f10c2d5740d6e8cc7e219b8f0d9d9ff17a8496..c95f285e1433e9ca55595d4a5f0cb814c488fe7b 100644
--- a/lite/core/device_info.h
+++ b/lite/core/device_info.h
@@ -22,6 +22,7 @@
 #ifdef LITE_WITH_MLU
 #include "lite/backends/mlu/mlu_utils.h"
 #endif
+#include "lite/utils/macros.h"

 namespace paddle {
 namespace lite {
@@ -99,12 +100,12 @@ class DeviceInfo {
   // LITE_POWER_HIGH stands for using big cores,
   // LITE_POWER_LOW stands for using small core,
   // LITE_POWER_FULL stands for using all cores
-  static thread_local lite_api::PowerMode mode_;
-  static thread_local ARMArch arch_;
-  static thread_local int mem_size_;
-  static thread_local std::vector<int> active_ids_;
-  static thread_local TensorLite workspace_;
-  static thread_local int64_t count_;
+  static LITE_THREAD_LOCAL lite_api::PowerMode mode_;
+  static LITE_THREAD_LOCAL ARMArch arch_;
+  static LITE_THREAD_LOCAL int mem_size_;
+  static LITE_THREAD_LOCAL std::vector<int> active_ids_;
+  static LITE_THREAD_LOCAL TensorLite workspace_;
+  static LITE_THREAD_LOCAL int64_t count_;

   void SetDotInfo(int argc, ...);
   void SetFP16Info(int argc, ...);
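Every file so far follows the same recipe: per-thread backend state spelled `thread_local` becomes `LITE_THREAD_LOCAL`, a macro added to `lite/utils/macros.h` at the end of this diff. It expands to `thread_local` on any C++11 toolchain and to nothing when the iOS deployment target is below 9.0, whose linker cannot handle thread-local storage. Below is a minimal sketch of the static-member pattern used by `TargetWrapperMlu` and `DeviceInfo` above; the `CoreConfig` class is hypothetical, not from the patch.

```cpp
#include "lite/utils/macros.h"  // LITE_THREAD_LOCAL, added by this patch

// Hypothetical config holder mirroring the DeviceInfo/TargetWrapperMlu
// members above.
class CoreConfig {
 public:
  static void SetCoreNumber(int n) { core_number_ = n; }
  static int CoreNumber() { return core_number_; }

 private:
  // One copy per thread where TLS is available; a single process-wide
  // copy (with the usual data-race caveats) where the macro is empty.
  static LITE_THREAD_LOCAL int core_number_;
};

// The out-of-line definition must repeat the macro, just as
// lite/backends/mlu/target_wrapper.cc does above.
LITE_THREAD_LOCAL int CoreConfig::core_number_{1};
```

The trade-off is silent: in an iOS 8 build these members lose their thread isolation, so predictors running on different threads share one copy and need external synchronization.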
diff --git a/lite/core/mir/mlu_postprocess_pass.cc b/lite/core/mir/mlu_postprocess_pass.cc
index e09220d083ee8241001b6d9d55fb48eb1ba74f2e..f7cd7663e436bcac09963b7418c7d509daab1349 100644
--- a/lite/core/mir/mlu_postprocess_pass.cc
+++ b/lite/core/mir/mlu_postprocess_pass.cc
@@ -23,12 +23,13 @@
 #include "lite/core/mir/pass_registry.h"
 #include "lite/core/mir/subgraph/subgraph_detector.h"
 #include "lite/operators/subgraph_op.h"
+#include "lite/utils/macros.h"

 namespace paddle {
 namespace lite {
 namespace mir {

-static thread_local int g_stream_id = 0;
+static LITE_THREAD_LOCAL int g_stream_id = 0;

 Node* MLUPostprocessPass::InsertCastBefore(const std::string& op_type,
                                            const std::string& cast_arg_name,
diff --git a/lite/core/workspace.h b/lite/core/workspace.h
index 54efb6699ac6df63286b26843f8d79b7c84949f1..9a294049e049491559e0e73a6caad37a9d41571a 100644
--- a/lite/core/workspace.h
+++ b/lite/core/workspace.h
@@ -50,7 +50,8 @@ class WorkSpace {
   }

   static WorkSpace& Global_Host() {
-    thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kHost)));
+    static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x(
+        new WorkSpace(TARGET(kHost)));
     return *x;
   }

@@ -64,14 +65,16 @@ class WorkSpace {

 #if defined(LITE_WITH_CUDA)
   static WorkSpace& Global_CUDA() {
-    thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kCUDA)));
+    static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x(
+        new WorkSpace(TARGET(kCUDA)));
     return *x;
   }
 #endif

 #if defined(LITE_WITH_MLU)
   static WorkSpace& Global_MLU() {
-    thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kMLU)));
+    static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x(
+        new WorkSpace(TARGET(kMLU)));
     return *x;
   }
 #endif
diff --git a/lite/kernels/mlu/bridges/graph.h b/lite/kernels/mlu/bridges/graph.h
index 07c6b20efb9a72106cf6ae288c411e490345b089..ef1e924791633aa45232a0cacd9c6964f6b8dc73 100644
--- a/lite/kernels/mlu/bridges/graph.h
+++ b/lite/kernels/mlu/bridges/graph.h
@@ -24,6 +24,7 @@
 #include "lite/core/tensor.h"
 #include "lite/kernels/mlu/bridges/tensor.h"
 #include "lite/utils/env.h"
+#include "lite/utils/macros.h"

 #define PRINT_HW_TIME false

@@ -135,7 +136,7 @@ class Graph {

 #define MEASURE_HWTIME_END(que)                                                \
   do {                                                                         \
-    thread_local float hw_time;                                                \
+    static LITE_THREAD_LOCAL float hw_time;                                    \
     CNRT_CALL(cnrtPlaceNotifier(notifier_end_, que));                          \
     CNRT_CALL(cnrtSyncQueue(que));                                             \
     CNRT_CALL(cnrtNotifierDuration(notifier_start_, notifier_end_, &hw_time)); \
diff --git a/lite/kernels/mlu/bridges/test_helper.cc b/lite/kernels/mlu/bridges/test_helper.cc
index 36eeb473f6a37aa28a9447280f808f5fb08978d0..6d4f6cf9b7a5242ecb8b2103ee800691dc6769b3 100644
--- a/lite/kernels/mlu/bridges/test_helper.cc
+++ b/lite/kernels/mlu/bridges/test_helper.cc
@@ -19,6 +19,7 @@
 #include "lite/kernels/mlu/bridges/utility.h"
 #include "lite/kernels/mlu/subgraph_compute.h"
 #include "lite/kernels/npu/bridges/registry.h"
+#include "lite/utils/macros.h"

 namespace paddle {
 namespace lite {
 namespace subgraph {
@@ -29,7 +30,7 @@ void PrepareInput(Graph* graph,
                   const std::string& input_name,
                   Tensor* input_tensor,
                   cnmlDataOrder_t order) {
-  thread_local Tensor temp_input;
+  static LITE_THREAD_LOCAL Tensor temp_input;
   temp_input.Resize(input_tensor->dims().Vectorize());
   temp_input.CopyDataFrom(*input_tensor);
   using data_type = typename MLUTypeTraits::type;
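The function-scope cases above (`WorkSpace::Global_*`, `MEASURE_HWTIME_END`, `PrepareInput`) carry a subtlety that explains the newly added `static`: at block scope, plain `thread_local` already implies static storage duration, so the old `thread_local std::unique_ptr<WorkSpace> x(...)` was well-formed on its own. Once the keyword can expand to nothing, `static` must be written out, or the declaration silently decays to an ordinary automatic variable. A sketch of the failure mode this avoids (the `Counter` type is illustrative, not from the patch):

```cpp
#include "lite/utils/macros.h"

struct Counter {
  int value{0};
};

Counter& PerThreadCounter() {
  // If LITE_THREAD_LOCAL expands to nothing and `static` were omitted,
  // this would become a plain local: a fresh Counter on every call,
  // destroyed on return, handing the caller a dangling reference.
  // With `static` spelled out, the worst case under an empty macro is
  // one process-wide object instead of one per thread.
  static LITE_THREAD_LOCAL Counter c;
  return c;
}
```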
diff --git a/lite/tools/build.sh b/lite/tools/build.sh
index 6fc38180e729eaaf8230bd6bb4e8638f045e709b..bbfa81be2d9b47ddeba132be7f841a992ca9de0d 100755
--- a/lite/tools/build.sh
+++ b/lite/tools/build.sh
@@ -38,6 +38,7 @@ WITH_HUAWEI_ASCEND_NPU=OFF # Huawei Ascend Builder/Runtime Libs on X86 host
 HUAWEI_ASCEND_NPU_DDK_ROOT="/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux_gcc4.8.5"
 PYTHON_EXECUTABLE_OPTION=""
 ENABLE_FLATBUFFERS_DESC_VIEW=OFF
+IOS_DEPLOYMENT_TARGET=9.0

 readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz
@@ -321,6 +322,7 @@
       -DARM_TARGET_ARCH_ABI=$abi \
       -DLITE_BUILD_EXTRA=$BUILD_EXTRA \
       -DLITE_WITH_CV=$BUILD_CV \
+      -DDEPLOYMENT_TARGET=${IOS_DEPLOYMENT_TARGET} \
       -DARM_TARGET_OS=$os

   make publish_inference -j$NUM_PROC
@@ -437,6 +439,7 @@
     echo -e "--build_java: (OFF|ON); controls whether to publish java api lib (Only ANDROID is supported)"
     echo -e "--build_dir: directory for building"
     echo -e "--enable_flatbuffers_view: (OFF|ON); Use the flatbuffers read-only view to load the model. If ON, the naive buffer will no longer be supported."
+    echo -e "--ios_deployment_target: (default: 9.0); Set the minimum compatible system version for iOS deployment."
     echo
     echo -e "argument choices:"
     echo -e "--arm_os:\t android|ios|ios64"
@@ -585,6 +588,10 @@ function main {
             ENABLE_FLATBUFFERS_DESC_VIEW="${i#*=}"
             shift
             ;;
+        --ios_deployment_target=*)
+            IOS_DEPLOYMENT_TARGET="${i#*=}"
+            shift
+            ;;
         tiny_publish)
             make_tiny_publish_so $ARM_OS $ARM_ABI $ARM_LANG $ANDROID_STL
             shift
diff --git a/lite/tools/build_ios.sh b/lite/tools/build_ios.sh
index f4232d0d2c2c5120ccc352f0d4b5f956137bef75..f8a78e348722a4cd3f244c6c340d0fbde7c3a13b 100755
--- a/lite/tools/build_ios.sh
+++ b/lite/tools/build_ios.sh
@@ -19,6 +19,7 @@ workspace=$PWD/$(dirname $0)/../../
 # options of striping lib according to input model.
 OPTMODEL_DIR=""
 WITH_STRIP=OFF
+IOS_DEPLOYMENT_TARGET=9.0
 # num of threads used during compiling..
 readonly NUM_PROC=${LITE_BUILD_THREADS:-4}
 #####################################################################################################
@@ -80,6 +81,7 @@
       -DARM_TARGET_ARCH_ABI=$arch \
       -DLITE_BUILD_EXTRA=$WITH_EXTRA \
       -DLITE_WITH_CV=$WITH_CV \
+      -DDEPLOYMENT_TARGET=${IOS_DEPLOYMENT_TARGET} \
       -DARM_TARGET_OS=$os

   make publish_inference -j$NUM_PROC
@@ -104,6 +106,7 @@
     echo -e "| --with_log: (OFF|ON); controls whether to print log information, default is ON                                                      |"
     echo -e "| --with_exception: (OFF|ON); controls whether to throw the exception when error occurs, default is OFF                               |"
     echo -e "| --with_extra: (OFF|ON); controls whether to publish extra operators and kernels for (sequence-related model such as OCR or NLP)     |"
+    echo -e "| --ios_deployment_target: (default: 9.0); Set the minimum compatible system version for iOS deployment.                              |"
     echo -e "|                                                                                                                                     |"
     echo -e "| arguments of striping lib according to input model:(armv8, gcc, c++_static)                                                        |"
     echo -e "| ./lite/tools/build_android.sh --with_strip=ON --opt_model_dir=YourOptimizedModelDir                                                 |"
@@ -151,6 +154,10 @@ function main {
             WITH_EXCEPTION="${i#*=}"
             shift
             ;;
+        --ios_deployment_target=*)
+            IOS_DEPLOYMENT_TARGET="${i#*=}"
+            shift
+            ;;
         help)
             print_usage
             exit 0
diff --git a/lite/utils/macros.h b/lite/utils/macros.h
index 0fbe90fa45b9408f8eab64008da0016510bec59e..5c2f85e92cd7c16f5aabe1b46af90c4584440a8d 100644
--- a/lite/utils/macros.h
+++ b/lite/utils/macros.h
@@ -53,3 +53,14 @@
 #if defined(__FLT_MAX__)
 #define FLT_MAX __FLT_MAX__
 #endif  // __FLT_MAX__
+
+#if (defined __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && \
+    (__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 90000)
+// Thread local storage will be ignored because the linker for iOS 8 does not
+// support it.
+#define LITE_THREAD_LOCAL
+#elif __cplusplus >= 201103
+#define LITE_THREAD_LOCAL thread_local
+#else
+#error "C++11 support is required for paddle-lite compilation."
+#endif
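Tying the two halves together: Clang derives `__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__` from the deployment target (90000 for 9.0), and the new `--ios_deployment_target` flag is forwarded to CMake as `DEPLOYMENT_TARGET`, which the iOS toolchain presumably maps to the corresponding `-miphoneos-version-min` setting. Running `./lite/tools/build_ios.sh --ios_deployment_target=8.0` therefore produces a library with TLS compiled out, while the default of 9.0 keeps real `thread_local`. A throwaway probe (not part of the patch; the stringify helpers and `tls_probe` are illustrative) to confirm which branch a given build selected:

```cpp
#include <cstdio>

#include "lite/utils/macros.h"

// Indirect stringification: expands the macro first, then stringifies,
// yielding "thread_local" on C++11 targets and "" on iOS < 9.0.
#define LITE_STR_IMPL(x) #x
#define LITE_STR(x) LITE_STR_IMPL(x)

static LITE_THREAD_LOCAL int tls_probe = 0;  // per-thread, or shared on iOS 8

int main() {
  std::printf("LITE_THREAD_LOCAL expands to: \"%s\"\n",
              LITE_STR(LITE_THREAD_LOCAL));
  return tls_probe;
}
```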