未验证 提交 661b9cb4 编写于 作者: H hong19860320 提交者: GitHub

[cherry-pick] platform portability of tls (#4286)

* [cherry-pick]platform portability of tls, test=develop (#4261)
test=develop
Co-authored-by: N石晓伟 <39303645+Shixiaowei02@users.noreply.github.com>
上级 c9dab9e4
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "lite/backends/x86/jit/kernel_base.h" #include "lite/backends/x86/jit/kernel_base.h"
#include "lite/backends/x86/jit/kernel_key.h" #include "lite/backends/x86/jit/kernel_key.h"
#include "lite/backends/x86/jit/kernel_pool.h" #include "lite/backends/x86/jit/kernel_pool.h"
#include "lite/utils/macros.h"
#include "lite/utils/paddle_enforce.h" #include "lite/utils/paddle_enforce.h"
namespace paddle { namespace paddle {
...@@ -178,7 +179,7 @@ class KernelFuncs { ...@@ -178,7 +179,7 @@ class KernelFuncs {
public: public:
KernelFuncs() = default; KernelFuncs() = default;
static KernelFuncs& Cache() { static KernelFuncs& Cache() {
static thread_local KernelFuncs<KernelTuple, PlaceType> g_func_cache; static LITE_THREAD_LOCAL KernelFuncs<KernelTuple, PlaceType> g_func_cache;
return g_func_cache; return g_func_cache;
} }
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "lite/backends/x86/jit/gen_base.h" #include "lite/backends/x86/jit/gen_base.h"
#include "lite/backends/x86/jit/kernel_base.h" #include "lite/backends/x86/jit/kernel_base.h"
#include "lite/backends/x86/jit/kernel_key.h" #include "lite/backends/x86/jit/kernel_key.h"
#include "lite/utils/macros.h"
namespace paddle { namespace paddle {
namespace lite { namespace lite {
...@@ -35,7 +36,7 @@ class JitCodePool { ...@@ -35,7 +36,7 @@ class JitCodePool {
public: public:
JitCodePool() = default; JitCodePool() = default;
static JitCodePool& Instance() { static JitCodePool& Instance() {
static thread_local JitCodePool<KT> g_jit_codes; static LITE_THREAD_LOCAL JitCodePool<KT> g_jit_codes;
return g_jit_codes; return g_jit_codes;
} }
......
...@@ -18,7 +18,8 @@ namespace paddle { ...@@ -18,7 +18,8 @@ namespace paddle {
namespace lite { namespace lite {
#ifdef LITE_WITH_XPU #ifdef LITE_WITH_XPU
thread_local xdnn::Context* Context<TargetType::kXPU>::_tls_raw_ctx{nullptr}; LITE_THREAD_LOCAL xdnn::Context* Context<TargetType::kXPU>::_tls_raw_ctx{
nullptr};
int Context<TargetType::kXPU>::_workspace_l3_size_per_thread{0}; int Context<TargetType::kXPU>::_workspace_l3_size_per_thread{0};
#endif #endif
......
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#include "lite/core/tensor.h" #include "lite/core/tensor.h"
#include "lite/utils/all.h" #include "lite/utils/all.h"
#include "lite/utils/env.h" #include "lite/utils/env.h"
#include "lite/utils/macros.h"
namespace paddle { namespace paddle {
namespace lite { namespace lite {
...@@ -196,7 +197,7 @@ class Context<TargetType::kXPU> { ...@@ -196,7 +197,7 @@ class Context<TargetType::kXPU> {
std::string name() const { return "XPUContext"; } std::string name() const { return "XPUContext"; }
private: private:
static thread_local xdnn::Context* _tls_raw_ctx; static LITE_THREAD_LOCAL xdnn::Context* _tls_raw_ctx;
static int _workspace_l3_size_per_thread; static int _workspace_l3_size_per_thread;
}; };
#endif #endif
......
...@@ -59,20 +59,20 @@ namespace paddle { ...@@ -59,20 +59,20 @@ namespace paddle {
namespace lite { namespace lite {
#if ((defined LITE_WITH_ARM) || (defined LITE_WITH_MLU)) #if ((defined LITE_WITH_ARM) || (defined LITE_WITH_MLU))
thread_local lite_api::PowerMode DeviceInfo::mode_; LITE_THREAD_LOCAL lite_api::PowerMode DeviceInfo::mode_;
thread_local ARMArch DeviceInfo::arch_; LITE_THREAD_LOCAL ARMArch DeviceInfo::arch_;
thread_local int DeviceInfo::mem_size_; LITE_THREAD_LOCAL int DeviceInfo::mem_size_;
thread_local std::vector<int> DeviceInfo::active_ids_; LITE_THREAD_LOCAL std::vector<int> DeviceInfo::active_ids_;
thread_local TensorLite DeviceInfo::workspace_; LITE_THREAD_LOCAL TensorLite DeviceInfo::workspace_;
thread_local int64_t DeviceInfo::count_ = 0; LITE_THREAD_LOCAL int64_t DeviceInfo::count_ = 0;
#ifdef LITE_WITH_MLU #ifdef LITE_WITH_MLU
thread_local cnmlCoreVersion_t DeviceInfo::mlu_core_version_{CNML_MLU270}; LITE_THREAD_LOCAL cnmlCoreVersion_t DeviceInfo::mlu_core_version_{CNML_MLU270};
thread_local int DeviceInfo::mlu_core_number_{1}; LITE_THREAD_LOCAL int DeviceInfo::mlu_core_number_{1};
thread_local bool DeviceInfo::use_first_conv_{false}; LITE_THREAD_LOCAL bool DeviceInfo::use_first_conv_{false};
thread_local std::vector<float> DeviceInfo::mean_vec_; LITE_THREAD_LOCAL std::vector<float> DeviceInfo::mean_vec_;
thread_local std::vector<float> DeviceInfo::std_vec_; LITE_THREAD_LOCAL std::vector<float> DeviceInfo::std_vec_;
thread_local DataLayoutType DeviceInfo::input_layout_{DATALAYOUT(kNCHW)}; LITE_THREAD_LOCAL DataLayoutType DeviceInfo::input_layout_{DATALAYOUT(kNCHW)};
#endif #endif
#ifdef TARGET_IOS #ifdef TARGET_IOS
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#ifdef LITE_WITH_MLU #ifdef LITE_WITH_MLU
#include "lite/backends/mlu/mlu_utils.h" #include "lite/backends/mlu/mlu_utils.h"
#endif #endif
#include "lite/utils/macros.h"
namespace paddle { namespace paddle {
namespace lite { namespace lite {
...@@ -113,20 +114,20 @@ class DeviceInfo { ...@@ -113,20 +114,20 @@ class DeviceInfo {
// LITE_POWER_HIGH stands for using big cores, // LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core, // LITE_POWER_LOW stands for using small core,
// LITE_POWER_FULL stands for using all cores // LITE_POWER_FULL stands for using all cores
static thread_local lite_api::PowerMode mode_; static LITE_THREAD_LOCAL lite_api::PowerMode mode_;
static thread_local ARMArch arch_; static LITE_THREAD_LOCAL ARMArch arch_;
static thread_local int mem_size_; static LITE_THREAD_LOCAL int mem_size_;
static thread_local std::vector<int> active_ids_; static LITE_THREAD_LOCAL std::vector<int> active_ids_;
static thread_local TensorLite workspace_; static LITE_THREAD_LOCAL TensorLite workspace_;
static thread_local int64_t count_; static LITE_THREAD_LOCAL int64_t count_;
#ifdef LITE_WITH_MLU #ifdef LITE_WITH_MLU
static thread_local cnmlCoreVersion_t mlu_core_version_; static LITE_THREAD_LOCAL cnmlCoreVersion_t mlu_core_version_;
static thread_local int mlu_core_number_; static LITE_THREAD_LOCAL int mlu_core_number_;
static thread_local bool use_first_conv_; static LITE_THREAD_LOCAL bool use_first_conv_;
static thread_local std::vector<float> mean_vec_; static LITE_THREAD_LOCAL std::vector<float> mean_vec_;
static thread_local std::vector<float> std_vec_; static LITE_THREAD_LOCAL std::vector<float> std_vec_;
static thread_local DataLayoutType input_layout_; static LITE_THREAD_LOCAL DataLayoutType input_layout_;
#endif #endif
void SetDotInfo(int argc, ...); void SetDotInfo(int argc, ...);
......
...@@ -50,7 +50,8 @@ class WorkSpace { ...@@ -50,7 +50,8 @@ class WorkSpace {
} }
static WorkSpace& Global_Host() { static WorkSpace& Global_Host() {
thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kHost))); static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x(
new WorkSpace(TARGET(kHost)));
return *x; return *x;
} }
...@@ -64,14 +65,16 @@ class WorkSpace { ...@@ -64,14 +65,16 @@ class WorkSpace {
#if defined(LITE_WITH_CUDA) #if defined(LITE_WITH_CUDA)
static WorkSpace& Global_CUDA() { static WorkSpace& Global_CUDA() {
thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kCUDA))); static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x(
new WorkSpace(TARGET(kCUDA)));
return *x; return *x;
} }
#endif #endif
#if defined(LITE_WITH_MLU) #if defined(LITE_WITH_MLU)
static WorkSpace& Global_MLU() { static WorkSpace& Global_MLU() {
thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kMLU))); static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x(
new WorkSpace(TARGET(kMLU)));
return *x; return *x;
} }
#endif #endif
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "lite/core/op_lite.h" #include "lite/core/op_lite.h"
#include "lite/core/tensor.h" #include "lite/core/tensor.h"
#include "lite/kernels/mlu/bridges/tensor.h" #include "lite/kernels/mlu/bridges/tensor.h"
#include "lite/utils/macros.h"
#define PRINT_HW_TIME false #define PRINT_HW_TIME false
...@@ -113,7 +114,7 @@ class Graph { ...@@ -113,7 +114,7 @@ class Graph {
void Compute(cnrtInvokeFuncParam_t forward_param, cnrtQueue_t que) { void Compute(cnrtInvokeFuncParam_t forward_param, cnrtQueue_t que) {
#if PRINT_HW_TIME #if PRINT_HW_TIME
thread_local float hw_time; static LITE_THREAD_LOCAL float hw_time;
CNRT_CALL(cnrtPlaceNotifier(notifier_start_, que)); CNRT_CALL(cnrtPlaceNotifier(notifier_start_, que));
#endif #endif
CNML_CALL(cnmlComputeFusionOpForward_V3(fusion_op_, CNML_CALL(cnmlComputeFusionOpForward_V3(fusion_op_,
......
...@@ -35,6 +35,7 @@ BUILD_RKNPU=OFF ...@@ -35,6 +35,7 @@ BUILD_RKNPU=OFF
RKNPU_DDK_ROOT="$(pwd)/rknpu/" RKNPU_DDK_ROOT="$(pwd)/rknpu/"
LITE_WITH_ARM_LANG=OFF LITE_WITH_ARM_LANG=OFF
PYTHON_EXECUTABLE_OPTION="" PYTHON_EXECUTABLE_OPTION=""
IOS_DEPLOYMENT_TARGET=9.0
readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz
...@@ -326,6 +327,7 @@ function make_ios { ...@@ -326,6 +327,7 @@ function make_ios {
-DARM_TARGET_ARCH_ABI=$abi \ -DARM_TARGET_ARCH_ABI=$abi \
-DLITE_BUILD_EXTRA=$BUILD_EXTRA \ -DLITE_BUILD_EXTRA=$BUILD_EXTRA \
-DLITE_WITH_CV=$BUILD_CV \ -DLITE_WITH_CV=$BUILD_CV \
-DDEPLOYMENT_TARGET=${IOS_DEPLOYMENT_TARGET} \
-DARM_TARGET_OS=$os -DARM_TARGET_OS=$os
make publish_inference -j$NUM_PROC make publish_inference -j$NUM_PROC
...@@ -434,6 +436,7 @@ function print_usage { ...@@ -434,6 +436,7 @@ function print_usage {
echo -e "--build_python: (OFF|ON); controls whether to publish python api lib (ANDROID and IOS is not supported)" echo -e "--build_python: (OFF|ON); controls whether to publish python api lib (ANDROID and IOS is not supported)"
echo -e "--build_java: (OFF|ON); controls whether to publish java api lib (Only ANDROID is supported)" echo -e "--build_java: (OFF|ON); controls whether to publish java api lib (Only ANDROID is supported)"
echo -e "--build_dir: directory for building" echo -e "--build_dir: directory for building"
echo -e "--ios_deployment_target: (default: 9.0); Set the minimum compatible system version for ios deployment."
echo echo
echo -e "argument choices:" echo -e "argument choices:"
echo -e "--arm_os:\t android|ios|ios64" echo -e "--arm_os:\t android|ios|ios64"
...@@ -570,6 +573,10 @@ function main { ...@@ -570,6 +573,10 @@ function main {
RKNPU_DDK_ROOT="${i#*=}" RKNPU_DDK_ROOT="${i#*=}"
shift shift
;; ;;
--ios_deployment_target=*)
IOS_DEPLOYMENT_TARGET="${i#*=}"
shift
;;
tiny_publish) tiny_publish)
make_tiny_publish_so $ARM_OS $ARM_ABI $ARM_LANG $ANDROID_STL make_tiny_publish_so $ARM_OS $ARM_ABI $ARM_LANG $ANDROID_STL
shift shift
......
...@@ -19,6 +19,7 @@ workspace=$PWD/$(dirname $0)/../../ ...@@ -19,6 +19,7 @@ workspace=$PWD/$(dirname $0)/../../
# options of striping lib according to input model. # options of striping lib according to input model.
OPTMODEL_DIR="" OPTMODEL_DIR=""
WITH_STRIP=OFF WITH_STRIP=OFF
IOS_DEPLOYMENT_TARGET=9.0
# num of threads used during compiling.. # num of threads used during compiling..
readonly NUM_PROC=${LITE_BUILD_THREADS:-4} readonly NUM_PROC=${LITE_BUILD_THREADS:-4}
##################################################################################################### #####################################################################################################
...@@ -77,6 +78,7 @@ function make_ios { ...@@ -77,6 +78,7 @@ function make_ios {
-DARM_TARGET_ARCH_ABI=$arch \ -DARM_TARGET_ARCH_ABI=$arch \
-DLITE_BUILD_EXTRA=$WITH_EXTRA \ -DLITE_BUILD_EXTRA=$WITH_EXTRA \
-DLITE_WITH_CV=$WITH_CV \ -DLITE_WITH_CV=$WITH_CV \
-DDEPLOYMENT_TARGET=${IOS_DEPLOYMENT_TARGET} \
-DARM_TARGET_OS=$os -DARM_TARGET_OS=$os
make publish_inference -j$NUM_PROC make publish_inference -j$NUM_PROC
...@@ -101,6 +103,7 @@ function print_usage { ...@@ -101,6 +103,7 @@ function print_usage {
echo -e "| --with_log: (OFF|ON); controls whether to print log information, default is ON |" echo -e "| --with_log: (OFF|ON); controls whether to print log information, default is ON |"
echo -e "| --with_exception: (OFF|ON); controls whether to throw the exception when error occurs, default is OFF |" echo -e "| --with_exception: (OFF|ON); controls whether to throw the exception when error occurs, default is OFF |"
echo -e "| --with_extra: (OFF|ON); controls whether to publish extra operators and kernels for (sequence-related model such as OCR or NLP) |" echo -e "| --with_extra: (OFF|ON); controls whether to publish extra operators and kernels for (sequence-related model such as OCR or NLP) |"
echo -e "| --ios_deployment_target: (default: 9.0); Set the minimum compatible system version for ios deployment. |"
echo -e "| |" echo -e "| |"
echo -e "| arguments of striping lib according to input model:(armv8, gcc, c++_static) |" echo -e "| arguments of striping lib according to input model:(armv8, gcc, c++_static) |"
echo -e "| ./lite/tools/build_android.sh --with_strip=ON --opt_model_dir=YourOptimizedModelDir |" echo -e "| ./lite/tools/build_android.sh --with_strip=ON --opt_model_dir=YourOptimizedModelDir |"
...@@ -148,6 +151,10 @@ function main { ...@@ -148,6 +151,10 @@ function main {
WITH_EXCEPTION="${i#*=}" WITH_EXCEPTION="${i#*=}"
shift shift
;; ;;
--ios_deployment_target=*)
IOS_DEPLOYMENT_TARGET="${i#*=}"
shift
;;
help) help)
print_usage print_usage
exit 0 exit 0
......
...@@ -53,3 +53,14 @@ ...@@ -53,3 +53,14 @@
#if defined(__FLT_MAX__) #if defined(__FLT_MAX__)
#define FLT_MAX __FLT_MAX__ #define FLT_MAX __FLT_MAX__
#endif // __FLT_MAX__ #endif // __FLT_MAX__
// LITE_THREAD_LOCAL: portable storage-class specifier for per-thread variables.
// It expands to `thread_local` on every toolchain that provides C++11
// thread-local storage, and to nothing when the minimum iOS deployment target
// is below 9.0.
#if (defined __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && \
    (__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 90000)
// Thread local storage will be ignored because the linker for iOS 8 does not
// support it. NOTE: with an empty expansion, every variable declared with
// LITE_THREAD_LOCAL becomes an ordinary static object, i.e. a single instance
// instead of one instance per thread.
#define LITE_THREAD_LOCAL
#elif defined(__cplusplus) &&                              \
    ((__cplusplus >= 201103L) ||                           \
     (defined(_MSVC_LANG) && (_MSVC_LANG >= 201103L)) ||   \
     (defined(_MSC_VER) && (_MSC_VER >= 1900)))
// MSVC keeps __cplusplus at 199711L unless /Zc:__cplusplus is passed, so the
// language level is additionally probed through _MSVC_LANG / _MSC_VER to avoid
// rejecting a C++11-capable compiler.
#define LITE_THREAD_LOCAL thread_local
#else
#error "C++11 support is required for paddle-lite compilation."
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册