未验证 提交 914219cc 编写于 作者: 石晓伟 提交者: GitHub

platform portability of tls, test=develop (#4261)

* platform portability of tls, test=develop

* update build_ios.sh, test=develop

* add static keyword for tls, test=develop

* rename the alias of tls, test=develop
上级 60d5250f
......@@ -18,6 +18,7 @@
#include <utility>
#include "lite/backends/mlu/mlu_utils.h"
#include "lite/utils/macros.h"
namespace paddle {
namespace lite {
......@@ -37,12 +38,14 @@ void cnrtMemcpyDtoH(void* dst, const void* src, size_t size) {
} // namespace mlu
thread_local cnmlCoreVersion_t TargetWrapperMlu::mlu_core_version_{CNML_MLU270};
thread_local int TargetWrapperMlu::mlu_core_number_{1};
thread_local bool TargetWrapperMlu::use_first_conv_{false};
thread_local std::vector<float> TargetWrapperMlu::mean_vec_;
thread_local std::vector<float> TargetWrapperMlu::std_vec_;
thread_local DataLayoutType TargetWrapperMlu::input_layout_{DATALAYOUT(kNCHW)};
LITE_THREAD_LOCAL cnmlCoreVersion_t TargetWrapperMlu::mlu_core_version_{
CNML_MLU270};
LITE_THREAD_LOCAL int TargetWrapperMlu::mlu_core_number_{1};
LITE_THREAD_LOCAL bool TargetWrapperMlu::use_first_conv_{false};
LITE_THREAD_LOCAL std::vector<float> TargetWrapperMlu::mean_vec_;
LITE_THREAD_LOCAL std::vector<float> TargetWrapperMlu::std_vec_;
LITE_THREAD_LOCAL DataLayoutType TargetWrapperMlu::input_layout_{
DATALAYOUT(kNCHW)};
size_t TargetWrapperMlu::num_devices() {
uint32_t dev_count = 0;
......
......@@ -17,6 +17,7 @@
#include <vector>
#include "lite/backends/mlu/mlu_utils.h"
#include "lite/core/target_wrapper.h"
#include "lite/utils/macros.h"
namespace paddle {
namespace lite {
......@@ -58,12 +59,12 @@ class TargetWrapper<TARGET(kMLU)> {
static DataLayoutType InputLayout();
private:
static thread_local cnmlCoreVersion_t mlu_core_version_;
static thread_local int mlu_core_number_;
static thread_local bool use_first_conv_;
static thread_local std::vector<float> mean_vec_;
static thread_local std::vector<float> std_vec_;
static thread_local DataLayoutType input_layout_;
static LITE_THREAD_LOCAL cnmlCoreVersion_t mlu_core_version_;
static LITE_THREAD_LOCAL int mlu_core_number_;
static LITE_THREAD_LOCAL bool use_first_conv_;
static LITE_THREAD_LOCAL std::vector<float> mean_vec_;
static LITE_THREAD_LOCAL std::vector<float> std_vec_;
static LITE_THREAD_LOCAL DataLayoutType input_layout_;
};
} // namespace lite
......
......@@ -24,6 +24,7 @@
#include "lite/backends/x86/jit/kernel_key.h"
#include "lite/backends/x86/jit/kernel_pool.h"
#include "lite/utils/cp_logging.h"
#include "lite/utils/macros.h"
namespace paddle {
namespace lite {
......@@ -178,7 +179,7 @@ class KernelFuncs {
public:
KernelFuncs() = default;
static KernelFuncs& Cache() {
static thread_local KernelFuncs<KernelTuple, PlaceType> g_func_cache;
static LITE_THREAD_LOCAL KernelFuncs<KernelTuple, PlaceType> g_func_cache;
return g_func_cache;
}
......
......@@ -22,6 +22,7 @@
#include "lite/backends/x86/jit/gen_base.h"
#include "lite/backends/x86/jit/kernel_base.h"
#include "lite/backends/x86/jit/kernel_key.h"
#include "lite/utils/macros.h"
namespace paddle {
namespace lite {
......@@ -35,7 +36,7 @@ class JitCodePool {
public:
JitCodePool() = default;
static JitCodePool& Instance() {
static thread_local JitCodePool<KT> g_jit_codes;
static LITE_THREAD_LOCAL JitCodePool<KT> g_jit_codes;
return g_jit_codes;
}
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "lite/backends/xpu/target_wrapper.h"
#include "lite/utils/macros.h"
namespace paddle {
namespace lite {
......@@ -55,7 +56,7 @@ XPUScratchPadGuard TargetWrapperXPU::MallocScratchPad(size_t size,
std::string TargetWrapperXPU::multi_encoder_precision; // NOLINT
int TargetWrapperXPU::workspace_l3_size_per_thread{0};
thread_local xdnn::Context* TargetWrapperXPU::tls_raw_ctx_{nullptr};
LITE_THREAD_LOCAL xdnn::Context* TargetWrapperXPU::tls_raw_ctx_{nullptr};
} // namespace lite
} // namespace paddle
......@@ -18,6 +18,7 @@
#include "lite/backends/xpu/xpu_header_sitter.h" // xpu_free
#include "lite/core/target_wrapper.h" // TargetWrapper
#include "lite/utils/cp_logging.h" // CHECK_EQ
#include "lite/utils/macros.h"
#define XPU_CALL(func) \
{ \
......@@ -99,7 +100,7 @@ class TargetWrapper<TARGET(kXPU)> {
static int workspace_l3_size_per_thread;
private:
static thread_local xdnn::Context* tls_raw_ctx_;
static LITE_THREAD_LOCAL xdnn::Context* tls_raw_ctx_;
};
} // namespace lite
......
......@@ -13,15 +13,16 @@
// limitations under the License.
#include "lite/core/context.h"
#include "lite/utils/macros.h"
namespace paddle {
namespace lite {
#ifdef LITE_WITH_HUAWEI_ASCEND_NPU
thread_local std::string
LITE_THREAD_LOCAL std::string
Context<TargetType::kHuaweiAscendNPU>::subgraph_model_cache_dir_{
""}; // NOLINT
thread_local int
LITE_THREAD_LOCAL int
Context<TargetType::kHuaweiAscendNPU>::huawei_ascend_device_id_{
0}; // NOLINT
#endif
......
......@@ -44,6 +44,7 @@
#include "lite/core/tensor.h"
#include "lite/utils/all.h"
#include "lite/utils/env.h"
#include "lite/utils/macros.h"
namespace paddle {
namespace lite {
......@@ -128,8 +129,8 @@ class Context<TargetType::kHuaweiAscendNPU> {
static int HuaweiAscendDeviceID() { return huawei_ascend_device_id_; }
private:
static thread_local std::string subgraph_model_cache_dir_;
static thread_local int huawei_ascend_device_id_;
static LITE_THREAD_LOCAL std::string subgraph_model_cache_dir_;
static LITE_THREAD_LOCAL int huawei_ascend_device_id_;
};
#endif
......
......@@ -54,17 +54,18 @@
#include <algorithm>
#include <limits>
#include "lite/core/device_info.h"
#include "lite/utils/macros.h"
namespace paddle {
namespace lite {
#if ((defined LITE_WITH_ARM) || (defined LITE_WITH_MLU))
thread_local lite_api::PowerMode DeviceInfo::mode_;
thread_local ARMArch DeviceInfo::arch_;
thread_local int DeviceInfo::mem_size_;
thread_local std::vector<int> DeviceInfo::active_ids_;
thread_local TensorLite DeviceInfo::workspace_;
thread_local int64_t DeviceInfo::count_ = 0;
LITE_THREAD_LOCAL lite_api::PowerMode DeviceInfo::mode_;
LITE_THREAD_LOCAL ARMArch DeviceInfo::arch_;
LITE_THREAD_LOCAL int DeviceInfo::mem_size_;
LITE_THREAD_LOCAL std::vector<int> DeviceInfo::active_ids_;
LITE_THREAD_LOCAL TensorLite DeviceInfo::workspace_;
LITE_THREAD_LOCAL int64_t DeviceInfo::count_ = 0;
#ifdef TARGET_IOS
const int DEFAULT_L1_CACHE_SIZE = 64 * 1024;
......
......@@ -22,6 +22,7 @@
#ifdef LITE_WITH_MLU
#include "lite/backends/mlu/mlu_utils.h"
#endif
#include "lite/utils/macros.h"
namespace paddle {
namespace lite {
......@@ -99,12 +100,12 @@ class DeviceInfo {
// LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core,
// LITE_POWER_FULL stands for using all cores
static thread_local lite_api::PowerMode mode_;
static thread_local ARMArch arch_;
static thread_local int mem_size_;
static thread_local std::vector<int> active_ids_;
static thread_local TensorLite workspace_;
static thread_local int64_t count_;
static LITE_THREAD_LOCAL lite_api::PowerMode mode_;
static LITE_THREAD_LOCAL ARMArch arch_;
static LITE_THREAD_LOCAL int mem_size_;
static LITE_THREAD_LOCAL std::vector<int> active_ids_;
static LITE_THREAD_LOCAL TensorLite workspace_;
static LITE_THREAD_LOCAL int64_t count_;
void SetDotInfo(int argc, ...);
void SetFP16Info(int argc, ...);
......
......@@ -23,12 +23,13 @@
#include "lite/core/mir/pass_registry.h"
#include "lite/core/mir/subgraph/subgraph_detector.h"
#include "lite/operators/subgraph_op.h"
#include "lite/utils/macros.h"
namespace paddle {
namespace lite {
namespace mir {
static thread_local int g_stream_id = 0;
static LITE_THREAD_LOCAL int g_stream_id = 0;
Node* MLUPostprocessPass::InsertCastBefore(const std::string& op_type,
const std::string& cast_arg_name,
......
......@@ -50,7 +50,8 @@ class WorkSpace {
}
static WorkSpace& Global_Host() {
thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kHost)));
static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x(
new WorkSpace(TARGET(kHost)));
return *x;
}
......@@ -64,14 +65,16 @@ class WorkSpace {
#if defined(LITE_WITH_CUDA)
static WorkSpace& Global_CUDA() {
thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kCUDA)));
static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x(
new WorkSpace(TARGET(kCUDA)));
return *x;
}
#endif
#if defined(LITE_WITH_MLU)
static WorkSpace& Global_MLU() {
thread_local std::unique_ptr<WorkSpace> x(new WorkSpace(TARGET(kMLU)));
static LITE_THREAD_LOCAL std::unique_ptr<WorkSpace> x(
new WorkSpace(TARGET(kMLU)));
return *x;
}
#endif
......
......@@ -24,6 +24,7 @@
#include "lite/core/tensor.h"
#include "lite/kernels/mlu/bridges/tensor.h"
#include "lite/utils/env.h"
#include "lite/utils/macros.h"
#define PRINT_HW_TIME false
......@@ -135,7 +136,7 @@ class Graph {
#define MEASURE_HWTIME_END(que) \
do { \
thread_local float hw_time; \
static LITE_THREAD_LOCAL float hw_time; \
CNRT_CALL(cnrtPlaceNotifier(notifier_end_, que)); \
CNRT_CALL(cnrtSyncQueue(que)); \
CNRT_CALL(cnrtNotifierDuration(notifier_start_, notifier_end_, &hw_time)); \
......
......@@ -19,6 +19,7 @@
#include "lite/kernels/mlu/bridges/utility.h"
#include "lite/kernels/mlu/subgraph_compute.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/utils/macros.h"
namespace paddle {
namespace lite {
namespace subgraph {
......@@ -29,7 +30,7 @@ void PrepareInput(Graph* graph,
const std::string& input_name,
Tensor* input_tensor,
cnmlDataOrder_t order) {
thread_local Tensor temp_input;
static LITE_THREAD_LOCAL Tensor temp_input;
temp_input.Resize(input_tensor->dims().Vectorize());
temp_input.CopyDataFrom(*input_tensor);
using data_type = typename MLUTypeTraits<Dtype>::type;
......
......@@ -38,6 +38,7 @@ WITH_HUAWEI_ASCEND_NPU=OFF # Huawei Ascend Builder/Runtime Libs on X86 host
HUAWEI_ASCEND_NPU_DDK_ROOT="/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux_gcc4.8.5"
PYTHON_EXECUTABLE_OPTION=""
ENABLE_FLATBUFFERS_DESC_VIEW=OFF
IOS_DEPLOYMENT_TARGET=9.0
readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz
......@@ -321,6 +322,7 @@ function make_ios {
-DARM_TARGET_ARCH_ABI=$abi \
-DLITE_BUILD_EXTRA=$BUILD_EXTRA \
-DLITE_WITH_CV=$BUILD_CV \
-DDEPLOYMENT_TARGET=${IOS_DEPLOYMENT_TARGET} \
-DARM_TARGET_OS=$os
make publish_inference -j$NUM_PROC
......@@ -437,6 +439,7 @@ function print_usage {
echo -e "--build_java: (OFF|ON); controls whether to publish java api lib (Only ANDROID is supported)"
echo -e "--build_dir: directory for building"
echo -e "--enable_flatbuffers_view: (OFF|ON); Use the flatbuffers read-only view to load the model. If ON, the naive buffer will no longer be supported."
echo -e "--ios_deployment_target: (default: 9.0); Set the minimum compatible system version for ios deployment."
echo
echo -e "argument choices:"
echo -e "--arm_os:\t android|ios|ios64"
......@@ -585,6 +588,10 @@ function main {
ENABLE_FLATBUFFERS_DESC_VIEW="${i#*=}"
shift
;;
--ios_deployment_target=*)
IOS_DEPLOYMENT_TARGET="${i#*=}"
shift
;;
tiny_publish)
make_tiny_publish_so $ARM_OS $ARM_ABI $ARM_LANG $ANDROID_STL
shift
......
......@@ -19,6 +19,7 @@ workspace=$PWD/$(dirname $0)/../../
# options for stripping the lib according to the input model.
OPTMODEL_DIR=""
WITH_STRIP=OFF
IOS_DEPLOYMENT_TARGET=9.0
# number of threads used during compiling.
readonly NUM_PROC=${LITE_BUILD_THREADS:-4}
#####################################################################################################
......@@ -80,6 +81,7 @@ function make_ios {
-DARM_TARGET_ARCH_ABI=$arch \
-DLITE_BUILD_EXTRA=$WITH_EXTRA \
-DLITE_WITH_CV=$WITH_CV \
-DDEPLOYMENT_TARGET=${IOS_DEPLOYMENT_TARGET} \
-DARM_TARGET_OS=$os
make publish_inference -j$NUM_PROC
......@@ -104,6 +106,7 @@ function print_usage {
echo -e "| --with_log: (OFF|ON); controls whether to print log information, default is ON |"
echo -e "| --with_exception: (OFF|ON); controls whether to throw the exception when error occurs, default is OFF |"
echo -e "| --with_extra: (OFF|ON); controls whether to publish extra operators and kernels for (sequence-related model such as OCR or NLP) |"
echo -e "| --ios_deployment_target: (default: 9.0); Set the minimum compatible system version for ios deployment. |"
echo -e "| |"
echo -e "| arguments of striping lib according to input model:(armv8, gcc, c++_static) |"
echo -e "| ./lite/tools/build_android.sh --with_strip=ON --opt_model_dir=YourOptimizedModelDir |"
......@@ -151,6 +154,10 @@ function main {
WITH_EXCEPTION="${i#*=}"
shift
;;
--ios_deployment_target=*)
IOS_DEPLOYMENT_TARGET="${i#*=}"
shift
;;
help)
print_usage
exit 0
......
......@@ -53,3 +53,14 @@
#if defined(__FLT_MAX__)
#define FLT_MAX __FLT_MAX__
#endif // __FLT_MAX__
// LITE_THREAD_LOCAL: portable spelling of the `thread_local` storage-class
// specifier. Use it wherever a declaration would otherwise say `thread_local`.
//
// * iOS deployment targets below 9.0: the macro expands to nothing, because
//   the linker for iOS 8 does not support thread local storage. Declarations
//   using it then become ordinary (non-thread-local) variables.
// * Any other C++11-or-later toolchain: the macro expands to `thread_local`.
// * Anything else: compilation stops with an error.
#if defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && \
    (__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 90000)
#define LITE_THREAD_LOCAL
// MSVC keeps __cplusplus at 199711L unless /Zc:__cplusplus is passed, so the
// compiler version is accepted as well (thread_local is available starting
// with Visual Studio 2015, i.e. _MSC_VER 1900).
#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \
    (defined(_MSC_VER) && (_MSC_VER >= 1900))
#define LITE_THREAD_LOCAL thread_local
#else
#error "C++11 support is required for paddle-lite compilation."
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册