diff --git a/lite/api/cxx_api_impl.cc b/lite/api/cxx_api_impl.cc
index fbf487e0065a28a19518673c1c1c9e793d913cfc..d4da1c429b5f66085b659047636383ecd546d937 100644
--- a/lite/api/cxx_api_impl.cc
+++ b/lite/api/cxx_api_impl.cc
@@ -36,12 +36,12 @@ void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
 #endif
 #ifdef LITE_WITH_MLU
   Env<TARGET(kMLU)>::Init();
-  lite::DeviceInfo::Global().SetMLURunMode(config.mlu_core_version(),
-                                           config.mlu_core_number(),
-                                           config.mlu_use_first_conv(),
-                                           config.mlu_first_conv_mean(),
-                                           config.mlu_first_conv_std(),
-                                           config.mlu_input_layout());
+  lite::TargetWrapperMlu::SetMLURunMode(config.mlu_core_version(),
+                                        config.mlu_core_number(),
+                                        config.mlu_use_first_conv(),
+                                        config.mlu_first_conv_mean(),
+                                        config.mlu_first_conv_std(),
+                                        config.mlu_input_layout());
 #endif  // LITE_WITH_MLU
   auto places = config.valid_places();
   std::vector<std::string> passes{};
diff --git a/lite/backends/mlu/target_wrapper.cc b/lite/backends/mlu/target_wrapper.cc
index 2385f69246a163830e0df855082d728da2743e02..f5c7eece48fe6db158237189b270aca9e396ea6c 100644
--- a/lite/backends/mlu/target_wrapper.cc
+++ b/lite/backends/mlu/target_wrapper.cc
@@ -36,6 +36,13 @@ void cnrtMemcpyDtoH(void* dst, const void* src, size_t size) {
 
 }  // namespace mlu
 
+thread_local cnmlCoreVersion_t TargetWrapperMlu::mlu_core_version_{CNML_MLU270};
+thread_local int TargetWrapperMlu::mlu_core_number_{1};
+thread_local bool TargetWrapperMlu::use_first_conv_{false};
+thread_local std::vector<float> TargetWrapperMlu::mean_vec_;
+thread_local std::vector<float> TargetWrapperMlu::std_vec_;
+thread_local DataLayoutType TargetWrapperMlu::input_layout_{DATALAYOUT(kNCHW)};
+
 size_t TargetWrapperMlu::num_devices() {
   uint32_t dev_count = 0;
   CNRT_CALL(cnrtGetDeviceCount(&dev_count)) << " cnrt get device count failed";
@@ -77,6 +84,47 @@ void TargetWrapperMlu::MemcpySync(void* dst,
       LOG(FATAL) << "Unsupported IoDirection" << static_cast<int>(dir);
   }
 }
+void TargetWrapperMlu::SetMLURunMode(lite_api::MLUCoreVersion core_version,
+                                     int core_number,
+                                     bool use_first_conv,
+                                     const std::vector<float>& mean_vec,
+                                     const std::vector<float>& std_vec,
+                                     DataLayoutType input_layout) {
+  switch (core_version) {
+    case (lite_api::MLUCoreVersion::MLU_220):
+      mlu_core_version_ = CNML_MLU220;
+      break;
+    case (lite_api::MLUCoreVersion::MLU_270):
+      mlu_core_version_ = CNML_MLU270;
+      break;
+    default:
+      mlu_core_version_ = CNML_MLU270;
+      break;
+  }
+  mlu_core_number_ = core_number;
+  use_first_conv_ = use_first_conv;
+  mean_vec_ = mean_vec;
+  std_vec_ = std_vec;
+  input_layout_ = input_layout;
+}
+
+cnmlCoreVersion_t TargetWrapperMlu::MLUCoreVersion() {
+  return mlu_core_version_;
+}
+
+int TargetWrapperMlu::MLUCoreNumber() { return mlu_core_number_; }
+
+bool TargetWrapperMlu::UseFirstConv() { return use_first_conv_; }
+
+// const std::vector<float>& TargetWrapperMlu::MeanVec() const { return
+// mean_vec_; }
+const std::vector<float>& TargetWrapperMlu::MeanVec() { return mean_vec_; }
+
+// const std::vector<float>& TargetWrapperMlu::StdVec() const { return std_vec_;
+// }
+const std::vector<float>& TargetWrapperMlu::StdVec() { return std_vec_; }
+
+DataLayoutType TargetWrapperMlu::InputLayout() { return input_layout_; }
 
 // void TargetWrapperMlu::MemcpyAsync(void* dst,
 //                                    const void* src,
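The six thread_local definitions above pair with the in-class declarations added to target_wrapper.h below: a non-inline static thread_local data member needs exactly one out-of-line definition, which is also where its per-thread initial value lives. A reduced sketch of the pattern (the Wrapper type and member names here are hypothetical stand-ins, not Paddle-Lite code):

#include <vector>

struct Wrapper {
  // Declarations only: no storage is allocated here.
  static thread_local int core_number_;
  static thread_local std::vector<float> mean_;
};

// Out-of-line definitions allocate the storage; every thread that touches
// these members gets its own copy, freshly initialized to the values here.
thread_local int Wrapper::core_number_{1};
thread_local std::vector<float> Wrapper::mean_;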
diff --git a/lite/backends/mlu/target_wrapper.h b/lite/backends/mlu/target_wrapper.h
index 2d9e10806f78e56f50b04d408dab219c923456fc..c1a35a9aa2b6150d92cc122810d6618cebdec006 100644
--- a/lite/backends/mlu/target_wrapper.h
+++ b/lite/backends/mlu/target_wrapper.h
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #pragma once
+#include <vector>
 
 #include "lite/backends/mlu/mlu_utils.h"
 #include "lite/core/target_wrapper.h"
@@ -43,11 +44,32 @@ class TargetWrapper<TARGET(kMLU)> {
                          const void* src,
                          size_t size,
                          IoDirection dir);
+  static void SetMLURunMode(lite_api::MLUCoreVersion core_version,
+                            int core_number,
+                            bool use_first_conv,
+                            const std::vector<float>& mean_vec,
+                            const std::vector<float>& std_vec,
+                            DataLayoutType input_layout);
+  static cnmlCoreVersion_t MLUCoreVersion();
+  static int MLUCoreNumber();
+  static bool UseFirstConv();
+  // static const std::vector<float>& MeanVec() const;
+  // static const std::vector<float>& StdVec() const;
+  static const std::vector<float>& MeanVec();
+  static const std::vector<float>& StdVec();
+  static DataLayoutType InputLayout();
   // static void MemcpyAsync(void* dst,
   //                         const void* src,
   //                         size_t size,
   //                         IoDirection dir,
   //                         const queue_t& queue);
+
+ private:
+  static thread_local cnmlCoreVersion_t mlu_core_version_;
+  static thread_local int mlu_core_number_;
+  static thread_local bool use_first_conv_;
+  static thread_local std::vector<float> mean_vec_;
+  static thread_local std::vector<float> std_vec_;
+  static thread_local DataLayoutType input_layout_;
 };
 
 }  // namespace lite
diff --git a/lite/core/context.h b/lite/core/context.h
index cdab4e473bf44c1b5b4ec6c0715ce44074ac63cf..6b826fe46f973d9812d76802a48b6d63f16b5081 100644
--- a/lite/core/context.h
+++ b/lite/core/context.h
@@ -227,10 +227,12 @@ class Context<TargetType::kMLU> {
   void SetIoQueue(cnrtQueue_t queue) { io_queue_ = queue; }
 
   cnmlCoreVersion_t MLUCoreVersion() {
-    return DeviceInfo::Global().MLUCoreVersion();
+    return paddle::lite::TargetWrapperMlu::MLUCoreVersion();
   }
 
-  int MLUCoreNumber() { return DeviceInfo::Global().MLUCoreNumber(); }
+  int MLUCoreNumber() {
+    return paddle::lite::TargetWrapperMlu::MLUCoreNumber();
+  }
 
   u32_t affinity() { return affinity_; }
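The call-site shape is unchanged by the move; only the receiver switches from the DeviceInfo singleton to the static TargetWrapperMlu API, and the state it sets is now per-thread. A minimal caller sketch, assuming a LITE_WITH_MLU build so that lite/backends/mlu/target_wrapper.h is available (the helper name, core count, and mean/std values are illustrative placeholders, not recommended settings):

#include <vector>

#include "lite/backends/mlu/target_wrapper.h"

// Hypothetical helper: configures the MLU run mode for the calling
// thread only, since the backing storage is thread_local.
void ConfigureMluRunModeForThisThread() {
  const std::vector<float> mean{0.0f, 0.0f, 0.0f};   // placeholder mean
  const std::vector<float> scale{1.0f, 1.0f, 1.0f};  // placeholder std
  paddle::lite::TargetWrapperMlu::SetMLURunMode(
      paddle::lite_api::MLUCoreVersion::MLU_270,  // mapped to CNML_MLU270
      4,      // placeholder core count
      false,  // use_first_conv: skip the fused first-conv path
      mean,
      scale,
      DATALAYOUT(kNHWC));  // user-facing args laid out as NHWC
}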
diff --git a/lite/core/device_info.cc b/lite/core/device_info.cc
index 29ac96ed744b016833a746b35002dd68109efd8b..6038343689408793d835287be2bdf8ae314dd7b4 100644
--- a/lite/core/device_info.cc
+++ b/lite/core/device_info.cc
@@ -66,14 +66,14 @@ thread_local std::vector<int> DeviceInfo::active_ids_;
 thread_local TensorLite DeviceInfo::workspace_;
 thread_local int64_t DeviceInfo::count_ = 0;
 
-#ifdef LITE_WITH_MLU
-thread_local cnmlCoreVersion_t DeviceInfo::mlu_core_version_{CNML_MLU270};
-thread_local int DeviceInfo::mlu_core_number_{1};
-thread_local bool DeviceInfo::use_first_conv_{false};
-thread_local std::vector<float> DeviceInfo::mean_vec_;
-thread_local std::vector<float> DeviceInfo::std_vec_;
-thread_local DataLayoutType DeviceInfo::input_layout_{DATALAYOUT(kNCHW)};
-#endif
+// #ifdef LITE_WITH_MLU
+// thread_local cnmlCoreVersion_t DeviceInfo::mlu_core_version_{CNML_MLU270};
+// thread_local int DeviceInfo::mlu_core_number_{1};
+// thread_local bool DeviceInfo::use_first_conv_{false};
+// thread_local std::vector<float> DeviceInfo::mean_vec_;
+// thread_local std::vector<float> DeviceInfo::std_vec_;
+// thread_local DataLayoutType DeviceInfo::input_layout_{DATALAYOUT(kNCHW)};
+// #endif
 
 #ifdef TARGET_IOS
 const int DEFAULT_L1_CACHE_SIZE = 64 * 1024;
@@ -1089,44 +1089,44 @@ int DeviceInfo::Setup() {
   return 0;
 }
 
-#ifdef LITE_WITH_MLU
-void DeviceInfo::SetMLURunMode(lite_api::MLUCoreVersion core_version,
-                               int core_number,
-                               bool use_first_conv,
-                               const std::vector<float>& mean_vec,
-                               const std::vector<float>& std_vec,
-                               DataLayoutType input_layout) {
-  switch (core_version) {
-    case (lite_api::MLUCoreVersion::MLU_220):
-      mlu_core_version_ = CNML_MLU220;
-      break;
-    case (lite_api::MLUCoreVersion::MLU_270):
-      mlu_core_version_ = CNML_MLU270;
-      break;
-    default:
-      mlu_core_version_ = CNML_MLU270;
-      break;
-  }
-  mlu_core_number_ = core_number;
-  use_first_conv_ = use_first_conv;
-  mean_vec_ = mean_vec;
-  std_vec_ = std_vec;
-  input_layout_ = input_layout;
-}
-
-cnmlCoreVersion_t DeviceInfo::MLUCoreVersion() { return mlu_core_version_; }
-
-int DeviceInfo::MLUCoreNumber() { return mlu_core_number_; }
-
-bool DeviceInfo::UseFirstConv() { return use_first_conv_; }
-
-const std::vector<float>& DeviceInfo::MeanVec() const { return mean_vec_; }
-
-const std::vector<float>& DeviceInfo::StdVec() const { return std_vec_; }
-
-DataLayoutType DeviceInfo::InputLayout() const { return input_layout_; }
-
-#endif  // LITE_WITH_MLU
+// #ifdef LITE_WITH_MLU
+// void DeviceInfo::SetMLURunMode(lite_api::MLUCoreVersion core_version,
+//                                int core_number,
+//                                bool use_first_conv,
+//                                const std::vector<float>& mean_vec,
+//                                const std::vector<float>& std_vec,
+//                                DataLayoutType input_layout) {
+//   switch (core_version) {
+//     case (lite_api::MLUCoreVersion::MLU_220):
+//       mlu_core_version_ = CNML_MLU220;
+//       break;
+//     case (lite_api::MLUCoreVersion::MLU_270):
+//       mlu_core_version_ = CNML_MLU270;
+//       break;
+//     default:
+//       mlu_core_version_ = CNML_MLU270;
+//       break;
+//   }
+//   mlu_core_number_ = core_number;
+//   use_first_conv_ = use_first_conv;
+//   mean_vec_ = mean_vec;
+//   std_vec_ = std_vec;
+//   input_layout_ = input_layout;
+// }
+//
+// cnmlCoreVersion_t DeviceInfo::MLUCoreVersion() { return mlu_core_version_; }
+//
+// int DeviceInfo::MLUCoreNumber() { return mlu_core_number_; }
+//
+// bool DeviceInfo::UseFirstConv() { return use_first_conv_; }
+//
+// const std::vector<float>& DeviceInfo::MeanVec() const { return mean_vec_; }
+//
+// const std::vector<float>& DeviceInfo::StdVec() const { return std_vec_; }
+//
+// DataLayoutType DeviceInfo::InputLayout() const { return input_layout_; }
+//
+// #endif  // LITE_WITH_MLU
 
 void DeviceInfo::SetRunMode(lite_api::PowerMode mode, int thread_num) {
 #ifdef ARM_WITH_OMP
diff --git a/lite/core/device_info.h b/lite/core/device_info.h
index 4e7e4742c4f6caa8a902f56fe953acd383fe2185..ae2e1fa551fad0c4fb4a5a498c80b4e85965da08 100644
--- a/lite/core/device_info.h
+++ b/lite/core/device_info.h
@@ -55,20 +55,20 @@ class DeviceInfo {
   int Setup();
 
   void SetRunMode(lite_api::PowerMode mode, int thread_num);
-#ifdef LITE_WITH_MLU
-  void SetMLURunMode(lite_api::MLUCoreVersion core_version,
-                     int core_number,
-                     bool use_first_conv,
-                     const std::vector<float>& mean_vec,
-                     const std::vector<float>& std_vec,
-                     DataLayoutType input_layout);
-  cnmlCoreVersion_t MLUCoreVersion();
-  int MLUCoreNumber();
-  bool UseFirstConv();
-  const std::vector<float>& MeanVec() const;
-  const std::vector<float>& StdVec() const;
-  DataLayoutType InputLayout() const;
-#endif
+  // #ifdef LITE_WITH_MLU
+  // void SetMLURunMode(lite_api::MLUCoreVersion core_version,
+  //                    int core_number,
+  //                    bool use_first_conv,
+  //                    const std::vector<float>& mean_vec,
+  //                    const std::vector<float>& std_vec,
+  //                    DataLayoutType input_layout);
+  // cnmlCoreVersion_t MLUCoreVersion();
+  // int MLUCoreNumber();
+  // bool UseFirstConv();
+  // const std::vector<float>& MeanVec() const;
+  // const std::vector<float>& StdVec() const;
+  // DataLayoutType InputLayout() const;
+  // #endif
 
   void SetCache(int l1size, int l2size, int l3size);
   void SetArch(ARMArch arch) { arch_ = arch; }
@@ -120,14 +120,14 @@ class DeviceInfo {
   static thread_local TensorLite workspace_;
   static thread_local int64_t count_;
 
-#ifdef LITE_WITH_MLU
-  static thread_local cnmlCoreVersion_t mlu_core_version_;
-  static thread_local int mlu_core_number_;
-  static thread_local bool use_first_conv_;
-  static thread_local std::vector<float> mean_vec_;
-  static thread_local std::vector<float> std_vec_;
-  static thread_local DataLayoutType input_layout_;
-#endif
+  // #ifdef LITE_WITH_MLU
+  // static thread_local cnmlCoreVersion_t mlu_core_version_;
+  // static thread_local int mlu_core_number_;
+  // static thread_local bool use_first_conv_;
+  // static thread_local std::vector<float> mean_vec_;
+  // static thread_local std::vector<float> std_vec_;
+  // static thread_local DataLayoutType input_layout_;
+  // #endif
 
   void SetDotInfo(int argc, ...);
   void SetFP16Info(int argc, ...);
diff --git a/lite/core/mir/mlu_postprocess_pass.cc b/lite/core/mir/mlu_postprocess_pass.cc
index 191f1543f3d8097ea9103a2df737c1b1ad7f7721..515eab9d3f20ebf85c2c5abad6d84f109ec68068 100644
--- a/lite/core/mir/mlu_postprocess_pass.cc
+++ b/lite/core/mir/mlu_postprocess_pass.cc
@@ -569,11 +569,11 @@ void MLUPostprocessPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
   // 1: feed->arg_in->subgraph->... 2: ...->subgraph->arg_out->fetch;
   // arg_in and arg_out are assumed to be NHWC which user should be aware of.
   // Thus here we change these args' layout to NHWC
-  if (lite::DeviceInfo::Global().InputLayout() == DATALAYOUT(kNHWC)) {
+  if (lite::TargetWrapperMlu::InputLayout() == DATALAYOUT(kNHWC)) {
     ModifyLayout(graph.get());
   }
 
-  if (lite::DeviceInfo::Global().UseFirstConv()) {
+  if (lite::TargetWrapperMlu::UseFirstConv()) {
     GatherAndModifyFirstConvNodes(graph.get());
   }
 
diff --git a/lite/kernels/mlu/bridges/conv_op.cc b/lite/kernels/mlu/bridges/conv_op.cc
index 5e88323b1efc2427c7e143dca53b21404e33742f..fc42b2a72a23cdfea4e9da16e5067d5414a1f286 100644
--- a/lite/kernels/mlu/bridges/conv_op.cc
+++ b/lite/kernels/mlu/bridges/conv_op.cc
@@ -164,7 +164,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   const auto input_scale = op_info->GetAttr<float>("input_scale");
 
   bool use_first_conv = false;
-  if (lite::DeviceInfo::Global().UseFirstConv() && input_dims[1] == 3) {
+  if (lite::TargetWrapperMlu::UseFirstConv() && input_dims[1] == 3) {
     use_first_conv = true;
   }
 
@@ -192,11 +192,11 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                   graph->FPType());
     graph->BindConstRawData("first_conv_mean_tensor",
-                            lite::DeviceInfo::Global().MeanVec().data(),
+                            lite::TargetWrapperMlu::MeanVec().data(),
                             3,
                             false);
     graph->BindConstRawData("first_conv_std_tensor",
-                            lite::DeviceInfo::Global().StdVec().data(),
+                            lite::TargetWrapperMlu::StdVec().data(),
                             3,
                             false);
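Since every field moved to TargetWrapperMlu is thread_local, a run mode set on one thread is invisible to every other thread; each worker thread that drives a predictor must call SetMLURunMode itself (CxxPaddleApiImpl::Init above does this for the thread it runs on). A self-contained sketch of that behavior, using a stand-in struct rather than the real wrapper:

#include <iostream>
#include <thread>

// Stand-in for TargetWrapperMlu's thread_local state.
struct MluRunMode {
  static thread_local int core_number;
};
thread_local int MluRunMode::core_number{1};  // per-thread default

int main() {
  MluRunMode::core_number = 16;  // configure the main thread only
  std::thread worker([] {
    // Prints 1: thread_local state does not propagate across threads,
    // so the worker sees the default until it configures itself.
    std::cout << "worker sees " << MluRunMode::core_number << " core(s)\n";
  });
  worker.join();
  std::cout << "main sees " << MluRunMode::core_number << " core(s)\n";
  return 0;
}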