diff --git a/paddle/fluid/lite/api/cxx_api_bin.cc b/paddle/fluid/lite/api/cxx_api_bin.cc index f1a9a23e8d034be6c863a1c29ff19a3e831f35a2..f8229f2cea9a7e2b34c99235505a67b40d8d3f3d 100644 --- a/paddle/fluid/lite/api/cxx_api_bin.cc +++ b/paddle/fluid/lite/api/cxx_api_bin.cc @@ -28,9 +28,10 @@ double time_diff(Time t1, Time t2) { return counter.count() / 1000.0; } -void Run(const char* model_dir, int repeat) { +void Run(const char* model_dir, int repeat, int thread_num) { #ifdef LITE_WITH_ARM DeviceInfo::Init(); + DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, thread_num); #endif lite::ExecutorLite predictor; std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, @@ -66,8 +67,8 @@ void Run(const char* model_dir, int repeat) { } // namespace paddle int main(int argc, char** argv) { - CHECK_EQ(argc, 3) << "usage: ./cmd "; - paddle::lite::Run(argv[1], std::stoi(argv[2])); + CHECK_EQ(argc, 4) << "usage: ./cmd "; + paddle::lite::Run(argv[1], std::stoi(argv[2]), std::stoi(argv[3])); return 0; } diff --git a/paddle/fluid/lite/core/context.cc b/paddle/fluid/lite/core/context.cc index 89ec7278c1aaf8e372c45f24a32525df4f223418..aae36b4a23133cef5b6a477e9144d5a14c90e45f 100644 --- a/paddle/fluid/lite/core/context.cc +++ b/paddle/fluid/lite/core/context.cc @@ -13,322 +13,7 @@ // limitations under the License. #include "paddle/fluid/lite/core/context.h" -#include "paddle/fluid/lite/core/cpu_info.h" - -#ifdef LITE_WITH_LINUX -#include -#include -#endif -#if __APPLE__ -#include "TargetConditionals.h" -#if TARGET_OS_IPHONE -#include -#include -#include -#endif // TARGET_OS_IPHONE -#endif // __APPLE__ - -#ifdef ARM_WITH_OMP -#include -#endif namespace paddle { -namespace lite { - -#ifdef LITE_WITH_ARM - -void Context::SetCache(int l1size, int l2size, int l3size) { - DeviceInfo& dev = DeviceInfo::Global(); - int cpu_count = arm_get_cpucount(); - dev.L1_cache_.resize(cpu_count); - dev.L2_cache_.resize(cpu_count); - dev.L3_cache_.resize(cpu_count); - for (int i = 0; i < cpu_count; ++i) { - dev.L1_cache_[i] = l1size; - dev.L2_cache_[i] = l2size; - dev.L3_cache_[i] = l3size; - } - workspace_.Resize({2 * (l1size + l2size)}); -} - -Context::Context() { - active_ids_ = {0}; - mode_ = LITE_POWER_HIGH; - DeviceInfo& dev = DeviceInfo::Global(); - workspace_.Resize( - {static_cast(dev.L2_cache_[active_ids_[0]] / sizeof(float))}); -#ifdef TARGET_IOS - arch_ = APPLE; // use 6x8 -#else - if (dev.big_core_ids_.size() > 0) { - arch_ = dev.archs_[dev.big_core_ids_[0]]; - } -#endif -} - -PowerMode Context::mode() const { return mode_; } - -int Context::threads() const { return active_ids_.size(); } - -Context::Context(const ARMContext& ctx) { - mode_ = ctx.mode_; - active_ids_ = ctx.active_ids_; - workspace_ = ctx.workspace_; - arch_ = ctx.arch_; - count_ = ctx.count_; -} - -ARMContext& Context::operator=(const ARMContext& ctx) { - mode_ = ctx.mode_; - active_ids_ = ctx.active_ids_; - workspace_ = ctx.workspace_; - arch_ = ctx.arch_; - count_ = ctx.count_; - return *this; -} - -void Context::BindDev() { -#ifdef ARM_WITH_OMP - int num_threads = active_ids_.size(); - omp_set_num_threads(num_threads); -#ifdef LITE_WITH_LINUX - std::vector ssarets; - for (int j = 0; j < num_threads; ++j) { - ssarets.push_back(0); - } -#pragma omp parallel for - for (int i = 0; i < num_threads; i++) { - ssarets[i] = set_sched_affinity(active_ids_); - } - for (int i = 0; i < num_threads; i++) { - if (ssarets[i] != 0) { - LOG(ERROR) << "set cpu affinity failed, cpuID: " << active_ids_[i]; - return; - } - } -#endif // LITE_WITH_LINUX -#else // ARM_WITH_OMP -#ifdef LITE_WITH_LINUX - std::vector cpuid1; - cpuid1.push_back(active_ids_[0]); - int ssaret = set_sched_affinity(cpuid1); - if (ssaret != 0) { - printf("set cpu affinity failed, cpuID: %d\n", active_ids_[0]); - return; - } -#endif // LITE_WITH_LINUX -#endif // ARM_WITH_OMP -} - -void Context::SetRunMode(PowerMode mode, int threads) { - DeviceInfo& dev = DeviceInfo::Global(); - int big_core_size = dev.big_core_ids_.size(); - int small_core_size = dev.little_core_ids_.size(); - if (threads > big_core_size + small_core_size) { - threads = big_core_size + small_core_size; - } -#ifdef ARM_WITH_OMP - count_++; - int shift_num = (count_ / 10) % big_core_size; - switch (mode) { - case LITE_POWER_FULL: - mode_ = mode; - active_ids_.clear(); - for (int i = 0; i < threads; ++i) { - if (i < big_core_size) { - active_ids_.push_back(dev.big_core_ids_[i]); - } else { - active_ids_.push_back(dev.little_core_ids_[i - big_core_size]); - } - } - if (active_ids_.size() == 0) { - active_ids_.push_back(0); - } - break; - case LITE_POWER_HIGH: - active_ids_.clear(); - if (big_core_size > 0) { - mode_ = LITE_POWER_HIGH; - if (threads > big_core_size) { - LOG(ERROR) << "threads: " << threads - << ", exceed the big cores size: " << big_core_size; - active_ids_ = dev.big_core_ids_; - } else { - for (int i = 0; i < threads; ++i) { - active_ids_.push_back(dev.big_core_ids_[i]); - } - } - } else { - mode_ = LITE_POWER_LOW; - LOG(ERROR) << "HIGH POWER MODE is not support, switch to little cores"; - if (threads > small_core_size) { - active_ids_ = dev.little_core_ids_; - } else { - for (int i = 0; i < threads; ++i) { - active_ids_.push_back(dev.little_core_ids_[i]); - } - } - } - if (active_ids_.size() == 0) { - active_ids_.push_back(0); - } - break; - case LITE_POWER_LOW: - active_ids_.clear(); - if (small_core_size > 0) { - mode_ = LITE_POWER_LOW; - if (threads > small_core_size) { - LOG(WARNING) << "threads: " << threads - << ", exceed the little cores size: " << small_core_size; - active_ids_ = dev.little_core_ids_; - } else { - for (int i = 0; i < threads; ++i) { - active_ids_.push_back(dev.little_core_ids_[i]); - } - } - } else { - mode_ = LITE_POWER_HIGH; - LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores"; - if (threads > big_core_size) { - active_ids_ = dev.big_core_ids_; - } else { - for (int i = 0; i < threads; ++i) { - active_ids_.push_back(dev.big_core_ids_[i]); - } - } - } - if (active_ids_.size() == 0) { - active_ids_.push_back(0); - } - break; - case LITE_POWER_NO_BIND: - mode_ = LITE_POWER_NO_BIND; - active_ids_.clear(); - if (threads > dev.core_ids_.size()) { - active_ids_.resize(dev.core_ids_.size()); - } else { - active_ids_.resize(threads); - } - break; - case LITE_POWER_RAND_HIGH: - active_ids_.clear(); - if (big_core_size > 0) { - mode_ = LITE_POWER_RAND_HIGH; - if (threads > big_core_size) { - LOG(WARNING) << "threads: " << threads - << ", exceed the big cores size: " << big_core_size; - active_ids_ = dev.big_core_ids_; - } else { - for (int i = 0; i < threads; ++i) { - active_ids_.push_back( - dev.big_core_ids_[(i + shift_num) % big_core_size]); - } - } - } else { - mode_ = LITE_POWER_LOW; - LOG(WARNING) - << "HIGH POWER MODE is not support, switch to little cores"; - if (threads > small_core_size) { - active_ids_ = dev.little_core_ids_; - } else { - for (int i = 0; i < threads; ++i) { - active_ids_.push_back(dev.little_core_ids_[i]); - } - } - } - if (active_ids_.size() == 0) { - active_ids_.push_back(0); - } - break; - case LITE_POWER_RAND_LOW: - active_ids_.clear(); - if (small_core_size > 0) { - mode_ = LITE_POWER_RAND_LOW; - if (threads > small_core_size) { - LOG(WARNING) << "threads: " << threads - << ", exceed the little cores size: " << small_core_size; - active_ids_ = dev.little_core_ids_; - } else { - for (int i = 0; i < threads; ++i) { - active_ids_.push_back( - dev.little_core_ids_[(i + shift_num) % small_core_size]); - } - } - } else { - mode_ = LITE_POWER_HIGH; - LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores"; - if (threads > big_core_size) { - active_ids_ = dev.big_core_ids_; - } else { - for (int i = 0; i < threads; ++i) { - active_ids_.push_back(dev.big_core_ids_[i]); - } - } - } - if (active_ids_.size() == 0) { - active_ids_.push_back(0); - } - break; - } - //! fix multi-threads LITE_POWER_HIGH mode - if (mode_ == LITE_POWER_NO_BIND || threads > 1) { - int threads = active_ids_.size(); - omp_set_num_threads(threads); - } else { - if (check_online(active_ids_)) { - BindDev(); - } else { - LOG(ERROR) << "core id " << active_ids_[0] - << " is offline, switch to NO BIND MODE"; - int threads = active_ids_.size(); - omp_set_num_threads(threads); - } - } -#else - if (big_core_size > 0) { - active_ids_ = {dev.big_core_ids_[0]}; - } else { - active_ids_ = {0}; - } -#endif - //! alloc memory for sgemm in this context - int temp_mem_size = - DeviceInfo::Global().L2_cache_[active_ids_[0]] / sizeof(float); - workspace_.Resize({temp_mem_size}); - arch_ = DeviceInfo::Global().archs_[active_ids_[0]]; -} - -ARMArch Context::arch() const { return arch_; } - -void Context::SetArch(ARMArch arch) { arch_ = arch; } - -int Context::l1_cache_size() const { - DeviceInfo& dev = DeviceInfo::Global(); - return dev.L1_cache_[active_ids_[0]]; -} - -int Context::l2_cache_size() const { - DeviceInfo& dev = DeviceInfo::Global(); - return dev.L2_cache_[active_ids_[0]]; -} - -int Context::l3_cache_size() const { - DeviceInfo& dev = DeviceInfo::Global(); - return dev.L3_cache_[active_ids_[0]]; -} - -bool Context::ExtendWorkspace(DDimLite dims) { - auto count = dims.product(); - auto old = workspace_.dims(); - if (count == old.product()) { - return false; - } - - workspace_.Resize( - {static_cast(count + l2_cache_size() / sizeof(float))}); - return true; -} -#endif // LITE_WITH_ARM - -} // namespace lite +namespace lite {} // namespace lite } // namespace paddle diff --git a/paddle/fluid/lite/core/context.h b/paddle/fluid/lite/core/context.h index 483f51541440fe51e1ee998f07f2e5e12f2441fd..81041dfc9cf2543d06c9b508be8d69ddd3d94904 100644 --- a/paddle/fluid/lite/core/context.h +++ b/paddle/fluid/lite/core/context.h @@ -61,47 +61,42 @@ class Context { template <> class Context { public: - Context(); - Context(PowerMode mode, int threads); + Context() {} explicit Context(const ARMContext& ctx); - ARMContext& operator=(const ARMContext& ctx); + ARMContext& operator=(const ARMContext& ctx) {} // NOTE: InitOnce should only be used by ContextScheduler - void InitOnce() { DeviceInfo::Init(); } + void InitOnce() {} void CopyShared(const ARMContext* ctx) {} - void SetRunMode(PowerMode mode, int threads); - void SetCache(int l1size, int l2size, int l3size); - void SetArch(ARMArch arch); - void BindDev(); + void SetRunMode(PowerMode mode, int threads) { + return DeviceInfo::Global().SetRunMode(mode, threads); + } + void SetCache(int l1size, int l2size, int l3size) { + return DeviceInfo::Global().SetCache(l1size, l2size, l3size); + } + void SetArch(ARMArch arch) { return DeviceInfo::Global().SetArch(arch); } + void BindDev() { return DeviceInfo::Global().BindDev(); } - PowerMode mode() const; - int threads() const; - ARMArch arch() const; + PowerMode mode() const { return DeviceInfo::Global().mode(); } + int threads() const { return DeviceInfo::Global().threads(); } + ARMArch arch() const { return DeviceInfo::Global().arch(); } template T* workspace_data() { - return workspace_.mutable_data(); + return DeviceInfo::Global().workspace_data(); } - int l1_cache_size() const; - int l2_cache_size() const; - int l3_cache_size() const; - bool ExtendWorkspace(DDimLite dims); + int l1_cache_size() const { return DeviceInfo::Global().l1_cache_size(); } + int l2_cache_size() const { return DeviceInfo::Global().l2_cache_size(); } + int l3_cache_size() const { return DeviceInfo::Global().l3_cache_size(); } + bool ExtendWorkspace(DDimLite dims) { + return DeviceInfo::Global().ExtendWorkspace(dims); + } std::string name() const { return "ARMContext"; } - - private: - // LITE_POWER_HIGH stands for using big cores, - // LITE_POWER_LOW stands for using small core, - // LITE_POWER_FULL stands for using all cores - ARMArch arch_; - PowerMode mode_; - std::vector active_ids_; - TensorLite workspace_; - int64_t count_{0}; }; #endif diff --git a/paddle/fluid/lite/core/cpu_info.cc b/paddle/fluid/lite/core/cpu_info.cc index ab1968295813006d5d11fc4fbf416b4f9c3a3215..30fc36f00e5187b643809e0e037fcca0d5410fb4 100644 --- a/paddle/fluid/lite/core/cpu_info.cc +++ b/paddle/fluid/lite/core/cpu_info.cc @@ -12,8 +12,24 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifdef LITE_WITH_LINUX +#include +#include +#endif +#if __APPLE__ +#include "TargetConditionals.h" +#if TARGET_OS_IPHONE +#include +#include +#include +#endif // TARGET_OS_IPHONE +#endif // __APPLE__ + +#ifdef ARM_WITH_OMP +#include +#endif + #include "paddle/fluid/lite/core/cpu_info.h" -#include namespace paddle { namespace lite { @@ -73,6 +89,252 @@ void DeviceInfo::InitInternal(DeviceInfo* dev) { #elif defined(TARGET_IOS) arm_get_cpu_arch(&dev->archs_); #endif + dev->active_ids_ = {0}; + dev->mode_ = LITE_POWER_HIGH; + dev->workspace_.Resize({static_cast( + dev->L2_cache_[dev->active_ids_[0]] / sizeof(float))}); +#ifdef TARGET_IOS + dev->arch_ = APPLE; // use 6x8 +#else + if (dev->big_core_ids_.size() > 0) { + dev->arch_ = dev->archs_[dev->big_core_ids_[0]]; + } +#endif +} + +void DeviceInfo::SetCache(int l1size, int l2size, int l3size) { + int cpu_count = arm_get_cpucount(); + L1_cache_.resize(cpu_count); + L2_cache_.resize(cpu_count); + L3_cache_.resize(cpu_count); + for (int i = 0; i < cpu_count; ++i) { + L1_cache_[i] = l1size; + L2_cache_[i] = l2size; + L3_cache_[i] = l3size; + } + workspace_.Resize({2 * (l1size + l2size)}); +} + +void DeviceInfo::BindDev() { +#ifdef ARM_WITH_OMP + int num_threads = active_ids_.size(); + omp_set_num_threads(num_threads); +#ifdef LITE_WITH_LINUX + std::vector ssarets; + for (int j = 0; j < num_threads; ++j) { + ssarets.push_back(0); + } +#pragma omp parallel for + for (int i = 0; i < num_threads; i++) { + ssarets[i] = set_sched_affinity(active_ids_); + } + for (int i = 0; i < num_threads; i++) { + if (ssarets[i] != 0) { + LOG(ERROR) << "set cpu affinity failed, cpuID: " << active_ids_[i]; + return; + } + } +#endif // LITE_WITH_LINUX +#else // ARM_WITH_OMP +#ifdef LITE_WITH_LINUX + std::vector cpuid1; + cpuid1.push_back(active_ids_[0]); + int ssaret = set_sched_affinity(cpuid1); + if (ssaret != 0) { + printf("set cpu affinity failed, cpuID: %d\n", active_ids_[0]); + return; + } +#endif // LITE_WITH_LINUX +#endif // ARM_WITH_OMP +} + +void DeviceInfo::SetRunMode(PowerMode mode, int threads) { + LOG(INFO) << "ARM SetRunMode called"; + int big_core_size = big_core_ids_.size(); + int small_core_size = little_core_ids_.size(); + if (threads > big_core_size + small_core_size) { + threads = big_core_size + small_core_size; + } +#ifdef ARM_WITH_OMP + count_++; + int shift_num = (count_ / 10) % big_core_size; + switch (mode) { + case LITE_POWER_FULL: + mode_ = mode; + active_ids_.clear(); + for (int i = 0; i < threads; ++i) { + if (i < big_core_size) { + active_ids_.push_back(big_core_ids_[i]); + } else { + active_ids_.push_back(little_core_ids_[i - big_core_size]); + } + } + if (active_ids_.size() == 0) { + active_ids_.push_back(0); + } + break; + case LITE_POWER_HIGH: + active_ids_.clear(); + if (big_core_size > 0) { + mode_ = LITE_POWER_HIGH; + if (threads > big_core_size) { + LOG(ERROR) << "threads: " << threads + << ", exceed the big cores size: " << big_core_size; + active_ids_ = big_core_ids_; + } else { + for (int i = 0; i < threads; ++i) { + active_ids_.push_back(big_core_ids_[i]); + } + } + } else { + mode_ = LITE_POWER_LOW; + LOG(ERROR) << "HIGH POWER MODE is not support, switch to little cores."; + if (threads > small_core_size) { + active_ids_ = little_core_ids_; + } else { + for (int i = 0; i < threads; ++i) { + active_ids_.push_back(little_core_ids_[i]); + } + } + } + if (active_ids_.size() == 0) { + active_ids_.push_back(0); + } + break; + case LITE_POWER_LOW: + active_ids_.clear(); + if (small_core_size > 0) { + mode_ = LITE_POWER_LOW; + if (threads > small_core_size) { + LOG(WARNING) << "threads: " << threads + << ", exceed the little cores size: " << small_core_size; + active_ids_ = little_core_ids_; + } else { + for (int i = 0; i < threads; ++i) { + active_ids_.push_back(little_core_ids_[i]); + } + } + } else { + mode_ = LITE_POWER_HIGH; + LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores"; + if (threads > big_core_size) { + active_ids_ = big_core_ids_; + } else { + for (int i = 0; i < threads; ++i) { + active_ids_.push_back(big_core_ids_[i]); + } + } + } + if (active_ids_.size() == 0) { + active_ids_.push_back(0); + } + break; + case LITE_POWER_NO_BIND: + mode_ = LITE_POWER_NO_BIND; + active_ids_.clear(); + if (threads > core_ids_.size()) { + active_ids_.resize(core_ids_.size()); + } else { + active_ids_.resize(threads); + } + break; + case LITE_POWER_RAND_HIGH: + active_ids_.clear(); + if (big_core_size > 0) { + mode_ = LITE_POWER_RAND_HIGH; + if (threads > big_core_size) { + LOG(WARNING) << "threads: " << threads + << ", exceed the big cores size: " << big_core_size; + active_ids_ = big_core_ids_; + } else { + for (int i = 0; i < threads; ++i) { + active_ids_.push_back( + big_core_ids_[(i + shift_num) % big_core_size]); + } + } + } else { + mode_ = LITE_POWER_LOW; + LOG(WARNING) + << "HIGH POWER MODE is not support, switch to little cores."; + if (threads > small_core_size) { + active_ids_ = little_core_ids_; + } else { + for (int i = 0; i < threads; ++i) { + active_ids_.push_back(little_core_ids_[i]); + } + } + } + if (active_ids_.size() == 0) { + active_ids_.push_back(0); + } + break; + case LITE_POWER_RAND_LOW: + active_ids_.clear(); + if (small_core_size > 0) { + mode_ = LITE_POWER_RAND_LOW; + if (threads > small_core_size) { + LOG(WARNING) << "threads: " << threads + << ", exceed the little cores size: " << small_core_size; + active_ids_ = little_core_ids_; + } else { + for (int i = 0; i < threads; ++i) { + active_ids_.push_back( + little_core_ids_[(i + shift_num) % small_core_size]); + } + } + } else { + mode_ = LITE_POWER_HIGH; + LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores."; + if (threads > big_core_size) { + active_ids_ = big_core_ids_; + } else { + for (int i = 0; i < threads; ++i) { + active_ids_.push_back(big_core_ids_[i]); + } + } + } + if (active_ids_.size() == 0) { + active_ids_.push_back(0); + } + break; + } + //! fix multi-threads LITE_POWER_HIGH mode + if (mode_ == LITE_POWER_NO_BIND || threads > 1) { + int threads = active_ids_.size(); + omp_set_num_threads(threads); + } else { + if (check_online(active_ids_)) { + BindDev(); + } else { + LOG(WARNING) << "core id " << active_ids_[0] + << " is offline, switch to NO BIND MODE"; + int threads = active_ids_.size(); + omp_set_num_threads(threads); + } + } +#else + if (big_core_size > 0) { + active_ids_ = {big_core_ids_[0]}; + } else { + active_ids_ = {0}; + } +#endif + //! alloc memory for sgemm in this context + int temp_mem_size = L2_cache_[active_ids_[0]] / sizeof(float); + workspace_.Resize({temp_mem_size}); + arch_ = archs_[active_ids_[0]]; +} + +bool DeviceInfo::ExtendWorkspace(DDimLite dims) { + auto count = dims.product(); + auto old = workspace_.dims(); + if (count == old.product()) { + return false; + } + + workspace_.Resize({static_cast( + count + L2_cache_[active_ids_[0]] / sizeof(float))}); + return true; } // cache_id : 0 -> L1, 1 -> L2, 2 -> L3 diff --git a/paddle/fluid/lite/core/cpu_info.h b/paddle/fluid/lite/core/cpu_info.h index 385954e6d8e480cbc17fc2ec467f88d7d24331fb..b8c6ae95dcd87326e1b9b24227226e4f55e2b5db 100644 --- a/paddle/fluid/lite/core/cpu_info.h +++ b/paddle/fluid/lite/core/cpu_info.h @@ -16,22 +16,9 @@ #include #include +#include "paddle/fluid/lite/core/lite_tensor.h" #include "paddle/fluid/lite/utils/cp_logging.h" -#ifdef LITE_WITH_LINUX -#include -#include -#endif - -#if __APPLE__ -#include "TargetConditionals.h" -#if TARGET_OS_IPHONE -#include -#include -#include -#endif // TARGET_OS_IPHONE -#endif // __APPLE__ - namespace paddle { namespace lite { @@ -80,6 +67,15 @@ class DeviceInfo { std::vector cluster_ids_; std::vector archs_; + ARMArch arch_; + // LITE_POWER_HIGH stands for using big cores, + // LITE_POWER_LOW stands for using small core, + // LITE_POWER_FULL stands for using all cores + PowerMode mode_; + std::vector active_ids_; + TensorLite workspace_; + int64_t count_{0}; + static DeviceInfo& Global() { static auto* x = new DeviceInfo; return *x; @@ -90,6 +86,25 @@ class DeviceInfo { InitInternal(&info); } + void SetRunMode(PowerMode mode, int threads); + void SetCache(int l1size, int l2size, int l3size); + void SetArch(ARMArch arch) { arch_ = arch; } + void BindDev(); + + PowerMode mode() const { return mode_; } + int threads() const { return active_ids_.size(); } + ARMArch arch() const { return arch_; } + + template + T* workspace_data() { + return workspace_.mutable_data(); + } + + int l1_cache_size() const { return L1_cache_[active_ids_[0]]; } + int l2_cache_size() const { return L2_cache_[active_ids_[0]]; } + int l3_cache_size() const { return L3_cache_[active_ids_[0]]; } + bool ExtendWorkspace(DDimLite dims); + private: DeviceInfo() = default; static void InitInternal(DeviceInfo* dev); diff --git a/paddle/fluid/lite/kernels/arm/conv_compute.cc b/paddle/fluid/lite/kernels/arm/conv_compute.cc index e5988a3bb6f269d41a1fe083415c3ff6a3456847..a8a2ac790a3c045642277ef75367bbdd878f0d6d 100644 --- a/paddle/fluid/lite/kernels/arm/conv_compute.cc +++ b/paddle/fluid/lite/kernels/arm/conv_compute.cc @@ -28,8 +28,6 @@ void ConvCompute::PrepareForRun() { auto o_dims = param.output->dims(); auto& ctx = this->ctx_->template As(); - // TODO(xxx): make api and expose it - ctx.SetRunMode(LITE_POWER_HIGH, 4); int win = x_dims[3]; // nchw int hin = x_dims[2]; diff --git a/paddle/fluid/lite/kernels/arm/fc_compute.cc b/paddle/fluid/lite/kernels/arm/fc_compute.cc index c7a9269b5f9af40e89a8e58e1363c1b131f81ac4..2e6f46a0e07e422bb118834214fee3fc43ae1d61 100644 --- a/paddle/fluid/lite/kernels/arm/fc_compute.cc +++ b/paddle/fluid/lite/kernels/arm/fc_compute.cc @@ -28,7 +28,6 @@ void FcCompute::PrepareForRun() { auto w_dims = param.w->dims(); auto& ctx = this->ctx_->template As(); - ctx.SetRunMode(LITE_POWER_HIGH, 4); CHECK_GE(x_dims.size(), 2UL); CHECK_EQ(w_dims.size(), 2UL); diff --git a/paddle/fluid/lite/kernels/arm/mul_compute.cc b/paddle/fluid/lite/kernels/arm/mul_compute.cc index a176086a4cae61e2dc4ab2dec035c25a6df4b512..57c28e63bbf3bfdacf861d60ba2ab25436b61b42 100644 --- a/paddle/fluid/lite/kernels/arm/mul_compute.cc +++ b/paddle/fluid/lite/kernels/arm/mul_compute.cc @@ -24,7 +24,6 @@ namespace arm { void MulCompute::PrepareForRun() { auto& ctx = this->ctx_->template As(); - ctx.SetRunMode(LITE_POWER_HIGH, 4); } void MulCompute::Run() { diff --git a/paddle/fluid/lite/kernels/arm/pool_compute.cc b/paddle/fluid/lite/kernels/arm/pool_compute.cc index ea3d47a268588f7d593f0c3ac58f3421d9456fa8..3ee82ae6303f849a11d8685aae09b267bb991604 100644 --- a/paddle/fluid/lite/kernels/arm/pool_compute.cc +++ b/paddle/fluid/lite/kernels/arm/pool_compute.cc @@ -26,7 +26,6 @@ namespace arm { void PoolCompute::PrepareForRun() { auto& ctx = this->ctx_->template As(); - ctx.SetRunMode(LITE_POWER_HIGH, 4); } void PoolCompute::Run() {