ARM cpu_info refine

test=develop

ARM cpu_info refine
test=develop
ce46ef22 · hong19860320 · 251255bc · ce46ef22 · ce46ef22 · ce46ef22
3 changed files
--- a/paddle/fluid/lite/core/context.h
+++ b/paddle/fluid/lite/core/context.h
@@ -67,7 +67,7 @@ class Context<TargetType::kARM> {
  ARMContext& operator=(const ARMContext& ctx) {}
  // NOTE: InitOnce should only be used by ContextScheduler
-  void InitOnce() {}
+  void InitOnce() { DeviceInfo::Init(); }
  void CopyShared(const ARMContext* ctx) {}
@@ -78,20 +78,19 @@ class Context<TargetType::kARM> {
    return DeviceInfo::Global().SetCache(l1size, l2size, l3size);
  }
  void SetArch(ARMArch arch) { return DeviceInfo::Global().SetArch(arch); }
-  void BindDev() { return DeviceInfo::Global().BindDev(); }
  PowerMode mode() const { return DeviceInfo::Global().mode(); }
  int threads() const { return DeviceInfo::Global().threads(); }
  ARMArch arch() const { return DeviceInfo::Global().arch(); }
+  int l1_cache_size() const { return DeviceInfo::Global().l1_cache_size(); }
+  int l2_cache_size() const { return DeviceInfo::Global().l2_cache_size(); }
+  int l3_cache_size() const { return DeviceInfo::Global().l3_cache_size(); }
  template <typename T>
  T* workspace_data() {
    return DeviceInfo::Global().workspace_data<T>();
  }
-  int l1_cache_size() const { return DeviceInfo::Global().l1_cache_size(); }
-  int l2_cache_size() const { return DeviceInfo::Global().l2_cache_size(); }
-  int l3_cache_size() const { return DeviceInfo::Global().l3_cache_size(); }
  bool ExtendWorkspace(DDimLite dims) {
    return DeviceInfo::Global().ExtendWorkspace(dims);
  }

--- a/paddle/fluid/lite/core/cpu_info.cc
+++ b/paddle/fluid/lite/core/cpu_info.cc
--- a/paddle/fluid/lite/core/cpu_info.h
+++ b/paddle/fluid/lite/core/cpu_info.h
@@ -14,6 +14,7 @@
 #pragma once
+#include <cstdarg>
 #include <string>
 #include <vector>
 #include "paddle/fluid/lite/core/lite_tensor.h"
@@ -47,92 +48,73 @@ typedef enum {
 class DeviceInfo {
 public:
-  int idx_;
-  int max_freq_;
-  int min_freq_;
-  int generate_arch_;
-  int compute_core_num_;
-  int max_memory_;
-  int sharemem_size_;
-  std::string device_name_;
-  std::string compute_ability_;
-  std::vector<int> L1_cache_;
-  std::vector<int> L2_cache_;
-  std::vector<int> L3_cache_;
-  std::vector<int> core_ids_;
-  std::vector<int> big_core_ids_;
-  std::vector<int> little_core_ids_;
-  std::vector<int> cluster_ids_;
-  std::vector<ARMArch> archs_;
-  ARMArch arch_;
-  // LITE_POWER_HIGH stands for using big cores,
-  // LITE_POWER_LOW stands for using small core,
-  // LITE_POWER_FULL stands for using all cores
-  PowerMode mode_;
-  std::vector<int> active_ids_;
-  TensorLite workspace_;
-  int64_t count_{0};
  static DeviceInfo& Global() {
    static auto* x = new DeviceInfo;
    return *x;
  }
-  static void Init() {
+  static int Init() {
-    auto& info = Global();
+    static int ret = Global().Setup();
-    InitInternal(&info);
+    return ret;
  }
-  void SetRunMode(PowerMode mode, int threads);
+  int Setup();
+  void SetRunMode(PowerMode mode, int thread_num);
  void SetCache(int l1size, int l2size, int l3size);
  void SetArch(ARMArch arch) { arch_ = arch; }
-  void BindDev();
  PowerMode mode() const { return mode_; }
  int threads() const { return active_ids_.size(); }
  ARMArch arch() const { return arch_; }
+  int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
+  int l2_cache_size() const { return L2_cache_[active_ids_[0]]; }
+  int l3_cache_size() const { return L3_cache_[active_ids_[0]]; }
  template <typename T>
  T* workspace_data() {
    return workspace_.mutable_data<T>();
  }
-  int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
-  int l2_cache_size() const { return L2_cache_[active_ids_[0]]; }
-  int l3_cache_size() const { return L3_cache_[active_ids_[0]]; }
  bool ExtendWorkspace(DDimLite dims);
 private:
-  DeviceInfo() = default;
+  int core_num_;
-  static void InitInternal(DeviceInfo* dev);
+  std::vector<int> max_freqs_;
-};
+  std::vector<int> min_freqs_;
+  int mem_size_;
-size_t arm_get_meminfo();
+  std::string dev_name_;
-int arm_get_cpucount();
-void arm_get_cpu_arch(std::vector<ARMArch>* archs);
-bool get_cpu_info_from_name(DeviceInfo* cpu_info, std::string hardware_name);
-#ifdef LITE_WITH_LINUX
+  std::vector<int> L1_cache_;
+  std::vector<int> L2_cache_;
-void set_default_cache(DeviceInfo* dev);
+  std::vector<int> L3_cache_;
+  std::vector<int> core_ids_;
-std::string arm_get_cpu_name();
+  std::vector<int> big_core_ids_;
+  std::vector<int> little_core_ids_;
+  std::vector<int> cluster_ids_;
+  std::vector<ARMArch> archs_;
-int get_max_freq_khz(int cpuid);
+  ARMArch arch_;
+  // LITE_POWER_HIGH stands for using big cores,
+  // LITE_POWER_LOW stands for using small core,
+  // LITE_POWER_FULL stands for using all cores
+  PowerMode mode_;
+  std::vector<int> active_ids_;
+  TensorLite workspace_;
+  int64_t count_{0};
-int arm_sort_cpuid_by_max_frequency(int cpu_count, std::vector<int>* cpuids,
+  void SetCacheInfo(int cache_id, int argc, ...);
-                                    const std::vector<int>& cpu_freq,
+  void SetArchInfo(int argc, ...);
-                                    std::vector<int>* cluster_ids);
+  bool SetCPUInfoByName();
-int check_online(const std::vector<int>& core_ids);
+  void SetCPUInfoByProb();
-int set_sched_affinity(const std::vector<int>& cpuids);
+  void RequestPowerFullMode(const int thread_num);
+  void RequestPowerHighMode(const int thread_num);
+  void RequestPowerLowMode(const int thread_num);
+  void RequestPowerNoBindMode(const int thread_num);
+  void RequestPowerRandHighMode(const int shift_num, const int thread_num);
+  void RequestPowerRandLowMode(const int shift_num, const int thread_num);
-#endif  // LITE_WITH_LINUX
+  DeviceInfo() = default;
+};
 #endif  // LITE_WITH_ARM