提交 845af6e0 编写于 作者: X Xiaoyang LI 提交者: cyj1986

fix arm device_info error, fix bind big core error, improve 855 performance, test=develop (#2133)

上级 7115ef05
......@@ -68,7 +68,7 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
lite_api::MobileConfig config;
config.set_threads(thread_num);
if (thread_num == 1) {
config.set_power_mode(LITE_POWER_HIGH);
config.set_power_mode(LITE_POWER_NO_BIND);
} else {
config.set_power_mode(LITE_POWER_NO_BIND);
}
......
......@@ -28,7 +28,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
......
......@@ -29,7 +29,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
......
......@@ -32,7 +32,7 @@ void TestModel(const std::vector<Place>& valid_places,
const std::string& model_dir = FLAGS_model_dir,
bool save_model = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(model_dir, "", "", preferred_place, valid_places);
......
......@@ -29,7 +29,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
......
......@@ -33,7 +33,7 @@ void TestModel(const std::vector<Place>& valid_places,
const std::string& model_dir = FLAGS_model_dir,
bool save_model = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(model_dir, "", "", preferred_place, valid_places);
......
......@@ -237,65 +237,74 @@ std::string get_cpu_name() {
return "";
}
void get_cpu_max_min_freq(int cpu_id, int* max_freq, int* min_freq) {
*max_freq = 0;
*min_freq = 0;
// Reads the minimum hardware frequency, in kHz, of the CPU core `cpuid` from
// the Linux cpufreq sysfs node `cpuinfo_min_freq`.
//
// Returns the parsed value, or -1 when the node cannot be opened or its
// content does not start with an integer.
int get_min_freq_khz(int cpuid) {
  char path[256];
  // Must be cpuinfo_min_freq: reading cpuinfo_max_freq here would make the
  // reported minimum identical to the maximum frequency.
  snprintf(path,
           sizeof(path),
           "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_min_freq",
           cpuid);
  FILE* fp = fopen(path, "rb");
  if (!fp) {
    return -1;
  }
  int min_freq_khz = -1;
  // Keep the -1 sentinel when nothing could be parsed instead of returning a
  // partially written value.
  if (fscanf(fp, "%d", &min_freq_khz) != 1) {
    min_freq_khz = -1;
  }
  fclose(fp);
  return min_freq_khz;
}
int get_max_freq_khz(int cpuid) {
// first try, for all possible cpu
char path[256];
snprintf(path,
sizeof(path),
"/sys/devices/system/cpu/cpufreq/stats/cpu%d/time_in_state",
cpu_id);
cpuid);
FILE* fp = fopen(path, "rb");
if (!fp) {
// second try, for online cpu
snprintf(path,
sizeof(path),
"/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state",
cpu_id);
cpuid);
fp = fopen(path, "rb");
if (!fp) {
// third try, for online cpu
// get max_freq
snprintf(path,
sizeof(path),
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq",
cpu_id);
fp = fopen(path, "rb");
if (!fp) {
return;
}
int max_freq_khz = 0;
if (fp) {
while (!feof(fp)) {
int freq_khz = 0;
int nscan = fscanf(fp, "%d %*d", &freq_khz);
if (nscan != 1) {
break;
}
fscanf(fp, "%d", max_freq);
fclose(fp);
// get min_freq
snprintf(path,
sizeof(path),
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_min_freq",
cpu_id);
fp = fopen(path, "rb");
if (!fp) {
return;
if (freq_khz > max_freq_khz) {
max_freq_khz = freq_khz;
}
fscanf(fp, "%d", min_freq);
fclose(fp);
return;
}
}
*min_freq = std::numeric_limits<int>::max();
while (!feof(fp)) {
int freq = 0;
int nscan = fscanf(fp, "%d %*d", &freq);
if (nscan != 1) {
break;
}
if (freq > *max_freq) {
*max_freq = freq;
}
if (freq < *min_freq) {
*min_freq = freq;
if (max_freq_khz == 0 || !fp) {
// third try, for online cpu
snprintf(path,
sizeof(path),
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq",
cpuid);
fp = fopen(path, "rb");
if (!fp) {
return -1;
}
int max_freq_khz = -1;
fscanf(fp, "%d", &max_freq_khz);
fclose(fp);
return max_freq_khz;
}
fclose(fp);
return max_freq_khz;
}
void sort_cpuid_by_max_freq(const std::vector<int>& max_freqs,
......@@ -835,7 +844,7 @@ void DeviceInfo::RequestPowerHighMode(int thread_num) {
active_ids_ = big_core_ids_;
} else {
for (int i = 0; i < thread_num; ++i) {
active_ids_.push_back(big_core_ids_[i]);
active_ids_.push_back(big_core_ids_[big_core_size - 1 - i]);
}
}
} else {
......@@ -972,8 +981,8 @@ int DeviceInfo::Setup() {
#ifdef LITE_WITH_LINUX
// get max&min freq
for (int i = 0; i < core_num_; ++i) {
int max_freq, min_freq;
get_cpu_max_min_freq(i, &max_freq, &min_freq);
int max_freq = get_max_freq_khz(i);
int min_freq = get_min_freq_khz(i);
max_freqs_[i] = max_freq / 1000;
min_freqs_[i] = min_freq / 1000;
}
......@@ -982,13 +991,6 @@ int DeviceInfo::Setup() {
if (!SetCPUInfoByName()) {
SetCPUInfoByProb();
}
core_ids_.resize(core_num_);
cluster_ids_.resize(core_num_);
for (int i = 0; i < core_num_; ++i) {
max_freqs_[i] = 1000000;
min_freqs_[i] = 1000000;
cluster_ids_[i] = 0;
}
#else
#ifdef TARGET_IOS
dev_name_ = "Apple";
......
......@@ -24,7 +24,7 @@
#include "lite/kernels/arm/conv_compute.h"
#endif // LITE_WITH_ARM
DEFINE_int32(cluster, 0, "cluster id");
DEFINE_int32(cluster, 3, "cluster id");
DEFINE_int32(threads, 1, "threads num");
DEFINE_int32(warmup, 0, "warmup times");
DEFINE_int32(repeats, 1, "repeats times");
......
......@@ -24,7 +24,7 @@
#include "lite/kernels/arm/conv_compute.h"
#endif // LITE_WITH_ARM
DEFINE_int32(cluster, 0, "cluster id");
DEFINE_int32(cluster, 3, "cluster id");
DEFINE_int32(threads, 1, "threads num");
DEFINE_int32(warmup, 0, "warmup times");
DEFINE_int32(repeats, 1, "repeats times");
......
......@@ -26,7 +26,7 @@
typedef paddle::lite::Tensor Tensor;
DEFINE_int32(cluster, 0, "cluster id");
DEFINE_int32(cluster, 3, "cluster id");
DEFINE_int32(threads, 1, "threads num");
DEFINE_int32(warmup, 0, "warmup times");
DEFINE_int32(repeats, 1, "repeats times");
......
......@@ -26,7 +26,7 @@
typedef paddle::lite::Tensor Tensor;
DEFINE_int32(cluster, 0, "cluster id");
DEFINE_int32(cluster, 3, "cluster id");
DEFINE_int32(threads, 1, "threads num");
DEFINE_int32(warmup, 0, "warmup times");
DEFINE_int32(repeats, 1, "repeats times");
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册