Unverified commit bc79142c authored by Xiaoyang LI, committed by GitHub

support setting cluster and threads in MobileConfig (#1848)

* fix building ios tiny publish lib error

* support setting cluster and threads in MobileConfig

* fix build error, test=develop

* fix building server publish error, test=develop
Parent 26c78173
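For readers of this change, a minimal usage sketch of the new knobs (assuming the public header paddle_api.h and the paddle::lite_api namespace used elsewhere in the Lite API; the model path is illustrative):

#include "paddle_api.h"  // assumed public header exposing MobileConfig / CreatePaddlePredictor
using namespace paddle::lite_api;  // assumed namespace of the light-weight API

MobileConfig config;
config.set_model_dir("./mobilenet_v1_opt");  // hypothetical NaiveBuffer model directory
config.set_power_mode(LITE_POWER_NO_BIND);   // new in this change: choose CPU-cluster binding
config.set_threads(2);                       // new in this change: worker-thread count
// LightPredictorImpl::Init now forwards these settings to DeviceInfo::Global().SetRunMode().
auto predictor = CreatePaddlePredictor<MobileConfig>(config);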
......@@ -95,7 +95,7 @@ endif()
# check options
if (LITE_ON_TINY_PUBLISH)
if (NOT (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_JAVA AND NOT WITH_TESTING))
if (NOT (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND NOT WITH_TESTING))#LITE_WITH_JAVA AND
message(FATAL_ERROR "LITE_ON_TINY_PUBLISH=ON must be used with WITH_LITE=ON LITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON LITE_WITH_JAVA=ON WITH_TESTING=OFF")
return()
endif()
......
......@@ -127,6 +127,7 @@ elseif(ARM_TARGET_OS STREQUAL "ios64")
else()
return()
endif()
add_definitions(-DTARGET_IOS)
# if do not specify the ARM_TARGET_ARCH_ABI then use default all supported
if(ARM_TARGET_ARCH_ABI STREQUAL "armv7"
......
......@@ -32,7 +32,11 @@ ELSE(WIN32)
SET(CMAKE_OSX_DEPLOYMENT_TARGET ${MACOS_VERSION} CACHE STRING
"Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. Set to empty string for default value.")
ENDIF()
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
IF(ARM_TARGET_OS STREQUAL "android" OR ARM_TARGET_OS STREQUAL "armlinux"
OR ARM_TARGET_OS STREQUAL "ios" OR ARM_TARGET_OS STREQUAL "ios64")
ELSE()
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
ENDIF()
ELSE(APPLE)
IF(EXISTS "/etc/issue")
......
......@@ -77,14 +77,16 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
COMMAND cp "${CMAKE_BINARY_DIR}/lite/gen_code/paddle_code_generator" "${INFER_LITE_PUBLISH_ROOT}/bin"
COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/test_model_bin" "${INFER_LITE_PUBLISH_ROOT}/bin"
)
add_dependencies(publish_inference_cxx_lib model_optimize_tool)
add_dependencies(publish_inference_cxx_lib paddle_code_generator)
add_dependencies(publish_inference_cxx_lib bundle_full_api)
add_dependencies(publish_inference_cxx_lib bundle_light_api)
add_dependencies(publish_inference_cxx_lib test_model_bin)
add_dependencies(publish_inference publish_inference_cxx_lib)
add_custom_command(TARGET publish_inference_cxx_lib POST_BUILD
COMMAND ${CMAKE_STRIP} "--strip-debug" ${INFER_LITE_PUBLISH_ROOT}/cxx/lib/*.a)
if(NOT IOS)
add_dependencies(publish_inference_cxx_lib model_optimize_tool)
add_dependencies(publish_inference_cxx_lib paddle_code_generator)
add_dependencies(publish_inference_cxx_lib bundle_full_api)
add_dependencies(publish_inference_cxx_lib bundle_light_api)
add_dependencies(publish_inference_cxx_lib test_model_bin)
add_dependencies(publish_inference publish_inference_cxx_lib)
add_custom_command(TARGET publish_inference_cxx_lib POST_BUILD
COMMAND ${CMAKE_STRIP} "--strip-debug" ${INFER_LITE_PUBLISH_ROOT}/cxx/lib/*.a)
endif()
endif()
......
......@@ -175,7 +175,11 @@ lite_cc_library(paddle_api SRCS paddle_api.cc DEPS op_params tensor)
#-----------------------------------------------------------------------------------------------------
# The final inference library for both CxxConfig and MobileConfig.
lite_cc_library(paddle_api_light SRCS light_api_impl.cc DEPS light_api paddle_api)
if (LITE_ON_TINY_PUBLISH)
lite_cc_library(paddle_api_light SRCS light_api_impl.cc DEPS light_api paddle_api stream)
else()
lite_cc_library(paddle_api_light SRCS light_api_impl.cc DEPS light_api paddle_api)
endif()
if (NOT LITE_ON_TINY_PUBLISH)
lite_cc_library(paddle_api_full SRCS cxx_api_impl.cc DEPS cxx_api paddle_api light_api
${ops}
......
......@@ -69,10 +69,10 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
#ifdef LITE_WITH_ARM
lite::DeviceInfo::Init();
if (thread_num == 1) {
lite::DeviceInfo::Global().SetRunMode(lite::LITE_POWER_HIGH, thread_num);
lite::DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, thread_num);
LOG(INFO) << "LITE_POWER_HIGH";
} else {
lite::DeviceInfo::Global().SetRunMode(lite::LITE_POWER_NO_BIND, thread_num);
lite::DeviceInfo::Global().SetRunMode(LITE_POWER_NO_BIND, thread_num);
LOG(INFO) << "LITE_POWER_NO_BIND";
}
#endif
......
......@@ -28,7 +28,7 @@ namespace lite {
void TestModel(const std::vector<Place> &valid_places,
const Place &preferred_place) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
......@@ -28,7 +28,7 @@ namespace lite {
#ifdef LITE_WITH_ARM
TEST(InceptionV4, test) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
......
......@@ -40,6 +40,10 @@ class LightPredictorImpl : public PaddlePredictor {
void LightPredictorImpl::Init(const MobileConfig& config) {
// LightPredictor Only support NaiveBuffer backend in publish lib
#ifdef LITE_WITH_ARM
lite::DeviceInfo::Init();
lite::DeviceInfo::Global().SetRunMode(config.power_mode(), config.threads());
#endif
raw_predictor_.reset(new lite::LightPredictor(config.model_dir(),
LiteModelType::kNaiveBuffer));
}
......
......@@ -29,7 +29,7 @@ void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place,
bool use_npu = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
......@@ -29,7 +29,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
......@@ -33,7 +33,7 @@ void TestModel(const std::vector<Place>& valid_places,
bool gen_npu = false,
bool save_model = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(model_dir, preferred_place, valid_places);
......
......@@ -29,7 +29,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
......@@ -34,7 +34,7 @@ void TestModel(const std::vector<Place>& valid_places,
bool gen_npu = false,
bool save_model = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(model_dir, preferred_place, valid_places);
......
......@@ -28,7 +28,7 @@ namespace lite {
TEST(model, test) {
#ifdef LITE_WITH_ARM
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
......
......@@ -64,7 +64,7 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
const int warmup_times = 0) {
#ifdef LITE_WITH_ARM
lite::DeviceInfo::Init();
lite::DeviceInfo::Global().SetRunMode(lite::LITE_POWER_HIGH, thread_num);
lite::DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, thread_num);
#endif
lite_api::MobileConfig config;
config.set_model_dir(model_dir);
......
......@@ -29,7 +29,7 @@ void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place,
bool use_npu = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
......@@ -110,7 +110,18 @@ class LITE_API CxxConfig : public ConfigBase {
/// MobileConfig is the config for the light weight predictor, it will skip
/// IR optimization or other unnecessary stages.
class LITE_API MobileConfig : public ConfigBase {};
class LITE_API MobileConfig : public ConfigBase {
PowerMode mode_{LITE_POWER_HIGH};
int threads_{1};
public:
MobileConfig(Place preferred_place=Place(TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW)),
PowerMode mode=LITE_POWER_HIGH, int threads=1) : mode_(mode), threads_(threads) {}
void set_power_mode(PowerMode mode) { mode_ = mode; }
void set_threads(int threads) { threads_ = threads; }
PowerMode power_mode() const { return mode_; }
int threads() const { return threads_; }
};
template <typename ConfigT>
std::shared_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT&);
......
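The added constructor makes the same settings available at construction time; a minimal sketch using the defaults declared above:

// Place defaults to ARM / kFloat / kNCHW; here we bind to the big cores with 4 threads.
MobileConfig config(Place(TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW)),
                    LITE_POWER_HIGH, 4);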
......@@ -70,6 +70,14 @@ enum class DataLayoutType : int {
kAny = 2, // any data layout
NUM = 4, // number of fields.
};
typedef enum {
LITE_POWER_HIGH = 0,
LITE_POWER_LOW = 1,
LITE_POWER_FULL = 2,
LITE_POWER_NO_BIND = 3,
LITE_POWER_RAND_HIGH = 4,
LITE_POWER_RAND_LOW = 5
} PowerMode;
enum class ActivationType : int {
kIndentity = 0,
......
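These enumerators replace the old lite::PowerMode definition removed further down from the DeviceInfo header; as implemented in DeviceInfo, they roughly mean:

// LITE_POWER_HIGH      bind worker threads to the big cores (falls back to little cores)
// LITE_POWER_LOW       bind worker threads to the little cores (falls back to big cores)
// LITE_POWER_FULL      use all cores, big cores first
// LITE_POWER_NO_BIND   no core binding (the default set in DeviceInfo::Setup)
// LITE_POWER_RAND_HIGH / LITE_POWER_RAND_LOW
//                      like HIGH / LOW, but the starting core is rotated over time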
......@@ -28,7 +28,7 @@ namespace lite {
#ifdef LITE_WITH_ARM
TEST(ResNet18, test) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
......
......@@ -29,7 +29,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
......@@ -28,7 +28,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
......@@ -28,7 +28,7 @@ namespace lite {
#ifdef LITE_WITH_ARM
TEST(unet, test) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
......
......@@ -65,7 +65,6 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
conv_direct_3x3s1.cc
conv_direct_3x3s2.cc
conv_direct.cc
conv_depthwise_3x3_int7.cc
conv_depthwise_3x3_int8.cc
conv_depthwise_5x5s1_int8.cc
conv_depthwise_3x3p0.cc
......
This diff is collapsed.
......@@ -101,7 +101,7 @@ class Context<TargetType::kARM> {
void CopySharedTo(ARMContext* ctx) {}
void SetRunMode(PowerMode mode, int threads) {
void SetRunMode(lite_api::PowerMode mode, int threads) {
return DeviceInfo::Global().SetRunMode(mode, threads);
}
void SetCache(int l1size, int l2size, int l3size) {
......@@ -109,7 +109,7 @@ class Context<TargetType::kARM> {
}
void SetArch(ARMArch arch) { return DeviceInfo::Global().SetArch(arch); }
PowerMode mode() const { return DeviceInfo::Global().mode(); }
lite_api::PowerMode mode() const { return DeviceInfo::Global().mode(); }
int threads() const { return DeviceInfo::Global().threads(); }
ARMArch arch() const { return DeviceInfo::Global().arch(); }
int l1_cache_size() const { return DeviceInfo::Global().l1_cache_size(); }
......
......@@ -119,7 +119,8 @@ size_t get_mem_size() {
return memsize;
#elif defined(TARGET_IOS)
// to be implemented
printf("not implemented\n");
printf("not implemented, set to default 4GB\n");
return 4096 * 1024;
#endif
return 0;
}
......@@ -209,7 +210,7 @@ void get_cpu_arch(std::vector<ARMArch>* archs, const int cpu_num) {
}
#elif defined(TARGET_IOS)
for (int i = 0; i < cpu_num; ++i) {
archs->at(i) = APPLE;
archs->at(i) = kAPPLE;
}
#endif
}
......@@ -818,7 +819,7 @@ void DeviceInfo::RequestPowerFullMode(int thread_num) {
active_ids_.push_back(little_core_ids_[i - big_core_size]);
}
}
mode_ = LITE_POWER_FULL;
mode_ = lite_api::PowerMode::LITE_POWER_FULL;
}
void DeviceInfo::RequestPowerHighMode(int thread_num) {
......@@ -826,7 +827,7 @@ void DeviceInfo::RequestPowerHighMode(int thread_num) {
int little_core_size = little_core_ids_.size();
active_ids_.clear();
if (big_core_size > 0) {
mode_ = LITE_POWER_HIGH;
mode_ = lite_api::PowerMode::LITE_POWER_HIGH;
if (thread_num > big_core_size) {
LOG(ERROR) << "Request thread num: " << thread_num
<< ", exceed the big cores size: " << big_core_size
......@@ -838,7 +839,7 @@ void DeviceInfo::RequestPowerHighMode(int thread_num) {
}
}
} else {
mode_ = LITE_POWER_LOW;
mode_ = lite_api::PowerMode::LITE_POWER_LOW;
LOG(ERROR) << "HIGH POWER MODE is not support, switch to little cores.";
if (thread_num > little_core_size) {
active_ids_ = little_core_ids_;
......@@ -855,7 +856,7 @@ void DeviceInfo::RequestPowerLowMode(int thread_num) {
int little_core_size = little_core_ids_.size();
active_ids_.clear();
if (little_core_size > 0) {
mode_ = LITE_POWER_LOW;
mode_ = lite_api::PowerMode::LITE_POWER_LOW;
if (thread_num > little_core_size) {
LOG(WARNING) << "Request thread num: " << thread_num
<< ", exceed the little cores size: " << little_core_size
......@@ -867,7 +868,7 @@ void DeviceInfo::RequestPowerLowMode(int thread_num) {
}
}
} else {
mode_ = LITE_POWER_HIGH;
mode_ = lite_api::PowerMode::LITE_POWER_HIGH;
LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores";
if (thread_num > big_core_size) {
active_ids_ = big_core_ids_;
......@@ -893,7 +894,7 @@ void DeviceInfo::RequestPowerNoBindMode(int thread_num) {
}
}
}
mode_ = LITE_POWER_NO_BIND;
mode_ = lite_api::PowerMode::LITE_POWER_NO_BIND;
}
void DeviceInfo::RequestPowerRandHighMode(int shift_num, int thread_num) {
......@@ -901,7 +902,7 @@ void DeviceInfo::RequestPowerRandHighMode(int shift_num, int thread_num) {
int little_core_size = little_core_ids_.size();
active_ids_.clear();
if (big_core_size > 0) {
mode_ = LITE_POWER_RAND_HIGH;
mode_ = lite_api::PowerMode::LITE_POWER_RAND_HIGH;
if (thread_num > big_core_size) {
LOG(WARNING) << "Request thread num: " << thread_num
<< ", exceed the big cores size: " << big_core_size
......@@ -913,7 +914,7 @@ void DeviceInfo::RequestPowerRandHighMode(int shift_num, int thread_num) {
}
}
} else {
mode_ = LITE_POWER_LOW;
mode_ = lite_api::PowerMode::LITE_POWER_LOW;
LOG(WARNING) << "HIGH POWER MODE is not support, switch to little cores.";
if (thread_num > little_core_size) {
active_ids_ = little_core_ids_;
......@@ -930,7 +931,7 @@ void DeviceInfo::RequestPowerRandLowMode(int shift_num, int thread_num) {
int little_core_size = little_core_ids_.size();
active_ids_.clear();
if (little_core_size > 0) {
mode_ = LITE_POWER_RAND_LOW;
mode_ = lite_api::PowerMode::LITE_POWER_RAND_LOW;
if (thread_num > little_core_size) {
LOG(WARNING) << "Request thread num: " << thread_num
<< ", exceed the little cores size: " << little_core_size
......@@ -943,7 +944,7 @@ void DeviceInfo::RequestPowerRandLowMode(int shift_num, int thread_num) {
}
}
} else {
mode_ = LITE_POWER_HIGH;
mode_ = lite_api::PowerMode::LITE_POWER_HIGH;
LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores.";
if (thread_num > big_core_size) {
active_ids_ = big_core_ids_;
......@@ -957,6 +958,7 @@ void DeviceInfo::RequestPowerRandLowMode(int shift_num, int thread_num) {
int DeviceInfo::Setup() {
core_num_ = get_cpu_num();
printf("core number: %d\n", core_num_);
mem_size_ = get_mem_size();
get_cpu_arch(&archs_, core_num_);
// set default CPU info
......@@ -966,10 +968,10 @@ int DeviceInfo::Setup() {
SetFP32Info(1, 1);
SetFP16Info(1, 0);
SetDotInfo(1, 0);
#ifdef LITE_WITH_LINUX
// get max&min freq
max_freqs_.resize(core_num_);
min_freqs_.resize(core_num_);
#ifdef LITE_WITH_LINUX
// get max&min freq
for (int i = 0; i < core_num_; ++i) {
int max_freq, min_freq;
get_cpu_max_min_freq(i, &max_freq, &min_freq);
......@@ -981,6 +983,30 @@ int DeviceInfo::Setup() {
if (!SetCPUInfoByName()) {
SetCPUInfoByProb();
}
core_ids_.resize(core_num_);
cluster_ids_.resize(core_num_);
for (int i = 0; i < core_num_; ++i) {
max_freqs_[i] = 1000000;
min_freqs_[i] = 1000000;
cluster_ids_[i] = 0;
}
#else
#ifdef TARGET_IOS
dev_name_ = "Apple";
#else
dev_name_ = "Unknown";
#endif
core_ids_.resize(core_num_);
cluster_ids_.resize(core_num_);
big_core_ids_.resize(core_num_);
for (int i = 0; i < core_num_; ++i) {
max_freqs_[i] = 1000000;
min_freqs_[i] = 1000000;
cluster_ids_[i] = 0;
core_ids_[i] = i;
big_core_ids_[i] = i;
}
#endif
// output info
LOG(INFO) << "ARM multiprocessors name: " << dev_name_;
LOG(INFO) << "ARM multiprocessors number: " << core_num_;
......@@ -1004,13 +1030,12 @@ int DeviceInfo::Setup() {
LOG(INFO) << L3_cache_[i] / 1024 << " KB";
}
LOG(INFO) << "Total memory: " << mem_size_ << "KB";
#endif
// set default run mode
SetRunMode(LITE_POWER_NO_BIND, 1); // use single thread by default
SetRunMode(lite_api::PowerMode::LITE_POWER_NO_BIND, 1); // use single thread by default
return 0;
}
void DeviceInfo::SetRunMode(PowerMode mode, int thread_num) {
void DeviceInfo::SetRunMode(lite_api::PowerMode mode, int thread_num) {
#ifdef ARM_WITH_OMP
thread_num = std::min(thread_num, core_num_);
#else
......@@ -1024,22 +1049,22 @@ void DeviceInfo::SetRunMode(PowerMode mode, int thread_num) {
count_++;
int shift_num = (count_ / 10) % big_core_size;
switch (mode) {
case LITE_POWER_FULL:
case lite_api::LITE_POWER_FULL:
RequestPowerFullMode(thread_num);
break;
case LITE_POWER_HIGH:
case lite_api::LITE_POWER_HIGH:
RequestPowerHighMode(thread_num);
break;
case LITE_POWER_LOW:
case lite_api::LITE_POWER_LOW:
RequestPowerLowMode(thread_num);
break;
case LITE_POWER_NO_BIND:
case lite_api::LITE_POWER_NO_BIND:
RequestPowerNoBindMode(thread_num);
break;
case LITE_POWER_RAND_HIGH:
case lite_api::LITE_POWER_RAND_HIGH:
RequestPowerRandHighMode(shift_num, thread_num);
break;
case LITE_POWER_RAND_LOW:
case lite_api::LITE_POWER_RAND_LOW:
RequestPowerRandLowMode(shift_num, thread_num);
break;
default:
......@@ -1052,12 +1077,12 @@ void DeviceInfo::SetRunMode(PowerMode mode, int thread_num) {
#ifdef ARM_WITH_OMP
omp_set_num_threads(active_ids_.size());
#endif
if (mode_ != LITE_POWER_NO_BIND) {
if (mode_ != lite_api::LITE_POWER_NO_BIND) {
if (check_cpu_online(active_ids_)) {
bind_threads(active_ids_);
} else {
LOG(WARNING) << "Some cores are offline, switch to NO BIND MODE";
mode_ = LITE_POWER_NO_BIND;
mode_ = lite_api::LITE_POWER_NO_BIND;
}
}
#else // LITE_WITH_LINUX
......@@ -1080,7 +1105,7 @@ void DeviceInfo::SetCache(int l1size, int l2size, int l3size) {
workspace_.Resize({2 * (l1size + l2size)});
}
bool DeviceInfo::ExtendWorkspace(size_t size) {
bool DeviceInfo::ExtendWorkspace(int size) {
workspace_.Resize({size + llc_size()});
workspace_.mutable_data<int8_t>();
return true;
......
......@@ -25,15 +25,6 @@ namespace lite {
#ifdef LITE_WITH_ARM
typedef enum {
LITE_POWER_HIGH = 0,
LITE_POWER_LOW = 1,
LITE_POWER_FULL = 2,
LITE_POWER_NO_BIND = 3,
LITE_POWER_RAND_HIGH = 4,
LITE_POWER_RAND_LOW = 5
} PowerMode;
typedef enum {
kAPPLE = 0,
kA53 = 53,
......@@ -60,11 +51,11 @@ class DeviceInfo {
int Setup();
void SetRunMode(PowerMode mode, int thread_num);
void SetRunMode(lite_api::PowerMode mode, int thread_num);
void SetCache(int l1size, int l2size, int l3size);
void SetArch(ARMArch arch) { arch_ = arch; }
PowerMode mode() const { return mode_; }
lite_api::PowerMode mode() const { return mode_; }
int threads() const { return active_ids_.size(); }
ARMArch arch() const { return arch_; }
int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
......@@ -82,7 +73,7 @@ class DeviceInfo {
T* workspace_data() {
return reinterpret_cast<T*>(workspace_.mutable_data<int8_t>());
}
bool ExtendWorkspace(size_t size);
bool ExtendWorkspace(int size);
private:
int core_num_;
......@@ -107,7 +98,7 @@ class DeviceInfo {
// LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core,
// LITE_POWER_FULL stands for using all cores
PowerMode mode_;
lite_api::PowerMode mode_;
std::vector<int> active_ids_;
TensorLite workspace_;
int64_t count_{0};
......
......@@ -171,9 +171,9 @@ void test_fc(Place place) {
DDim bdim{{bflag ? n : 0}};
std::unique_ptr<arena::TestCase> tester(
new FcOPTest(place, "def", dim_in, wdim, bdim, 1));
#ifdef WITH_ARM_LITE
#ifdef LITE_WITH_ARM
auto& ctx = tester->context()->As<ARMContext>();
ctx.SetRunMode(LITE_POWER_HIGH, 1);
ctx.SetRunMode(lite_api::LITE_POWER_HIGH, 1);
#endif
arena::Arena arena(std::move(tester), place, 6e-5);
if (!arena.TestPrecision()) {
......
......@@ -344,7 +344,7 @@ void test_gru_unit(Place place) {
place, "def", 1 /* sigomoid */, 2 /* tanh */, false, dims));
#ifdef LITE_WITH_ARM
auto& ctx = tester->context()->template As<ARMContext>();
ctx.SetRunMode(LITE_POWER_HIGH, 1);
ctx.SetRunMode(lite_api::LITE_POWER_HIGH, 1);
#endif
arena::Arena arena(std::move(tester), place, 2e-5);
arena.TestPrecision();
......
#!/bin/bash
set -e
build_dir=build.ios.armv7.arm64
mkdir -p ${build_dir}
......@@ -15,11 +16,15 @@ cmake .. \
-DLITE_WITH_CUDA=OFF \
-DLITE_WITH_X86=OFF \
-DLITE_WITH_ARM=ON \
-DLITE_WITH_OPENMP=ON \
-DWITH_TESTING=OFF \
-DLITE_WITH_JAVA=OFF \
-DLITE_SHUTDOWN_LOG=ON \
-DLITE_ON_TINY_PUBLISH=ON \
-DLITE_WITH_OPENMP=OFF \
-DWITH_ARM_DOTPROD=OFF \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
-DWITH_TESTING=ON \
-DARM_TARGET_OS=ios
make -j2
make -j4
cd -
......@@ -33,7 +33,7 @@ void Run(DebugConfig* conf) {
CHECK(conf);
#ifdef LITE_WITH_ARM
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, conf->arm_thread_num);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, conf->arm_thread_num);
#endif
lite::Predictor predictor;
std::vector<Place> valid_places({
......