未验证 提交 bc79142c 编写于 作者: X Xiaoyang LI 提交者: GitHub

support setting cluster and threads in MobileConfig (#1848)

* fix error when building the iOS tiny publish lib

* support setting cluster and threads in MobileConfig

* fix build error, test=develop

* fix error when building the server publish target, test=develop
上级 26c78173
...@@ -95,7 +95,7 @@ endif() ...@@ -95,7 +95,7 @@ endif()
# check options # check options
if (LITE_ON_TINY_PUBLISH) if (LITE_ON_TINY_PUBLISH)
if (NOT (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_JAVA AND NOT WITH_TESTING)) if (NOT (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND NOT WITH_TESTING))#LITE_WITH_JAVA AND
message(FATAL_ERROR "LITE_ON_TINY_PUBLISH=ON must be used with WITH_LITE=ON LITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON LITE_WITH_JAVA=ON WITH_TESTING=OFF") message(FATAL_ERROR "LITE_ON_TINY_PUBLISH=ON must be used with WITH_LITE=ON LITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON LITE_WITH_JAVA=ON WITH_TESTING=OFF")
return() return()
endif() endif()
......
...@@ -127,6 +127,7 @@ elseif(ARM_TARGET_OS STREQUAL "ios64") ...@@ -127,6 +127,7 @@ elseif(ARM_TARGET_OS STREQUAL "ios64")
else() else()
return() return()
endif() endif()
add_definitions(-DTARGET_IOS)
# if do not specify the ARM_TARGET_ARCH_ABI then use default all supported # if do not specify the ARM_TARGET_ARCH_ABI then use default all supported
if(ARM_TARGET_ARCH_ABI STREQUAL "armv7" if(ARM_TARGET_ARCH_ABI STREQUAL "armv7"
......
...@@ -32,7 +32,11 @@ ELSE(WIN32) ...@@ -32,7 +32,11 @@ ELSE(WIN32)
SET(CMAKE_OSX_DEPLOYMENT_TARGET ${MACOS_VERSION} CACHE STRING SET(CMAKE_OSX_DEPLOYMENT_TARGET ${MACOS_VERSION} CACHE STRING
"Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. Set to empty string for default value.") "Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. Set to empty string for default value.")
ENDIF() ENDIF()
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") IF(ARM_TARGET_OS STREQUAL "android" OR ARM_TARGET_OS STREQUAL "armlinux"
OR ARM_TARGET_OS STREQUAL "ios" OR ARM_TARGET_OS STREQUAL "ios64")
ELSE()
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
ENDIF()
ELSE(APPLE) ELSE(APPLE)
IF(EXISTS "/etc/issue") IF(EXISTS "/etc/issue")
......
...@@ -77,14 +77,16 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM) ...@@ -77,14 +77,16 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
COMMAND cp "${CMAKE_BINARY_DIR}/lite/gen_code/paddle_code_generator" "${INFER_LITE_PUBLISH_ROOT}/bin" COMMAND cp "${CMAKE_BINARY_DIR}/lite/gen_code/paddle_code_generator" "${INFER_LITE_PUBLISH_ROOT}/bin"
COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/test_model_bin" "${INFER_LITE_PUBLISH_ROOT}/bin" COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/test_model_bin" "${INFER_LITE_PUBLISH_ROOT}/bin"
) )
add_dependencies(publish_inference_cxx_lib model_optimize_tool) if(NOT IOS)
add_dependencies(publish_inference_cxx_lib paddle_code_generator) add_dependencies(publish_inference_cxx_lib model_optimize_tool)
add_dependencies(publish_inference_cxx_lib bundle_full_api) add_dependencies(publish_inference_cxx_lib paddle_code_generator)
add_dependencies(publish_inference_cxx_lib bundle_light_api) add_dependencies(publish_inference_cxx_lib bundle_full_api)
add_dependencies(publish_inference_cxx_lib test_model_bin) add_dependencies(publish_inference_cxx_lib bundle_light_api)
add_dependencies(publish_inference publish_inference_cxx_lib) add_dependencies(publish_inference_cxx_lib test_model_bin)
add_custom_command(TARGET publish_inference_cxx_lib POST_BUILD add_dependencies(publish_inference publish_inference_cxx_lib)
COMMAND ${CMAKE_STRIP} "--strip-debug" ${INFER_LITE_PUBLISH_ROOT}/cxx/lib/*.a) add_custom_command(TARGET publish_inference_cxx_lib POST_BUILD
COMMAND ${CMAKE_STRIP} "--strip-debug" ${INFER_LITE_PUBLISH_ROOT}/cxx/lib/*.a)
endif()
endif() endif()
......
...@@ -175,7 +175,11 @@ lite_cc_library(paddle_api SRCS paddle_api.cc DEPS op_params tensor) ...@@ -175,7 +175,11 @@ lite_cc_library(paddle_api SRCS paddle_api.cc DEPS op_params tensor)
#----------------------------------------------------------------------------------------------------- #-----------------------------------------------------------------------------------------------------
# The final inference library for both CxxConfig and MobileConfig. # The final inference library for both CxxConfig and MobileConfig.
lite_cc_library(paddle_api_light SRCS light_api_impl.cc DEPS light_api paddle_api) if (LITE_ON_TINY_PUBLISH)
lite_cc_library(paddle_api_light SRCS light_api_impl.cc DEPS light_api paddle_api stream)
else()
lite_cc_library(paddle_api_light SRCS light_api_impl.cc DEPS light_api paddle_api)
endif()
if (NOT LITE_ON_TINY_PUBLISH) if (NOT LITE_ON_TINY_PUBLISH)
lite_cc_library(paddle_api_full SRCS cxx_api_impl.cc DEPS cxx_api paddle_api light_api lite_cc_library(paddle_api_full SRCS cxx_api_impl.cc DEPS cxx_api paddle_api light_api
${ops} ${ops}
......
...@@ -69,10 +69,10 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes, ...@@ -69,10 +69,10 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
#ifdef LITE_WITH_ARM #ifdef LITE_WITH_ARM
lite::DeviceInfo::Init(); lite::DeviceInfo::Init();
if (thread_num == 1) { if (thread_num == 1) {
lite::DeviceInfo::Global().SetRunMode(lite::LITE_POWER_HIGH, thread_num); lite::DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, thread_num);
LOG(INFO) << "LITE_POWER_HIGH"; LOG(INFO) << "LITE_POWER_HIGH";
} else { } else {
lite::DeviceInfo::Global().SetRunMode(lite::LITE_POWER_NO_BIND, thread_num); lite::DeviceInfo::Global().SetRunMode(LITE_POWER_NO_BIND, thread_num);
LOG(INFO) << "LITE_POWER_NO_BIND"; LOG(INFO) << "LITE_POWER_NO_BIND";
} }
#endif #endif
......
...@@ -28,7 +28,7 @@ namespace lite { ...@@ -28,7 +28,7 @@ namespace lite {
void TestModel(const std::vector<Place> &valid_places, void TestModel(const std::vector<Place> &valid_places,
const Place &preferred_place) { const Place &preferred_place) {
DeviceInfo::Init(); DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor; lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places); predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
...@@ -28,7 +28,7 @@ namespace lite { ...@@ -28,7 +28,7 @@ namespace lite {
#ifdef LITE_WITH_ARM #ifdef LITE_WITH_ARM
TEST(InceptionV4, test) { TEST(InceptionV4, test) {
DeviceInfo::Init(); DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor; lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}}); Place{TARGET(kARM), PRECISION(kFloat)}});
......
...@@ -40,6 +40,10 @@ class LightPredictorImpl : public PaddlePredictor { ...@@ -40,6 +40,10 @@ class LightPredictorImpl : public PaddlePredictor {
void LightPredictorImpl::Init(const MobileConfig& config) { void LightPredictorImpl::Init(const MobileConfig& config) {
// LightPredictor Only support NaiveBuffer backend in publish lib // LightPredictor Only support NaiveBuffer backend in publish lib
#ifdef LITE_WITH_ARM
lite::DeviceInfo::Init();
lite::DeviceInfo::Global().SetRunMode(config.power_mode(), config.threads());
#endif
raw_predictor_.reset(new lite::LightPredictor(config.model_dir(), raw_predictor_.reset(new lite::LightPredictor(config.model_dir(),
LiteModelType::kNaiveBuffer)); LiteModelType::kNaiveBuffer));
} }
......
...@@ -29,7 +29,7 @@ void TestModel(const std::vector<Place>& valid_places, ...@@ -29,7 +29,7 @@ void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place, const Place& preferred_place,
bool use_npu = false) { bool use_npu = false) {
DeviceInfo::Init(); DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor; lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places); predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
...@@ -29,7 +29,7 @@ namespace lite { ...@@ -29,7 +29,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places, void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) { const Place& preferred_place) {
DeviceInfo::Init(); DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor; lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places); predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
...@@ -33,7 +33,7 @@ void TestModel(const std::vector<Place>& valid_places, ...@@ -33,7 +33,7 @@ void TestModel(const std::vector<Place>& valid_places,
bool gen_npu = false, bool gen_npu = false,
bool save_model = false) { bool save_model = false) {
DeviceInfo::Init(); DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor; lite::Predictor predictor;
predictor.Build(model_dir, preferred_place, valid_places); predictor.Build(model_dir, preferred_place, valid_places);
......
...@@ -29,7 +29,7 @@ namespace lite { ...@@ -29,7 +29,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places, void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) { const Place& preferred_place) {
DeviceInfo::Init(); DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor; lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places); predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
...@@ -34,7 +34,7 @@ void TestModel(const std::vector<Place>& valid_places, ...@@ -34,7 +34,7 @@ void TestModel(const std::vector<Place>& valid_places,
bool gen_npu = false, bool gen_npu = false,
bool save_model = false) { bool save_model = false) {
DeviceInfo::Init(); DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor; lite::Predictor predictor;
predictor.Build(model_dir, preferred_place, valid_places); predictor.Build(model_dir, preferred_place, valid_places);
......
...@@ -28,7 +28,7 @@ namespace lite { ...@@ -28,7 +28,7 @@ namespace lite {
TEST(model, test) { TEST(model, test) {
#ifdef LITE_WITH_ARM #ifdef LITE_WITH_ARM
DeviceInfo::Init(); DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor; lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)},
......
...@@ -64,7 +64,7 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes, ...@@ -64,7 +64,7 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
const int warmup_times = 0) { const int warmup_times = 0) {
#ifdef LITE_WITH_ARM #ifdef LITE_WITH_ARM
lite::DeviceInfo::Init(); lite::DeviceInfo::Init();
lite::DeviceInfo::Global().SetRunMode(lite::LITE_POWER_HIGH, thread_num); lite::DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, thread_num);
#endif #endif
lite_api::MobileConfig config; lite_api::MobileConfig config;
config.set_model_dir(model_dir); config.set_model_dir(model_dir);
......
...@@ -29,7 +29,7 @@ void TestModel(const std::vector<Place>& valid_places, ...@@ -29,7 +29,7 @@ void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place, const Place& preferred_place,
bool use_npu = false) { bool use_npu = false) {
DeviceInfo::Init(); DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor; lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places); predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
...@@ -110,7 +110,18 @@ class LITE_API CxxConfig : public ConfigBase { ...@@ -110,7 +110,18 @@ class LITE_API CxxConfig : public ConfigBase {
/// MobileConfig is the config for the light weight predictor, it will skip /// MobileConfig is the config for the light weight predictor, it will skip
/// IR optimization or other unnecessary stages. /// IR optimization or other unnecessary stages.
class LITE_API MobileConfig : public ConfigBase {}; class LITE_API MobileConfig : public ConfigBase {
PowerMode mode_{LITE_POWER_HIGH};
int threads_{1};
public:
MobileConfig(Place preferred_place=Place(TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW)),
PowerMode mode=LITE_POWER_HIGH, int threads=1) : mode_(mode), threads_(threads) {}
void set_power_mode(PowerMode mode) { mode_ = mode; }
void set_threads(int threads) { threads_ = threads; }
PowerMode power_mode() const { return mode_; }
int threads() const { return threads_; }
};
template <typename ConfigT> template <typename ConfigT>
std::shared_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT&); std::shared_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT&);
......
...@@ -70,6 +70,14 @@ enum class DataLayoutType : int { ...@@ -70,6 +70,14 @@ enum class DataLayoutType : int {
kAny = 2, // any data layout kAny = 2, // any data layout
NUM = 4, // number of fields. NUM = 4, // number of fields.
}; };
// CPU power mode selector. It is passed, together with a thread count, to
// DeviceInfo::SetRunMode(), which dispatches on it to the matching
// DeviceInfo::RequestPower*Mode() routine. MobileConfig stores one of these
// values and defaults to LITE_POWER_HIGH.
typedef enum {
LITE_POWER_HIGH = 0,  // use big cores; SetRunMode falls back to little cores when no big core exists
LITE_POWER_LOW = 1,  // use little cores; SetRunMode falls back to big cores when no little core exists
LITE_POWER_FULL = 2,  // use all cores
LITE_POWER_NO_BIND = 3,  // threads are not bound to specific cores (SetRunMode skips bind_threads)
LITE_POWER_RAND_HIGH = 4,  // big cores, selected with a shift derived from a call counter -- NOTE(review): exact rotation not visible here, confirm
LITE_POWER_RAND_LOW = 5  // little cores, selected with the same counter-derived shift -- NOTE(review): confirm against RequestPowerRandLowMode
} PowerMode;
enum class ActivationType : int { enum class ActivationType : int {
kIndentity = 0, kIndentity = 0,
......
...@@ -28,7 +28,7 @@ namespace lite { ...@@ -28,7 +28,7 @@ namespace lite {
#ifdef LITE_WITH_ARM #ifdef LITE_WITH_ARM
TEST(ResNet18, test) { TEST(ResNet18, test) {
DeviceInfo::Init(); DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor; lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}}); Place{TARGET(kARM), PRECISION(kFloat)}});
......
...@@ -29,7 +29,7 @@ namespace lite { ...@@ -29,7 +29,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places, void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) { const Place& preferred_place) {
DeviceInfo::Init(); DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor; lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places); predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
...@@ -28,7 +28,7 @@ namespace lite { ...@@ -28,7 +28,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places, void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) { const Place& preferred_place) {
DeviceInfo::Init(); DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor; lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places); predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
...@@ -28,7 +28,7 @@ namespace lite { ...@@ -28,7 +28,7 @@ namespace lite {
#ifdef LITE_WITH_ARM #ifdef LITE_WITH_ARM
TEST(unet, test) { TEST(unet, test) {
DeviceInfo::Init(); DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor; lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}}); Place{TARGET(kARM), PRECISION(kFloat)}});
......
...@@ -65,7 +65,6 @@ if (NOT HAS_ARM_MATH_LIB_DIR) ...@@ -65,7 +65,6 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
conv_direct_3x3s1.cc conv_direct_3x3s1.cc
conv_direct_3x3s2.cc conv_direct_3x3s2.cc
conv_direct.cc conv_direct.cc
conv_depthwise_3x3_int7.cc
conv_depthwise_3x3_int8.cc conv_depthwise_3x3_int8.cc
conv_depthwise_5x5s1_int8.cc conv_depthwise_5x5s1_int8.cc
conv_depthwise_3x3p0.cc conv_depthwise_3x3p0.cc
......
因为它太大了，无法显示 source diff。你可以改为查看 blob。
...@@ -101,7 +101,7 @@ class Context<TargetType::kARM> { ...@@ -101,7 +101,7 @@ class Context<TargetType::kARM> {
void CopySharedTo(ARMContext* ctx) {} void CopySharedTo(ARMContext* ctx) {}
void SetRunMode(PowerMode mode, int threads) { void SetRunMode(lite_api::PowerMode mode, int threads) {
return DeviceInfo::Global().SetRunMode(mode, threads); return DeviceInfo::Global().SetRunMode(mode, threads);
} }
void SetCache(int l1size, int l2size, int l3size) { void SetCache(int l1size, int l2size, int l3size) {
...@@ -109,7 +109,7 @@ class Context<TargetType::kARM> { ...@@ -109,7 +109,7 @@ class Context<TargetType::kARM> {
} }
void SetArch(ARMArch arch) { return DeviceInfo::Global().SetArch(arch); } void SetArch(ARMArch arch) { return DeviceInfo::Global().SetArch(arch); }
PowerMode mode() const { return DeviceInfo::Global().mode(); } lite_api::PowerMode mode() const { return DeviceInfo::Global().mode(); }
int threads() const { return DeviceInfo::Global().threads(); } int threads() const { return DeviceInfo::Global().threads(); }
ARMArch arch() const { return DeviceInfo::Global().arch(); } ARMArch arch() const { return DeviceInfo::Global().arch(); }
int l1_cache_size() const { return DeviceInfo::Global().l1_cache_size(); } int l1_cache_size() const { return DeviceInfo::Global().l1_cache_size(); }
......
...@@ -119,7 +119,8 @@ size_t get_mem_size() { ...@@ -119,7 +119,8 @@ size_t get_mem_size() {
return memsize; return memsize;
#elif defined(TARGET_IOS) #elif defined(TARGET_IOS)
// to be implemented // to be implemented
printf("not implemented\n"); printf("not implemented, set to default 4GB\n");
return 4096 * 1024;
#endif #endif
return 0; return 0;
} }
...@@ -209,7 +210,7 @@ void get_cpu_arch(std::vector<ARMArch>* archs, const int cpu_num) { ...@@ -209,7 +210,7 @@ void get_cpu_arch(std::vector<ARMArch>* archs, const int cpu_num) {
} }
#elif defined(TARGET_IOS) #elif defined(TARGET_IOS)
for (int i = 0; i < cpu_num; ++i) { for (int i = 0; i < cpu_num; ++i) {
archs->at(i) = APPLE; archs->at(i) = kAPPLE;
} }
#endif #endif
} }
...@@ -818,7 +819,7 @@ void DeviceInfo::RequestPowerFullMode(int thread_num) { ...@@ -818,7 +819,7 @@ void DeviceInfo::RequestPowerFullMode(int thread_num) {
active_ids_.push_back(little_core_ids_[i - big_core_size]); active_ids_.push_back(little_core_ids_[i - big_core_size]);
} }
} }
mode_ = LITE_POWER_FULL; mode_ = lite_api::PowerMode::LITE_POWER_FULL;
} }
void DeviceInfo::RequestPowerHighMode(int thread_num) { void DeviceInfo::RequestPowerHighMode(int thread_num) {
...@@ -826,7 +827,7 @@ void DeviceInfo::RequestPowerHighMode(int thread_num) { ...@@ -826,7 +827,7 @@ void DeviceInfo::RequestPowerHighMode(int thread_num) {
int little_core_size = little_core_ids_.size(); int little_core_size = little_core_ids_.size();
active_ids_.clear(); active_ids_.clear();
if (big_core_size > 0) { if (big_core_size > 0) {
mode_ = LITE_POWER_HIGH; mode_ =lite_api::PowerMode::LITE_POWER_HIGH;
if (thread_num > big_core_size) { if (thread_num > big_core_size) {
LOG(ERROR) << "Request thread num: " << thread_num LOG(ERROR) << "Request thread num: " << thread_num
<< ", exceed the big cores size: " << big_core_size << ", exceed the big cores size: " << big_core_size
...@@ -838,7 +839,7 @@ void DeviceInfo::RequestPowerHighMode(int thread_num) { ...@@ -838,7 +839,7 @@ void DeviceInfo::RequestPowerHighMode(int thread_num) {
} }
} }
} else { } else {
mode_ = LITE_POWER_LOW; mode_ = lite_api::PowerMode::LITE_POWER_LOW;
LOG(ERROR) << "HIGH POWER MODE is not support, switch to little cores."; LOG(ERROR) << "HIGH POWER MODE is not support, switch to little cores.";
if (thread_num > little_core_size) { if (thread_num > little_core_size) {
active_ids_ = little_core_ids_; active_ids_ = little_core_ids_;
...@@ -855,7 +856,7 @@ void DeviceInfo::RequestPowerLowMode(int thread_num) { ...@@ -855,7 +856,7 @@ void DeviceInfo::RequestPowerLowMode(int thread_num) {
int little_core_size = little_core_ids_.size(); int little_core_size = little_core_ids_.size();
active_ids_.clear(); active_ids_.clear();
if (little_core_size > 0) { if (little_core_size > 0) {
mode_ = LITE_POWER_LOW; mode_ = lite_api::PowerMode::LITE_POWER_LOW;
if (thread_num > little_core_size) { if (thread_num > little_core_size) {
LOG(WARNING) << "Request thread num: " << thread_num LOG(WARNING) << "Request thread num: " << thread_num
<< ", exceed the little cores size: " << little_core_size << ", exceed the little cores size: " << little_core_size
...@@ -867,7 +868,7 @@ void DeviceInfo::RequestPowerLowMode(int thread_num) { ...@@ -867,7 +868,7 @@ void DeviceInfo::RequestPowerLowMode(int thread_num) {
} }
} }
} else { } else {
mode_ = LITE_POWER_HIGH; mode_ = lite_api::PowerMode::LITE_POWER_HIGH;
LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores"; LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores";
if (thread_num > big_core_size) { if (thread_num > big_core_size) {
active_ids_ = big_core_ids_; active_ids_ = big_core_ids_;
...@@ -893,7 +894,7 @@ void DeviceInfo::RequestPowerNoBindMode(int thread_num) { ...@@ -893,7 +894,7 @@ void DeviceInfo::RequestPowerNoBindMode(int thread_num) {
} }
} }
} }
mode_ = LITE_POWER_NO_BIND; mode_ = lite_api::PowerMode::LITE_POWER_NO_BIND;
} }
void DeviceInfo::RequestPowerRandHighMode(int shift_num, int thread_num) { void DeviceInfo::RequestPowerRandHighMode(int shift_num, int thread_num) {
...@@ -901,7 +902,7 @@ void DeviceInfo::RequestPowerRandHighMode(int shift_num, int thread_num) { ...@@ -901,7 +902,7 @@ void DeviceInfo::RequestPowerRandHighMode(int shift_num, int thread_num) {
int little_core_size = little_core_ids_.size(); int little_core_size = little_core_ids_.size();
active_ids_.clear(); active_ids_.clear();
if (big_core_size > 0) { if (big_core_size > 0) {
mode_ = LITE_POWER_RAND_HIGH; mode_ = lite_api::PowerMode::LITE_POWER_RAND_HIGH;
if (thread_num > big_core_size) { if (thread_num > big_core_size) {
LOG(WARNING) << "Request thread num: " << thread_num LOG(WARNING) << "Request thread num: " << thread_num
<< ", exceed the big cores size: " << big_core_size << ", exceed the big cores size: " << big_core_size
...@@ -913,7 +914,7 @@ void DeviceInfo::RequestPowerRandHighMode(int shift_num, int thread_num) { ...@@ -913,7 +914,7 @@ void DeviceInfo::RequestPowerRandHighMode(int shift_num, int thread_num) {
} }
} }
} else { } else {
mode_ = LITE_POWER_LOW; mode_ = lite_api::PowerMode::LITE_POWER_LOW;
LOG(WARNING) << "HIGH POWER MODE is not support, switch to little cores."; LOG(WARNING) << "HIGH POWER MODE is not support, switch to little cores.";
if (thread_num > little_core_size) { if (thread_num > little_core_size) {
active_ids_ = little_core_ids_; active_ids_ = little_core_ids_;
...@@ -930,7 +931,7 @@ void DeviceInfo::RequestPowerRandLowMode(int shift_num, int thread_num) { ...@@ -930,7 +931,7 @@ void DeviceInfo::RequestPowerRandLowMode(int shift_num, int thread_num) {
int little_core_size = little_core_ids_.size(); int little_core_size = little_core_ids_.size();
active_ids_.clear(); active_ids_.clear();
if (little_core_size > 0) { if (little_core_size > 0) {
mode_ = LITE_POWER_RAND_LOW; mode_ = lite_api::PowerMode::LITE_POWER_RAND_LOW;
if (thread_num > little_core_size) { if (thread_num > little_core_size) {
LOG(WARNING) << "Request thread num: " << thread_num LOG(WARNING) << "Request thread num: " << thread_num
<< ", exceed the little cores size: " << little_core_size << ", exceed the little cores size: " << little_core_size
...@@ -943,7 +944,7 @@ void DeviceInfo::RequestPowerRandLowMode(int shift_num, int thread_num) { ...@@ -943,7 +944,7 @@ void DeviceInfo::RequestPowerRandLowMode(int shift_num, int thread_num) {
} }
} }
} else { } else {
mode_ = LITE_POWER_HIGH; mode_ = lite_api::PowerMode::LITE_POWER_HIGH;
LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores."; LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores.";
if (thread_num > big_core_size) { if (thread_num > big_core_size) {
active_ids_ = big_core_ids_; active_ids_ = big_core_ids_;
...@@ -957,6 +958,7 @@ void DeviceInfo::RequestPowerRandLowMode(int shift_num, int thread_num) { ...@@ -957,6 +958,7 @@ void DeviceInfo::RequestPowerRandLowMode(int shift_num, int thread_num) {
int DeviceInfo::Setup() { int DeviceInfo::Setup() {
core_num_ = get_cpu_num(); core_num_ = get_cpu_num();
printf("core number: %d\n", core_num_);
mem_size_ = get_mem_size(); mem_size_ = get_mem_size();
get_cpu_arch(&archs_, core_num_); get_cpu_arch(&archs_, core_num_);
// set defalut CPU info // set defalut CPU info
...@@ -966,10 +968,10 @@ int DeviceInfo::Setup() { ...@@ -966,10 +968,10 @@ int DeviceInfo::Setup() {
SetFP32Info(1, 1); SetFP32Info(1, 1);
SetFP16Info(1, 0); SetFP16Info(1, 0);
SetDotInfo(1, 0); SetDotInfo(1, 0);
#ifdef LITE_WITH_LINUX
// get max&min freq
max_freqs_.resize(core_num_); max_freqs_.resize(core_num_);
min_freqs_.resize(core_num_); min_freqs_.resize(core_num_);
#ifdef LITE_WITH_LINUX
// get max&min freq
for (int i = 0; i < core_num_; ++i) { for (int i = 0; i < core_num_; ++i) {
int max_freq, min_freq; int max_freq, min_freq;
get_cpu_max_min_freq(i, &max_freq, &min_freq); get_cpu_max_min_freq(i, &max_freq, &min_freq);
...@@ -981,6 +983,30 @@ int DeviceInfo::Setup() { ...@@ -981,6 +983,30 @@ int DeviceInfo::Setup() {
if (!SetCPUInfoByName()) { if (!SetCPUInfoByName()) {
SetCPUInfoByProb(); SetCPUInfoByProb();
} }
core_ids_.resize(core_num_);
cluster_ids_.resize(core_num_);
for (int i = 0; i < core_num_; ++i) {
max_freqs_[i] = 1000000;
min_freqs_[i] = 1000000;
cluster_ids_[i] = 0;
}
#else
#ifdef TARGET_IOS
dev_name_ = "Apple";
#else
dev_name_ = "Unknown";
#endif
core_ids_.resize(core_num_);
cluster_ids_.resize(core_num_);
big_core_ids_.resize(core_num_);
for (int i = 0; i < core_num_; ++i) {
max_freqs_[i] = 1000000;
min_freqs_[i] = 1000000;
cluster_ids_[i] = 0;
core_ids_[i] = i;
big_core_ids_[i] = i;
}
#endif
// output info // output info
LOG(INFO) << "ARM multiprocessors name: " << dev_name_; LOG(INFO) << "ARM multiprocessors name: " << dev_name_;
LOG(INFO) << "ARM multiprocessors number: " << core_num_; LOG(INFO) << "ARM multiprocessors number: " << core_num_;
...@@ -1004,13 +1030,12 @@ int DeviceInfo::Setup() { ...@@ -1004,13 +1030,12 @@ int DeviceInfo::Setup() {
LOG(INFO) << L3_cache_[i] / 1024 << " KB"; LOG(INFO) << L3_cache_[i] / 1024 << " KB";
} }
LOG(INFO) << "Total memory: " << mem_size_ << "KB"; LOG(INFO) << "Total memory: " << mem_size_ << "KB";
#endif
// set default run mode // set default run mode
SetRunMode(LITE_POWER_NO_BIND, 1); // use single thread by default SetRunMode(lite_api::PowerMode::LITE_POWER_NO_BIND, 1); // use single thread by default
return 0; return 0;
} }
void DeviceInfo::SetRunMode(PowerMode mode, int thread_num) { void DeviceInfo::SetRunMode(lite_api::PowerMode mode, int thread_num) {
#ifdef ARM_WITH_OMP #ifdef ARM_WITH_OMP
thread_num = std::min(thread_num, core_num_); thread_num = std::min(thread_num, core_num_);
#else #else
...@@ -1024,22 +1049,22 @@ void DeviceInfo::SetRunMode(PowerMode mode, int thread_num) { ...@@ -1024,22 +1049,22 @@ void DeviceInfo::SetRunMode(PowerMode mode, int thread_num) {
count_++; count_++;
int shift_num = (count_ / 10) % big_core_size; int shift_num = (count_ / 10) % big_core_size;
switch (mode) { switch (mode) {
case LITE_POWER_FULL: case lite_api::LITE_POWER_FULL:
RequestPowerFullMode(thread_num); RequestPowerFullMode(thread_num);
break; break;
case LITE_POWER_HIGH: case lite_api::LITE_POWER_HIGH:
RequestPowerHighMode(thread_num); RequestPowerHighMode(thread_num);
break; break;
case LITE_POWER_LOW: case lite_api::LITE_POWER_LOW:
RequestPowerLowMode(thread_num); RequestPowerLowMode(thread_num);
break; break;
case LITE_POWER_NO_BIND: case lite_api::LITE_POWER_NO_BIND:
RequestPowerNoBindMode(thread_num); RequestPowerNoBindMode(thread_num);
break; break;
case LITE_POWER_RAND_HIGH: case lite_api::LITE_POWER_RAND_HIGH:
RequestPowerRandHighMode(shift_num, thread_num); RequestPowerRandHighMode(shift_num, thread_num);
break; break;
case LITE_POWER_RAND_LOW: case lite_api::LITE_POWER_RAND_LOW:
RequestPowerRandLowMode(shift_num, thread_num); RequestPowerRandLowMode(shift_num, thread_num);
break; break;
default: default:
...@@ -1052,12 +1077,12 @@ void DeviceInfo::SetRunMode(PowerMode mode, int thread_num) { ...@@ -1052,12 +1077,12 @@ void DeviceInfo::SetRunMode(PowerMode mode, int thread_num) {
#ifdef ARM_WITH_OMP #ifdef ARM_WITH_OMP
omp_set_num_threads(active_ids_.size()); omp_set_num_threads(active_ids_.size());
#endif #endif
if (mode_ != LITE_POWER_NO_BIND) { if (mode_ != lite_api::LITE_POWER_NO_BIND) {
if (check_cpu_online(active_ids_)) { if (check_cpu_online(active_ids_)) {
bind_threads(active_ids_); bind_threads(active_ids_);
} else { } else {
LOG(WARNING) << "Some cores are offline, switch to NO BIND MODE"; LOG(WARNING) << "Some cores are offline, switch to NO BIND MODE";
mode_ = LITE_POWER_NO_BIND; mode_ = lite_api::LITE_POWER_NO_BIND;
} }
} }
#else // LITE_WITH_LINUX #else // LITE_WITH_LINUX
...@@ -1080,7 +1105,7 @@ void DeviceInfo::SetCache(int l1size, int l2size, int l3size) { ...@@ -1080,7 +1105,7 @@ void DeviceInfo::SetCache(int l1size, int l2size, int l3size) {
workspace_.Resize({2 * (l1size + l2size)}); workspace_.Resize({2 * (l1size + l2size)});
} }
bool DeviceInfo::ExtendWorkspace(size_t size) { bool DeviceInfo::ExtendWorkspace(int size) {
workspace_.Resize({size + llc_size()}); workspace_.Resize({size + llc_size()});
workspace_.mutable_data<int8_t>(); workspace_.mutable_data<int8_t>();
return true; return true;
......
...@@ -25,15 +25,6 @@ namespace lite { ...@@ -25,15 +25,6 @@ namespace lite {
#ifdef LITE_WITH_ARM #ifdef LITE_WITH_ARM
typedef enum {
LITE_POWER_HIGH = 0,
LITE_POWER_LOW = 1,
LITE_POWER_FULL = 2,
LITE_POWER_NO_BIND = 3,
LITE_POWER_RAND_HIGH = 4,
LITE_POWER_RAND_LOW = 5
} PowerMode;
typedef enum { typedef enum {
kAPPLE = 0, kAPPLE = 0,
kA53 = 53, kA53 = 53,
...@@ -60,11 +51,11 @@ class DeviceInfo { ...@@ -60,11 +51,11 @@ class DeviceInfo {
int Setup(); int Setup();
void SetRunMode(PowerMode mode, int thread_num); void SetRunMode(lite_api::PowerMode mode, int thread_num);
void SetCache(int l1size, int l2size, int l3size); void SetCache(int l1size, int l2size, int l3size);
void SetArch(ARMArch arch) { arch_ = arch; } void SetArch(ARMArch arch) { arch_ = arch; }
PowerMode mode() const { return mode_; } lite_api::PowerMode mode() const { return mode_; }
int threads() const { return active_ids_.size(); } int threads() const { return active_ids_.size(); }
ARMArch arch() const { return arch_; } ARMArch arch() const { return arch_; }
int l1_cache_size() const { return L1_cache_[active_ids_[0]]; } int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
...@@ -82,7 +73,7 @@ class DeviceInfo { ...@@ -82,7 +73,7 @@ class DeviceInfo {
T* workspace_data() { T* workspace_data() {
return reinterpret_cast<T*>(workspace_.mutable_data<int8_t>()); return reinterpret_cast<T*>(workspace_.mutable_data<int8_t>());
} }
bool ExtendWorkspace(size_t size); bool ExtendWorkspace(int size);
private: private:
int core_num_; int core_num_;
...@@ -107,7 +98,7 @@ class DeviceInfo { ...@@ -107,7 +98,7 @@ class DeviceInfo {
// LITE_POWER_HIGH stands for using big cores, // LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core, // LITE_POWER_LOW stands for using small core,
// LITE_POWER_FULL stands for using all cores // LITE_POWER_FULL stands for using all cores
PowerMode mode_; lite_api::PowerMode mode_;
std::vector<int> active_ids_; std::vector<int> active_ids_;
TensorLite workspace_; TensorLite workspace_;
int64_t count_{0}; int64_t count_{0};
......
...@@ -171,9 +171,9 @@ void test_fc(Place place) { ...@@ -171,9 +171,9 @@ void test_fc(Place place) {
DDim bdim{{bflag ? n : 0}}; DDim bdim{{bflag ? n : 0}};
std::unique_ptr<arena::TestCase> tester( std::unique_ptr<arena::TestCase> tester(
new FcOPTest(place, "def", dim_in, wdim, bdim, 1)); new FcOPTest(place, "def", dim_in, wdim, bdim, 1));
#ifdef WITH_ARM_LITE #ifdef LITE_WITH_ARM
auto& ctx = tester->context()->As<ARMContext>(); auto& ctx = tester->context()->As<ARMContext>();
ctx.SetRunMode(LITE_POWER_HIGH, 1); ctx.SetRunMode(lite_api::LITE_POWER_HIGH, 1);
#endif #endif
arena::Arena arena(std::move(tester), place, 6e-5); arena::Arena arena(std::move(tester), place, 6e-5);
if (!arena.TestPrecision()) { if (!arena.TestPrecision()) {
......
...@@ -344,7 +344,7 @@ void test_gru_unit(Place place) { ...@@ -344,7 +344,7 @@ void test_gru_unit(Place place) {
place, "def", 1 /* sigomoid */, 2 /* tanh */, false, dims)); place, "def", 1 /* sigomoid */, 2 /* tanh */, false, dims));
#ifdef LITE_WITH_ARM #ifdef LITE_WITH_ARM
auto& ctx = tester->context()->template As<ARMContext>(); auto& ctx = tester->context()->template As<ARMContext>();
ctx.SetRunMode(LITE_POWER_HIGH, 1); ctx.SetRunMode(lite_api::LITE_POWER_HIGH, 1);
#endif #endif
arena::Arena arena(std::move(tester), place, 2e-5); arena::Arena arena(std::move(tester), place, 2e-5);
arena.TestPrecision(); arena.TestPrecision();
......
#!/bin/bash #!/bin/bash
set -e
build_dir=build.ios.armv7.arm64 build_dir=build.ios.armv7.arm64
mkdir -p ${build_dir} mkdir -p ${build_dir}
...@@ -15,11 +16,15 @@ cmake .. \ ...@@ -15,11 +16,15 @@ cmake .. \
-DLITE_WITH_CUDA=OFF \ -DLITE_WITH_CUDA=OFF \
-DLITE_WITH_X86=OFF \ -DLITE_WITH_X86=OFF \
-DLITE_WITH_ARM=ON \ -DLITE_WITH_ARM=ON \
-DLITE_WITH_OPENMP=ON \ -DWITH_TESTING=OFF \
-DLITE_WITH_JAVA=OFF \
-DLITE_SHUTDOWN_LOG=ON \
-DLITE_ON_TINY_PUBLISH=ON \
-DLITE_WITH_OPENMP=OFF \
-DWITH_ARM_DOTPROD=OFF \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \ -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
-DWITH_TESTING=ON \
-DARM_TARGET_OS=ios -DARM_TARGET_OS=ios
make -j2 make -j4
cd - cd -
...@@ -33,7 +33,7 @@ void Run(DebugConfig* conf) { ...@@ -33,7 +33,7 @@ void Run(DebugConfig* conf) {
CHECK(conf); CHECK(conf);
#ifdef LITE_WITH_ARM #ifdef LITE_WITH_ARM
DeviceInfo::Init(); DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, conf->arm_thread_num); DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, conf->arm_thread_num);
#endif #endif
lite::Predictor predictor; lite::Predictor predictor;
std::vector<Place> valid_places({ std::vector<Place> valid_places({
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册