Unverified commit bc79142c authored by Xiaoyang LI, committed by GitHub

support setting cluster and threads in MobileConfig (#1848)

* fix building ios tiny publish lib error

* support setting cluster and threads in MobileConfig

* fix build error, test=develop

* fix building server publish error, test=develop
Parent 26c78173
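For readers of this change, a minimal usage sketch of the new knobs (assuming the public header paddle_api.h and the paddle::lite_api namespace used elsewhere in the Lite API; the model path is illustrative):

#include "paddle_api.h"  // assumed public header exposing MobileConfig / CreatePaddlePredictor
using namespace paddle::lite_api;  // assumed namespace of the light-weight API

MobileConfig config;
config.set_model_dir("./mobilenet_v1_opt");  // hypothetical NaiveBuffer model directory
config.set_power_mode(LITE_POWER_NO_BIND);   // new in this change: choose CPU-cluster binding
config.set_threads(2);                       // new in this change: worker-thread count
// LightPredictorImpl::Init now forwards these settings to DeviceInfo::Global().SetRunMode().
auto predictor = CreatePaddlePredictor<MobileConfig>(config);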
......@@ -95,7 +95,7 @@ endif()
# check options
if (LITE_ON_TINY_PUBLISH)
if (NOT (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_JAVA AND NOT WITH_TESTING))
if (NOT (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND NOT WITH_TESTING))#LITE_WITH_JAVA AND
message(FATAL_ERROR "LITE_ON_TINY_PUBLISH=ON must be used with WITH_LITE=ON LITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON LITE_WITH_JAVA=ON WITH_TESTING=OFF")
return()
endif()
......
......@@ -127,6 +127,7 @@ elseif(ARM_TARGET_OS STREQUAL "ios64")
else()
return()
endif()
add_definitions(-DTARGET_IOS)
# if do not specify the ARM_TARGET_ARCH_ABI then use default all supported
if(ARM_TARGET_ARCH_ABI STREQUAL "armv7"
......
......@@ -32,7 +32,11 @@ ELSE(WIN32)
SET(CMAKE_OSX_DEPLOYMENT_TARGET ${MACOS_VERSION} CACHE STRING
"Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. Set to empty string for default value.")
ENDIF()
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
IF(ARM_TARGET_OS STREQUAL "android" OR ARM_TARGET_OS STREQUAL "armlinux"
OR ARM_TARGET_OS STREQUAL "ios" OR ARM_TARGET_OS STREQUAL "ios64")
ELSE()
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
ENDIF()
ELSE(APPLE)
IF(EXISTS "/etc/issue")
......
......@@ -77,14 +77,16 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
COMMAND cp "${CMAKE_BINARY_DIR}/lite/gen_code/paddle_code_generator" "${INFER_LITE_PUBLISH_ROOT}/bin"
COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/test_model_bin" "${INFER_LITE_PUBLISH_ROOT}/bin"
)
add_dependencies(publish_inference_cxx_lib model_optimize_tool)
add_dependencies(publish_inference_cxx_lib paddle_code_generator)
add_dependencies(publish_inference_cxx_lib bundle_full_api)
add_dependencies(publish_inference_cxx_lib bundle_light_api)
add_dependencies(publish_inference_cxx_lib test_model_bin)
add_dependencies(publish_inference publish_inference_cxx_lib)
add_custom_command(TARGET publish_inference_cxx_lib POST_BUILD
COMMAND ${CMAKE_STRIP} "--strip-debug" ${INFER_LITE_PUBLISH_ROOT}/cxx/lib/*.a)
if(NOT IOS)
add_dependencies(publish_inference_cxx_lib model_optimize_tool)
add_dependencies(publish_inference_cxx_lib paddle_code_generator)
add_dependencies(publish_inference_cxx_lib bundle_full_api)
add_dependencies(publish_inference_cxx_lib bundle_light_api)
add_dependencies(publish_inference_cxx_lib test_model_bin)
add_dependencies(publish_inference publish_inference_cxx_lib)
add_custom_command(TARGET publish_inference_cxx_lib POST_BUILD
COMMAND ${CMAKE_STRIP} "--strip-debug" ${INFER_LITE_PUBLISH_ROOT}/cxx/lib/*.a)
endif()
endif()
......
......@@ -175,7 +175,11 @@ lite_cc_library(paddle_api SRCS paddle_api.cc DEPS op_params tensor)
#-----------------------------------------------------------------------------------------------------
# The final inference library for both CxxConfig and MobileConfig.
lite_cc_library(paddle_api_light SRCS light_api_impl.cc DEPS light_api paddle_api)
if (LITE_ON_TINY_PUBLISH)
lite_cc_library(paddle_api_light SRCS light_api_impl.cc DEPS light_api paddle_api stream)
else()
lite_cc_library(paddle_api_light SRCS light_api_impl.cc DEPS light_api paddle_api)
endif()
if (NOT LITE_ON_TINY_PUBLISH)
lite_cc_library(paddle_api_full SRCS cxx_api_impl.cc DEPS cxx_api paddle_api light_api
${ops}
......
......@@ -69,10 +69,10 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
#ifdef LITE_WITH_ARM
lite::DeviceInfo::Init();
if (thread_num == 1) {
lite::DeviceInfo::Global().SetRunMode(lite::LITE_POWER_HIGH, thread_num);
lite::DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, thread_num);
LOG(INFO) << "LITE_POWER_HIGH";
} else {
lite::DeviceInfo::Global().SetRunMode(lite::LITE_POWER_NO_BIND, thread_num);
lite::DeviceInfo::Global().SetRunMode(LITE_POWER_NO_BIND, thread_num);
LOG(INFO) << "LITE_POWER_NO_BIND";
}
#endif
......
......@@ -28,7 +28,7 @@ namespace lite {
void TestModel(const std::vector<Place> &valid_places,
const Place &preferred_place) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
......@@ -28,7 +28,7 @@ namespace lite {
#ifdef LITE_WITH_ARM
TEST(InceptionV4, test) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
......
......@@ -40,6 +40,10 @@ class LightPredictorImpl : public PaddlePredictor {
void LightPredictorImpl::Init(const MobileConfig& config) {
// LightPredictor Only support NaiveBuffer backend in publish lib
#ifdef LITE_WITH_ARM
lite::DeviceInfo::Init();
lite::DeviceInfo::Global().SetRunMode(config.power_mode(), config.threads());
#endif
raw_predictor_.reset(new lite::LightPredictor(config.model_dir(),
LiteModelType::kNaiveBuffer));
}
......
......@@ -29,7 +29,7 @@ void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place,
bool use_npu = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
......@@ -29,7 +29,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
......@@ -33,7 +33,7 @@ void TestModel(const std::vector<Place>& valid_places,
bool gen_npu = false,
bool save_model = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(model_dir, preferred_place, valid_places);
......
......@@ -29,7 +29,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
......@@ -34,7 +34,7 @@ void TestModel(const std::vector<Place>& valid_places,
bool gen_npu = false,
bool save_model = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(model_dir, preferred_place, valid_places);
......
......@@ -28,7 +28,7 @@ namespace lite {
TEST(model, test) {
#ifdef LITE_WITH_ARM
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
......
......@@ -64,7 +64,7 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
const int warmup_times = 0) {
#ifdef LITE_WITH_ARM
lite::DeviceInfo::Init();
lite::DeviceInfo::Global().SetRunMode(lite::LITE_POWER_HIGH, thread_num);
lite::DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, thread_num);
#endif
lite_api::MobileConfig config;
config.set_model_dir(model_dir);
......
......@@ -29,7 +29,7 @@ void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place,
bool use_npu = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
......@@ -110,7 +110,18 @@ class LITE_API CxxConfig : public ConfigBase {
/// MobileConfig is the config for the light weight predictor, it will skip
/// IR optimization or other unnecessary stages.
class LITE_API MobileConfig : public ConfigBase {};
class LITE_API MobileConfig : public ConfigBase {
PowerMode mode_{LITE_POWER_HIGH};
int threads_{1};
public:
MobileConfig(Place preferred_place=Place(TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW)),
PowerMode mode=LITE_POWER_HIGH, int threads=1) : mode_(mode), threads_(threads) {}
void set_power_mode(PowerMode mode) { mode_ = mode; }
void set_threads(int threads) { threads_ = threads; }
PowerMode power_mode() const { return mode_; }
int threads() const { return threads_; }
};
template <typename ConfigT>
std::shared_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT&);
......
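The added constructor makes the same settings available at construction time; a minimal sketch using the defaults declared above:

// Place defaults to ARM / kFloat / kNCHW; here we bind to the big cores with 4 threads.
MobileConfig config(Place(TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW)),
                    LITE_POWER_HIGH, 4);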
......@@ -70,6 +70,14 @@ enum class DataLayoutType : int {
kAny = 2, // any data layout
NUM = 4, // number of fields.
};
typedef enum {
LITE_POWER_HIGH = 0,
LITE_POWER_LOW = 1,
LITE_POWER_FULL = 2,
LITE_POWER_NO_BIND = 3,
LITE_POWER_RAND_HIGH = 4,
LITE_POWER_RAND_LOW = 5
} PowerMode;
enum class ActivationType : int {
kIndentity = 0,
......
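These enumerators replace the old lite::PowerMode definition removed further down from the DeviceInfo header; as implemented in DeviceInfo, they roughly mean:

// LITE_POWER_HIGH      bind worker threads to the big cores (falls back to little cores)
// LITE_POWER_LOW       bind worker threads to the little cores (falls back to big cores)
// LITE_POWER_FULL      use all cores, big cores first
// LITE_POWER_NO_BIND   no core binding (the default set in DeviceInfo::Setup)
// LITE_POWER_RAND_HIGH / LITE_POWER_RAND_LOW
//                      like HIGH / LOW, but the starting core is rotated over time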
......@@ -28,7 +28,7 @@ namespace lite {
#ifdef LITE_WITH_ARM
TEST(ResNet18, test) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
......
......@@ -29,7 +29,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
......@@ -28,7 +28,7 @@ namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, preferred_place, valid_places);
......
......@@ -28,7 +28,7 @@ namespace lite {
#ifdef LITE_WITH_ARM
TEST(unet, test) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
......
......@@ -65,7 +65,6 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
conv_direct_3x3s1.cc
conv_direct_3x3s2.cc
conv_direct.cc
conv_depthwise_3x3_int7.cc
conv_depthwise_3x3_int8.cc
conv_depthwise_5x5s1_int8.cc
conv_depthwise_3x3p0.cc
......
This diff is collapsed.
......@@ -101,7 +101,7 @@ class Context<TargetType::kARM> {
void CopySharedTo(ARMContext* ctx) {}
void SetRunMode(PowerMode mode, int threads) {
void SetRunMode(lite_api::PowerMode mode, int threads) {
return DeviceInfo::Global().SetRunMode(mode, threads);
}
void SetCache(int l1size, int l2size, int l3size) {
......@@ -109,7 +109,7 @@ class Context<TargetType::kARM> {
}
void SetArch(ARMArch arch) { return DeviceInfo::Global().SetArch(arch); }
PowerMode mode() const { return DeviceInfo::Global().mode(); }
lite_api::PowerMode mode() const { return DeviceInfo::Global().mode(); }
int threads() const { return DeviceInfo::Global().threads(); }
ARMArch arch() const { return DeviceInfo::Global().arch(); }
int l1_cache_size() const { return DeviceInfo::Global().l1_cache_size(); }
......
......@@ -119,7 +119,8 @@ size_t get_mem_size() {
return memsize;
#elif defined(TARGET_IOS)
// to be implemented
printf("not implemented\n");
printf("not implemented, set to default 4GB\n");
return 4096 * 1024;
#endif
return 0;
}
......@@ -209,7 +210,7 @@ void get_cpu_arch(std::vector<ARMArch>* archs, const int cpu_num) {
}
#elif defined(TARGET_IOS)
for (int i = 0; i < cpu_num; ++i) {
archs->at(i) = APPLE;
archs->at(i) = kAPPLE;
}
#endif
}
......@@ -818,7 +819,7 @@ void DeviceInfo::RequestPowerFullMode(int thread_num) {
active_ids_.push_back(little_core_ids_[i - big_core_size]);
}
}
mode_ = LITE_POWER_FULL;
mode_ = lite_api::PowerMode::LITE_POWER_FULL;
}
void DeviceInfo::RequestPowerHighMode(int thread_num) {
......@@ -826,7 +827,7 @@ void DeviceInfo::RequestPowerHighMode(int thread_num) {
int little_core_size = little_core_ids_.size();
active_ids_.clear();
if (big_core_size > 0) {
mode_ = LITE_POWER_HIGH;
mode_ = lite_api::PowerMode::LITE_POWER_HIGH;
if (thread_num > big_core_size) {
LOG(ERROR) << "Request thread num: " << thread_num
<< ", exceed the big cores size: " << big_core_size
......@@ -838,7 +839,7 @@ void DeviceInfo::RequestPowerHighMode(int thread_num) {
}
}
} else {
mode_ = LITE_POWER_LOW;
mode_ = lite_api::PowerMode::LITE_POWER_LOW;
LOG(ERROR) << "HIGH POWER MODE is not support, switch to little cores.";
if (thread_num > little_core_size) {
active_ids_ = little_core_ids_;
......@@ -855,7 +856,7 @@ void DeviceInfo::RequestPowerLowMode(int thread_num) {
int little_core_size = little_core_ids_.size();
active_ids_.clear();
if (little_core_size > 0) {
mode_ = LITE_POWER_LOW;
mode_ = lite_api::PowerMode::LITE_POWER_LOW;
if (thread_num > little_core_size) {
LOG(WARNING) << "Request thread num: " << thread_num
<< ", exceed the little cores size: " << little_core_size
......@@ -867,7 +868,7 @@ void DeviceInfo::RequestPowerLowMode(int thread_num) {
}
}
} else {
mode_ = LITE_POWER_HIGH;
mode_ = lite_api::PowerMode::LITE_POWER_HIGH;
LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores";
if (thread_num > big_core_size) {
active_ids_ = big_core_ids_;
......@@ -893,7 +894,7 @@ void DeviceInfo::RequestPowerNoBindMode(int thread_num) {
}
}
}
mode_ = LITE_POWER_NO_BIND;
mode_ = lite_api::PowerMode::LITE_POWER_NO_BIND;
}
void DeviceInfo::RequestPowerRandHighMode(int shift_num, int thread_num) {
......@@ -901,7 +902,7 @@ void DeviceInfo::RequestPowerRandHighMode(int shift_num, int thread_num) {
int little_core_size = little_core_ids_.size();
active_ids_.clear();
if (big_core_size > 0) {
mode_ = LITE_POWER_RAND_HIGH;
mode_ = lite_api::PowerMode::LITE_POWER_RAND_HIGH;
if (thread_num > big_core_size) {
LOG(WARNING) << "Request thread num: " << thread_num
<< ", exceed the big cores size: " << big_core_size
......@@ -913,7 +914,7 @@ void DeviceInfo::RequestPowerRandHighMode(int shift_num, int thread_num) {
}
}
} else {
mode_ = LITE_POWER_LOW;
mode_ = lite_api::PowerMode::LITE_POWER_LOW;
LOG(WARNING) << "HIGH POWER MODE is not support, switch to little cores.";
if (thread_num > little_core_size) {
active_ids_ = little_core_ids_;
......@@ -930,7 +931,7 @@ void DeviceInfo::RequestPowerRandLowMode(int shift_num, int thread_num) {
int little_core_size = little_core_ids_.size();
active_ids_.clear();
if (little_core_size > 0) {
mode_ = LITE_POWER_RAND_LOW;
mode_ = lite_api::PowerMode::LITE_POWER_RAND_LOW;
if (thread_num > little_core_size) {
LOG(WARNING) << "Request thread num: " << thread_num
<< ", exceed the little cores size: " << little_core_size
......@@ -943,7 +944,7 @@ void DeviceInfo::RequestPowerRandLowMode(int shift_num, int thread_num) {
}
}
} else {
mode_ = LITE_POWER_HIGH;
mode_ = lite_api::PowerMode::LITE_POWER_HIGH;
LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores.";
if (thread_num > big_core_size) {
active_ids_ = big_core_ids_;
......@@ -957,6 +958,7 @@ void DeviceInfo::RequestPowerRandLowMode(int shift_num, int thread_num) {
int DeviceInfo::Setup() {
core_num_ = get_cpu_num();
printf("core number: %d\n", core_num_);
mem_size_ = get_mem_size();
get_cpu_arch(&archs_, core_num_);
// set default CPU info
......@@ -966,10 +968,10 @@ int DeviceInfo::Setup() {
SetFP32Info(1, 1);
SetFP16Info(1, 0);
SetDotInfo(1, 0);
#ifdef LITE_WITH_LINUX
// get max&min freq
max_freqs_.resize(core_num_);
min_freqs_.resize(core_num_);
#ifdef LITE_WITH_LINUX
// get max&min freq
for (int i = 0; i < core_num_; ++i) {
int max_freq, min_freq;
get_cpu_max_min_freq(i, &max_freq, &min_freq);
......@@ -981,6 +983,30 @@ int DeviceInfo::Setup() {
if (!SetCPUInfoByName()) {
SetCPUInfoByProb();
}
core_ids_.resize(core_num_);
cluster_ids_.resize(core_num_);
for (int i = 0; i < core_num_; ++i) {
max_freqs_[i] = 1000000;
min_freqs_[i] = 1000000;
cluster_ids_[i] = 0;
}
#else
#ifdef TARGET_IOS
dev_name_ = "Apple";
#else
dev_name_ = "Unknown";
#endif
core_ids_.resize(core_num_);
cluster_ids_.resize(core_num_);
big_core_ids_.resize(core_num_);
for (int i = 0; i < core_num_; ++i) {
max_freqs_[i] = 1000000;
min_freqs_[i] = 1000000;
cluster_ids_[i] = 0;
core_ids_[i] = i;
big_core_ids_[i] = i;
}
#endif
// output info
LOG(INFO) << "ARM multiprocessors name: " << dev_name_;
LOG(INFO) << "ARM multiprocessors number: " << core_num_;
......@@ -1004,13 +1030,12 @@ int DeviceInfo::Setup() {
LOG(INFO) << L3_cache_[i] / 1024 << " KB";
}
LOG(INFO) << "Total memory: " << mem_size_ << "KB";
#endif
// set default run mode
SetRunMode(LITE_POWER_NO_BIND, 1); // use single thread by default
SetRunMode(lite_api::PowerMode::LITE_POWER_NO_BIND, 1); // use single thread by default
return 0;
}
void DeviceInfo::SetRunMode(PowerMode mode, int thread_num) {
void DeviceInfo::SetRunMode(lite_api::PowerMode mode, int thread_num) {
#ifdef ARM_WITH_OMP
thread_num = std::min(thread_num, core_num_);
#else
......@@ -1024,22 +1049,22 @@ void DeviceInfo::SetRunMode(PowerMode mode, int thread_num) {
count_++;
int shift_num = (count_ / 10) % big_core_size;
switch (mode) {
case LITE_POWER_FULL:
case lite_api::LITE_POWER_FULL:
RequestPowerFullMode(thread_num);
break;
case LITE_POWER_HIGH:
case lite_api::LITE_POWER_HIGH:
RequestPowerHighMode(thread_num);
break;
case LITE_POWER_LOW:
case lite_api::LITE_POWER_LOW:
RequestPowerLowMode(thread_num);
break;
case LITE_POWER_NO_BIND:
case lite_api::LITE_POWER_NO_BIND:
RequestPowerNoBindMode(thread_num);
break;
case LITE_POWER_RAND_HIGH:
case lite_api::LITE_POWER_RAND_HIGH:
RequestPowerRandHighMode(shift_num, thread_num);
break;
case LITE_POWER_RAND_LOW:
case lite_api::LITE_POWER_RAND_LOW:
RequestPowerRandLowMode(shift_num, thread_num);
break;
default:
......@@ -1052,12 +1077,12 @@ void DeviceInfo::SetRunMode(PowerMode mode, int thread_num) {
#ifdef ARM_WITH_OMP
omp_set_num_threads(active_ids_.size());
#endif
if (mode_ != LITE_POWER_NO_BIND) {
if (mode_ != lite_api::LITE_POWER_NO_BIND) {
if (check_cpu_online(active_ids_)) {
bind_threads(active_ids_);
} else {
LOG(WARNING) << "Some cores are offline, switch to NO BIND MODE";
mode_ = LITE_POWER_NO_BIND;
mode_ = lite_api::LITE_POWER_NO_BIND;
}
}
#else // LITE_WITH_LINUX
......@@ -1080,7 +1105,7 @@ void DeviceInfo::SetCache(int l1size, int l2size, int l3size) {
workspace_.Resize({2 * (l1size + l2size)});
}
bool DeviceInfo::ExtendWorkspace(size_t size) {
bool DeviceInfo::ExtendWorkspace(int size) {
workspace_.Resize({size + llc_size()});
workspace_.mutable_data<int8_t>();
return true;
......
......@@ -25,15 +25,6 @@ namespace lite {
#ifdef LITE_WITH_ARM
typedef enum {
LITE_POWER_HIGH = 0,
LITE_POWER_LOW = 1,
LITE_POWER_FULL = 2,
LITE_POWER_NO_BIND = 3,
LITE_POWER_RAND_HIGH = 4,
LITE_POWER_RAND_LOW = 5
} PowerMode;
typedef enum {
kAPPLE = 0,
kA53 = 53,
......@@ -60,11 +51,11 @@ class DeviceInfo {
int Setup();
void SetRunMode(PowerMode mode, int thread_num);
void SetRunMode(lite_api::PowerMode mode, int thread_num);
void SetCache(int l1size, int l2size, int l3size);
void SetArch(ARMArch arch) { arch_ = arch; }
PowerMode mode() const { return mode_; }
lite_api::PowerMode mode() const { return mode_; }
int threads() const { return active_ids_.size(); }
ARMArch arch() const { return arch_; }
int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
......@@ -82,7 +73,7 @@ class DeviceInfo {
T* workspace_data() {
return reinterpret_cast<T*>(workspace_.mutable_data<int8_t>());
}
bool ExtendWorkspace(size_t size);
bool ExtendWorkspace(int size);
private:
int core_num_;
......@@ -107,7 +98,7 @@ class DeviceInfo {
// LITE_POWER_HIGH stands for using big cores,
// LITE_POWER_LOW stands for using small core,
// LITE_POWER_FULL stands for using all cores
PowerMode mode_;
lite_api::PowerMode mode_;
std::vector<int> active_ids_;
TensorLite workspace_;
int64_t count_{0};
......
......@@ -171,9 +171,9 @@ void test_fc(Place place) {
DDim bdim{{bflag ? n : 0}};
std::unique_ptr<arena::TestCase> tester(
new FcOPTest(place, "def", dim_in, wdim, bdim, 1));
#ifdef WITH_ARM_LITE
#ifdef LITE_WITH_ARM
auto& ctx = tester->context()->As<ARMContext>();
ctx.SetRunMode(LITE_POWER_HIGH, 1);
ctx.SetRunMode(lite_api::LITE_POWER_HIGH, 1);
#endif
arena::Arena arena(std::move(tester), place, 6e-5);
if (!arena.TestPrecision()) {
......
......@@ -344,7 +344,7 @@ void test_gru_unit(Place place) {
place, "def", 1 /* sigomoid */, 2 /* tanh */, false, dims));
#ifdef LITE_WITH_ARM
auto& ctx = tester->context()->template As<ARMContext>();
ctx.SetRunMode(LITE_POWER_HIGH, 1);
ctx.SetRunMode(lite_api::LITE_POWER_HIGH, 1);
#endif
arena::Arena arena(std::move(tester), place, 2e-5);
arena.TestPrecision();
......
#!/bin/bash
set -e
build_dir=build.ios.armv7.arm64
mkdir -p ${build_dir}
......@@ -15,11 +16,15 @@ cmake .. \
-DLITE_WITH_CUDA=OFF \
-DLITE_WITH_X86=OFF \
-DLITE_WITH_ARM=ON \
-DLITE_WITH_OPENMP=ON \
-DWITH_TESTING=OFF \
-DLITE_WITH_JAVA=OFF \
-DLITE_SHUTDOWN_LOG=ON \
-DLITE_ON_TINY_PUBLISH=ON \
-DLITE_WITH_OPENMP=OFF \
-DWITH_ARM_DOTPROD=OFF \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
-DWITH_TESTING=ON \
-DARM_TARGET_OS=ios
make -j2
make -j4
cd -
......@@ -33,7 +33,7 @@ void Run(DebugConfig* conf) {
CHECK(conf);
#ifdef LITE_WITH_ARM
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, conf->arm_thread_num);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, conf->arm_thread_num);
#endif
lite::Predictor predictor;
std::vector<Place> valid_places({
......