Commit 5e63ef78 authored by: H Haipeng Wang

add openmp-fix, openmp test

cmake_minimum_required(VERSION 3.0)
project(paddle-mobile)
#add_definitions(-DPADDLE_MOBILE_DEBUG)
add_definitions(-DPADDLE_MOBILE_DEBUG)
add_definitions(-DENABLE_EXCEPTION)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
#add_definitions(-DARMV7)
#add_definitions(-DARMV8)
#add_definitions(-DIOS)
add_definitions(-DX86)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
set(CMAKE_BUILD_TYPE RelWithDebInfo)
set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
......@@ -19,8 +24,78 @@ include_directories(src/)
option(USE_OPENMP "openmp support" ON)
if(USE_OPENMP)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
endif()
if (googlenet)
add_definitions(-DCONCAT_OP)
add_definitions(-DCONV_OP)
add_definitions(-DLRN_OP)
add_definitions(-DMUL_OP)
add_definitions(-DELEMENTWISEADD_OP)
add_definitions(-DFUSION_FC_OP)
add_definitions(-DPOOL_OP)
add_definitions(-DRELU_OP)
elseif (mobilenet)
add_definitions(-DCONV_OP)
add_definitions(-DELEMENTWISEADD_OP)
add_definitions(-DRELU_OP)
add_definitions(-DSOFTMAX_OP)
add_definitions(-DDEPTHWISECONV_OP)
add_definitions(-DBATCHNORM_OP)
add_definitions(-DPOOL_OP)
add_definitions(-DRESHAPE_OP)
elseif (yolo)
add_definitions(-DBATCHNORM_OP)
add_definitions(-DCONV_OP)
add_definitions(-DRELU_OP)
add_definitions(-DELEMENTWISEADD_OP)
elseif (squeezenet)
add_definitions(-DCONCAT_OP)
add_definitions(-DCONV_OP)
add_definitions(-DRELU_OP)
add_definitions(-DELEMENTWISEADD_OP)
add_definitions(-DPOOL_OP)
add_definitions(-DRESHAPE_OP)
add_definitions(-DSOFTMAX_OP)
elseif(resnet)
add_definitions(-DCONV_OP)
add_definitions(-DBATCHNORM_OP)
add_definitions(-DELEMENTWISEADD_OP)
add_definitions(-DSOFTMAX_OP)
add_definitions(-DMUL_OP)
add_definitions(-DPOOL_OP)
add_definitions(-DRELU_OP)
else ()
add_definitions(-DBATCHNORM_OP)
add_definitions(-DBOXCODER_OP)
add_definitions(-DCONCAT_OP)
add_definitions(-DCONV_OP)
add_definitions(-DDEPTHWISECONV_OP)
add_definitions(-DELEMENTWISEADD_OP)
add_definitions(-DFUSIONCONVADD_OP)
add_definitions(-DCONVADDRELU_OP)
add_definitions(-DFUSION_FC_OP)
add_definitions(-DLRN_OP)
add_definitions(-DMUL_OP)
add_definitions(-DMULTICLASSNMS_OP)
add_definitions(-DPOOL_OP)
add_definitions(-DPRIORBOX_OP)
add_definitions(-DRELU_OP)
add_definitions(-DRESHAPE_OP)
add_definitions(-DSIGMOID_OP)
add_definitions(-DSOFTMAX_OP)
add_definitions(-DTRANSPOSE_OP)
endif()
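# Note: each macro above corresponds to an "#ifdef XXX_OP" guard added around the
# matching operator and kernel sources later in this change, so configuring with
# e.g. -Dgooglenet=true compiles only the operators that GoogLeNet needs.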
add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
if (googlenet)
elseif (mobilenet)
elseif (yolo)
elseif (squeezenet)
elseif(resnet)
else ()
endif()
add_subdirectory(test)
......@@ -33,6 +33,8 @@ build_for_mac() {
}
build_for_android() {
if [ -z "${ANDROID_NDK}" ]; then
echo "ANDROID_NDK not found!"
exit -1
......@@ -55,11 +57,13 @@ build_for_android() {
exit -1
fi
MODE="Release"
ANDROID_PLATFORM_VERSION="android-15"
TOOLCHAIN_FILE="./tools/android-cmake/android.toolchain.cmake"
ANDROID_ARM_MODE="arm"
if [ $# -eq 1 ]; then
NET=$1
cmake . \
-B"build/release/${PLATFORM}" \
-DANDROID_ABI="${ABI}" \
......@@ -69,10 +73,24 @@ build_for_android() {
-DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
-DANDROID_STL=c++_static \
-DANDROID=true \
-D"${NET}=true" \
-D"${ARM_PLATFORM}"=true
else
cmake . \
-B"build/release/${PLATFORM}" \
-DANDROID_ABI="${ABI}" \
-DCMAKE_BUILD_TYPE="${MODE}" \
-DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
-DANDROID_PLATFORM="${ANDROID_PLATFORM_VERSION}" \
-DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
-DANDROID_STL=c++_static \
-DANDROID=true \
-D"${ARM_PLATFORM}"=true
fi
cd "./build/release/${PLATFORM}"
make -j 8
}
build_for_ios() {
......@@ -106,15 +124,44 @@ if [ $# -lt 1 ]; then
echo "available targets: mac|linux|ios|android"
echo "sample usage: ./build.sh mac"
else
if [ $1 = "mac" ]; then
build_for_mac
elif [ $1 = "linux" ]; then
build_for_linux
elif [ $1 = "android" ]; then
build_for_android
elif [ $1 = "ios" ]; then
build_for_ios
else
build_error
if [ $# -eq 2 ]; then
if [ "$2" != "googlenet" ] && [ "$2" != "mobilenet" ] && [ "$2" != "yolo" ] && [ "$2" != "squeezenet" ] && [ "$2" != "resnet" ]; then
if [ $1 = "mac" ]; then
build_for_mac
elif [ $1 = "linux" ]; then
build_for_linux
elif [ $1 = "android" ]; then
build_for_android
elif [ $1 = "ios" ]; then
build_for_ios
else
build_error
fi
else
if [ $1 = "mac" ]; then
build_for_mac $2
elif [ $1 = "linux" ]; then
build_for_linux $2
elif [ $1 = "android" ]; then
build_for_android $2
elif [ $1 = "ios" ]; then
build_for_ios $2
else
build_error
fi
fi
else
if [ $1 = "mac" ]; then
build_for_mac
elif [ $1 = "linux" ]; then
build_for_linux
elif [ $1 = "android" ]; then
build_for_android
elif [ $1 = "ios" ]; then
build_for_ios
else
build_error
fi
fi
fi
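# For example, "./build.sh android googlenet" forwards the network name into
# build_for_android, which configures CMake with -Dgooglenet=true so that only
# the GoogLeNet operators selected in CMakeLists.txt are compiled;
# "./build.sh android" alone builds the default full operator set.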
......@@ -18,7 +18,6 @@ limitations under the License. */
#include <stdio.h>
#include <exception>
#include <sstream>
#include <stdexcept>
#include <string>
#endif
......
......@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef __CUDACC__
#define HOSTDEVICE __host__ __device__
#define DEVICE __device__
#define HOST __host__
#else
#define HOSTDEVICE
#define DEVICE
#define HOST
#ifdef PADDLE_MOBILE_USE_OPENMP
/**
 * android-ndk-r17 has a problem when linking with OpenMP.
 * If paddle-mobile is built with -fopenmp but never calls any omp_* function,
 * then after another binary is linked against libpaddle-mobile.so,
 * omp_get_thread_num() no longer works. See test/common/test_openmp.cc.
 * The root cause is still unclear, but this trick works around it: referencing
 * an omp_* function here forces the linker to keep the OpenMP runtime.
 * A better solution would be to make the linker keep the omp_* symbols via
 * flags, but I have not found a way to make that work.
 */
#include <omp.h>
static int _ = omp_get_num_procs();
#endif
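For reference, a minimal OpenMP smoke test in the spirit of the test/common/test_openmp.cc mentioned above; this is an illustrative sketch, not the actual contents of that file.

#include <omp.h>
#include <cstdio>

int main() {
  // With -fopenmp and the workaround above in place, the parallel region should
  // report several threads; without it, omp_get_thread_num()/omp_get_num_threads()
  // were observed to misbehave after linking against libpaddle-mobile.so.
  #pragma omp parallel
  {
    std::printf("thread %d of %d\n", omp_get_thread_num(), omp_get_num_threads());
  }
  return 0;
}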
......@@ -17,14 +17,8 @@ limitations under the License. */
namespace paddle_mobile {
namespace framework {
/*
* Variant<int, float, std::string, std::vector<int>, std::vector<float>,
std::vector<std::string>, bool, std::vector<bool>, BlockDesc *,
int64_t>
* */
struct PrintVistor : Vistor<Print &> {
PrintVistor(Print &printer) : printer_(printer) {}
explicit PrintVistor(Print &printer) : printer_(printer) {}
template <typename T>
Print &operator()(const T &value) {
printer_ << value;
......
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <string>
#include <unordered_map>
#include <vector>
#include "common/enforce.h"
#include "common/log.h"
#include "common/variant.h"
......@@ -22,28 +24,15 @@ limitations under the License. */
namespace paddle_mobile {
namespace framework {
using std::string;
using std::vector;
class BlockDesc;
class Attribute {
public:
/*
* PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT = 0,
PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOAT = 1,
PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRING = 2,
PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INTS = 3,
PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOATS = 4,
PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRINGS = 5,
PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEAN = 6,
PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS = 7,
PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK = 8,
PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG = 9
PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE)
*
* */
static Attribute GetAttrValue(
PaddleMobile__Framework__Proto__OpDesc__Attr *attr_desc) {
// std::cout << "begin get attr value" << std::endl;
Attribute attr;
switch (attr_desc->type) {
case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEAN: {
......@@ -63,35 +52,35 @@ class Attribute {
break;
}
case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS: {
std::vector<bool> val(attr_desc->n_bools);
vector<bool> val(attr_desc->n_bools);
for (int i = 0; i < attr_desc->n_bools; ++i) {
val[i] = attr_desc->bools[i];
}
attr.Set<std::vector<bool>>(val);
attr.Set<vector<bool>>(val);
break;
}
case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INTS: {
std::vector<int> val(attr_desc->n_ints);
vector<int> val(attr_desc->n_ints);
for (int i = 0; i < attr_desc->n_ints; ++i) {
val[i] = attr_desc->ints[i];
}
attr.Set<std::vector<int>>(val);
attr.Set<vector<int>>(val);
break;
}
case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOATS: {
std::vector<float> val(attr_desc->n_floats);
vector<float> val(attr_desc->n_floats);
for (int i = 0; i < attr_desc->n_floats; ++i) {
val[i] = attr_desc->floats[i];
}
attr.Set<std::vector<float>>(val);
attr.Set<vector<float>>(val);
break;
}
case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRINGS: {
std::vector<std::string> val(attr_desc->n_strings);
vector<string> val(attr_desc->n_strings);
for (int i = 0; i < attr_desc->n_strings; ++i) {
val[i] = attr_desc->strings[i];
}
attr.Set<std::vector<std::string>>(val);
attr.Set<vector<string>>(val);
break;
}
case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG: {
......@@ -122,21 +111,18 @@ class Attribute {
return vistor(attr.variant_.Get<int>());
} else if (attr.variant_.TypeId() == typeid(float).hash_code()) {
return vistor(attr.variant_.Get<float>());
} else if (attr.variant_.TypeId() == typeid(std::string).hash_code()) {
return vistor(attr.variant_.Get<std::string>());
} else if (attr.variant_.TypeId() == typeid(std::vector<int>).hash_code()) {
return vistor(attr.variant_.Get<std::vector<int>>());
} else if (attr.variant_.TypeId() ==
typeid(std::vector<float>).hash_code()) {
return vistor(attr.variant_.Get<std::vector<float>>());
} else if (attr.variant_.TypeId() ==
typeid(std::vector<std::string>).hash_code()) {
return vistor(attr.variant_.Get<std::vector<std::string>>());
} else if (attr.variant_.TypeId() == typeid(string).hash_code()) {
return vistor(attr.variant_.Get<string>());
} else if (attr.variant_.TypeId() == typeid(vector<int>).hash_code()) {
return vistor(attr.variant_.Get<vector<int>>());
} else if (attr.variant_.TypeId() == typeid(vector<float>).hash_code()) {
return vistor(attr.variant_.Get<vector<float>>());
} else if (attr.variant_.TypeId() == typeid(vector<string>).hash_code()) {
return vistor(attr.variant_.Get<vector<string>>());
} else if (attr.variant_.TypeId() == typeid(bool).hash_code()) {
return vistor(attr.variant_.Get<bool>());
} else if (attr.variant_.TypeId() ==
typeid(std::vector<bool>).hash_code()) {
return vistor(attr.variant_.Get<std::vector<bool>>());
} else if (attr.variant_.TypeId() == typeid(vector<bool>).hash_code()) {
return vistor(attr.variant_.Get<vector<bool>>());
} else if (attr.variant_.TypeId() == typeid(int64_t).hash_code()) {
return vistor(attr.variant_.Get<int64_t>());
} else {
......@@ -145,24 +131,21 @@ class Attribute {
}
private:
Variant<int, float, std::string, std::vector<int>, std::vector<float>,
std::vector<std::string>, bool, std::vector<bool>, BlockDesc *,
int64_t>
Variant<int, float, string, vector<int>, vector<float>, vector<string>, bool,
vector<bool>, BlockDesc *, int64_t>
variant_;
};
using AttributeMap = std::unordered_map<std::string, Attribute>;
using AttributeMap = std::unordered_map<string, Attribute>;
class AttrReader {
public:
explicit AttrReader(const AttributeMap &attrs) : attrs_(attrs) {}
template <typename T>
inline T Get(const std::string &name) const {
// PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should
// be in
// AttributeMap",
// name);
inline T Get(const string &name) const {
PADDLE_MOBILE_ENFORCE(attrs_.count(name) != 0,
"%s should be in AttributeMap", name);
return ((Attribute)attrs_.at(name)).Get<T>();
}
......
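For clarity, a minimal usage sketch of the classes above. It is illustrative only: the attribute name "axis", its value, and the wrapper function are made up, and Attribute::Set/Get are assumed to behave as they are used in GetAttrValue and AttrReader::Get.

void AttrReaderExample() {
  using namespace paddle_mobile::framework;
  AttributeMap attrs;
  Attribute axis_attr;
  axis_attr.Set<int>(1);               // store an int-typed attribute
  attrs["axis"] = axis_attr;

  AttrReader reader(attrs);
  int axis = reader.Get<int>("axis");  // PADDLE_MOBILE_ENFORCE fires if "axis" is missing
  (void)axis;
}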
......@@ -54,7 +54,6 @@ inline std::string DataLayoutToString(const DataLayout &data_layout) {
return "ANY_LAYOUT";
default:
break;
// std::cout << "unknown DataLayou %d", data_layout;
}
}
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "framework/data_transform.h"
namespace paddle_mobile {
namespace framework {
static void PassTensorData(Tensor *from, Tensor *to) {
to->ShareDataWith(*from);
*from = Tensor();
}
void DataTransform(const OpKernelType &expected_kernel_type,
const OpKernelType &kernel_type_for_var,
const Tensor &input_tensor, Tensor *output_tensor) {
bool transformed = false;
Tensor in;
in.ShareDataWith(input_tensor);
Tensor out;
// // do layout transform
// if (NeedTransformLayout(expected_kernel_type.data_layout_,
// kernel_type_for_var.data_layout_)) {
// TransDataLayout(kernel_type_for_var, expected_kernel_type, in,
// &out);
// transformed = true;
// PassTensorData(&out, &in);
// }
//
// // do data type transform
// if (expected_kernel_type.data_type_ !=
// kernel_type_for_var.data_type_) {
// TransDataType(kernel_type_for_var, expected_kernel_type, in,
// &out);
// transformed = true;
// PassTensorData(&out, &in);
// }
//
// // do device transform
// if (!platform::is_same_place(kernel_type_for_var.place_,
// expected_kernel_type.place_)) {
// TransDataDevice(in, expected_kernel_type.place_, &out);
// transformed = true;
// PassTensorData(&out, &in);
// }
//
// PADDLE_ENFORCE(transformed, "No transform is applied, please
// check!");
// get output data
output_tensor->ShareDataWith(in);
}
void CopyVariableWithTensor(const Variable &in_var, const Tensor &tensor,
Variable *out_var) {
// if (in_var.IsType<LoDTensor>()) {
// auto& in_lod_tensor = in_var.Get<LoDTensor>();
// auto* tran_lod_tensor = out_var.GetMutable<LoDTensor>();
// tran_lod_tensor->set_lod(in_lod_tensor.lod());
// tran_lod_tensor->set_layout(in_lod_tensor.layout());
// tran_lod_tensor->ShareDataWith(tensor);
// } else if (in_var.IsType<SelectedRows>()) {
// auto& in_selected_rows = in_var.Get<SelectedRows>();
// auto* trans_selected_rows =
// out_var.GetMutable<SelectedRows>();
// trans_selected_rows->set_height(in_selected_rows.height());
// trans_selected_rows->set_rows(in_selected_rows.rows());
// trans_selected_rows->mutable_value()->ShareDataWith(tensor);
// } else {
// PADDLE_THROW("unknown var type");
// }
}
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace paddle_mobile {
namespace framework {
// inline proto::VarType::Type ToDataType(std::type_index type) {
// using namespace paddle_mobile::framework::proto;
// if (typeid(float).hash_code() == type.hash_code()) {
// return proto::VarType::FP32;
// } else if (typeid(double).hash_code() == type.hash_code()) {
// return proto::VarType::FP64;
// } else if (typeid(int).hash_code() == type.hash_code()) {
// return proto::VarType::INT32;
// } else if (typeid(int64_t).hash_code() == type.hash_code()) {
// return proto::VarType::INT64;
// } else if (typeid(bool).hash_code() == type.hash_code()) {
// return proto::VarType::BOOL;
// } else {
//// PADDLE_THROW("Not supported");
// }
// }
} // namespace framework
} // namespace paddle_mobile
......@@ -183,7 +183,7 @@ DDim DDim::operator*(DDim d) const {
int64_t get(const DDim &ddim, int idx) { return ddim[idx]; }
void set(DDim &ddim, int idx, int value) { ddim[idx] = value; }
void set(DDim *ddim, int idx, int value) { (*ddim)[idx] = value; }
/// @cond HIDDEN
struct VectorizeVisitor : Vistor<void> {
......
......@@ -83,17 +83,6 @@ struct DDim {
int64_t operator[](int idx) const;
// template <typename Visitor>
// typename Visitor::result_type apply_visitor(Visitor& visitor) {
// return var.apply_visitor(visitor);
// }
//
// template <typename Visitor>
// typename Visitor::result_type apply_visitor(Visitor& visitor)
// const {
// return var.apply_visitor(visitor);
// }
DDimVar getVar() { return var; }
bool operator==(DDim d) const;
......@@ -126,7 +115,7 @@ DDim make_ddim(std::initializer_list<int64_t> dims);
int64_t get(const DDim &dim, int idx);
void set(DDim &dim, int idx, int val);
void set(DDim *dim, int idx, int val);
std::vector<int64_t> vectorize(const DDim &ddim);
......
......@@ -19,8 +19,6 @@ limitations under the License. */
#include <stdexcept>
#include <type_traits>
#include "platform/hostdevice.h"
namespace paddle_mobile {
namespace framework {
......@@ -30,42 +28,35 @@ struct Dim {
static constexpr int dimensions = i;
template <typename... Args>
HOSTDEVICE Dim(int64_t _head, Args... _tail) : head(_head), tail(_tail...) {
Dim(int64_t _head, Args... _tail) : head(_head), tail(_tail...) {
static_assert(sizeof...(_tail) == i - 1,
"Dim initialized with the wrong number of parameters");
}
HOSTDEVICE
Dim(int64_t _head, const Dim<i - 1> &_tail) : head(_head), tail(_tail) {}
HOSTDEVICE
Dim() : head(0), tail() {}
/** Construct a Dim from a linear index and size. Uses Fortran
* order
* indexing. */
HOSTDEVICE
Dim(int64_t idx, const Dim<i> &size)
: head(idx % size.head), tail(idx / size.head, size.tail) {}
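/** Worked example (illustrative values): delinearizing linear index 7 over
 * extents (2, 3, 4) gives head = 7 % 2 = 1, then 7 / 2 = 3 is delinearized
 * over (3, 4) to (0, 1), so the resulting index is (1, 0, 1) in
 * Fortran (column-major) order. */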
/** Construct a Dim with each dimension set to the given index */
HOSTDEVICE
Dim(int64_t idx) : head(idx), tail(idx) {}
HOSTDEVICE
bool operator==(const Dim<i> &o) const {
return (head == o.head) && (tail == o.tail);
}
HOSTDEVICE
bool operator!=(const Dim<i> &o) const { return !(*this == o); }
HOSTDEVICE
int64_t &operator[](int idx);
HOSTDEVICE
int64_t operator[](int idx) const;
HOST std::string to_string() const;
std::string to_string() const;
int64_t head;
Dim<i - 1> tail;
......@@ -76,13 +67,10 @@ template <>
struct Dim<0> {
static constexpr int dimensions = 0;
HOSTDEVICE
Dim(int64_t _head) {}
HOSTDEVICE
Dim() {}
HOSTDEVICE
Dim(int idx, const Dim<0> &size) {
#ifndef __CUDA_ARCH__
if (idx > 0) {
......@@ -93,15 +81,12 @@ struct Dim<0> {
#endif
}
HOSTDEVICE
bool operator==(const Dim<0> &o) const { return true; }
HOSTDEVICE
bool operator!=(const Dim<0> &o) const { return false; }
HOSTDEVICE
int64_t &operator[](int idx);
HOSTDEVICE
int64_t operator[](int idx) const;
};
......@@ -112,12 +97,12 @@ template <int i>
struct DimGetter {
// Return a copy if Dim is const
template <typename D>
HOSTDEVICE static int64_t impl(const D &d) {
static int64_t impl(const D &d) {
return DimGetter<i - 1>::impl(d.tail);
}
// Return a reference if Dim is mutable
template <typename D>
HOSTDEVICE static int64_t &impl(D &d) {
static int64_t &impl(D &d) {
return DimGetter<i - 1>::impl(d.tail);
}
};
......@@ -127,18 +112,18 @@ template <>
struct DimGetter<0> {
// Return a copy if Dim is const
template <typename D>
HOSTDEVICE static int64_t impl(const D &d) {
static int64_t impl(const D &d) {
return d.head;
}
// Return a reference if Dim is mutable
template <typename D>
HOSTDEVICE static int64_t &impl(D &d) {
static int64_t &impl(D &d) {
return d.head;
}
};
template <int D>
HOSTDEVICE int64_t &indexer(Dim<D> &dim, int idx) {
int64_t &indexer(Dim<D> &dim, int idx) {
#ifndef __CUDA_ARCH__
if (idx < 0) {
throw std::invalid_argument("Tried to access a negative dimension");
......@@ -153,7 +138,7 @@ HOSTDEVICE int64_t &indexer(Dim<D> &dim, int idx) {
}
template <>
HOSTDEVICE int64_t &indexer<0>(Dim<0> &dim, int idx) {
int64_t &indexer<0>(Dim<0> &dim, int idx) {
#ifndef __CUDA_ARCH__
throw std::invalid_argument("Invalid index");
#else
......@@ -170,7 +155,7 @@ HOSTDEVICE int64_t &indexer<0>(Dim<0> &dim, int idx) {
}
template <int D>
HOSTDEVICE int64_t indexer(const Dim<D> &dim, int idx) {
int64_t indexer(const Dim<D> &dim, int idx) {
#ifndef __CUDA_ARCH__
if (idx < 0) {
throw std::invalid_argument("Tried to access a negative dimension");
......@@ -185,7 +170,7 @@ HOSTDEVICE int64_t indexer(const Dim<D> &dim, int idx) {
}
template <>
HOSTDEVICE int64_t indexer<0>(const Dim<0> &dim, int idx) {
int64_t indexer<0>(const Dim<0> &dim, int idx) {
#ifndef __CUDA_ARCH__
throw std::invalid_argument("Invalid index");
#else
......@@ -204,83 +189,77 @@ HOSTDEVICE int64_t indexer<0>(const Dim<0> &dim, int idx) {
} // namespace
// Static access to constant Dim
template <int i, int l>
HOSTDEVICE int64_t get(const Dim<l> &d) {
int64_t get(const Dim<l> &d) {
return DimGetter<i>::impl(d);
}
// Static access to mutable Dim
template <int i, int l>
HOSTDEVICE int64_t &get(Dim<l> &d) {
int64_t &get(Dim<l> &d) {
return DimGetter<i>::impl(d);
}
// Dynamic access to constant Dim
template <int l>
HOSTDEVICE int64_t Dim<l>::operator[](int i) const {
int64_t Dim<l>::operator[](int i) const {
// std::cout << "l: " << l << std::endl;
return indexer(*this, i);
}
// Dynamic access to mutable Dim
template <int l>
HOSTDEVICE int64_t &Dim<l>::operator[](int i) {
int64_t &Dim<l>::operator[](int i) {
return indexer(*this, i);
}
// Dynamic access to constant Dim
inline HOSTDEVICE int64_t Dim<0>::operator[](int i) const {
return indexer(*this, i);
}
inline int64_t Dim<0>::operator[](int i) const { return indexer(*this, i); }
// Dynamic access to mutable Dim
inline HOSTDEVICE int64_t &Dim<0>::operator[](int i) {
return indexer(*this, i);
}
inline int64_t &Dim<0>::operator[](int i) { return indexer(*this, i); }
// Dynamic access to constant Dim
// without std::enable_if will try to instantiate this on get<0>(d)
template <int l>
HOSTDEVICE typename std::enable_if<(l > 0), int64_t>::type get(const Dim<l> &d,
int i) {
typename std::enable_if<(l > 0), int64_t>::type get(const Dim<l> &d, int i) {
return d[i];
}
// Dynamic access to mutable Dim
template <int l>
HOSTDEVICE typename std::enable_if<(l > 0), int64_t &>::type get(Dim<l> &d,
int i) {
typename std::enable_if<(l > 0), int64_t &>::type get(Dim<l> &d, int i) {
return d[i];
}
// Dot product of two dims
template <int i>
HOSTDEVICE int64_t linearize(const Dim<i> &a, const Dim<i> &b) {
int64_t linearize(const Dim<i> &a, const Dim<i> &b) {
return a.head * b.head + linearize(a.tail, b.tail);
}
// Base case dot product of two Dims
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline int64_t linearize(const Dim<0> &a, const Dim<0> &b) {
inline int64_t linearize(const Dim<0> &a, const Dim<0> &b) {
return 0;
}
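// Worked example (illustrative values): with column-major strides (1, 2, 6)
// for a 2x3x4 extent, linearize((1, 0, 1), (1, 2, 6)) = 1*1 + 0*2 + 1*6 = 7,
// the inverse of the Dim(idx, size) constructor shown earlier.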
// Product of a Dim
template <int i>
HOSTDEVICE int64_t product(const Dim<i> &a, int prod = 1) {
int64_t product(const Dim<i> &a, int prod = 1) {
return prod * a.head * product(a.tail);
}
// Base case product of a Dim
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline int64_t product(const Dim<0> &a, int prod) {
inline int64_t product(const Dim<0> &a, int prod) {
return prod;
}
// Is 0 <= idx_i < size_i for all i?
template <int i>
HOSTDEVICE bool contained(const Dim<i> &idx, const Dim<i> &size) {
bool contained(const Dim<i> &idx, const Dim<i> &size) {
return ((0 <= idx.head) && (idx.head < size.head) &&
contained(idx.tail, size.tail));
}
......@@ -288,7 +267,7 @@ HOSTDEVICE bool contained(const Dim<i> &idx, const Dim<i> &size) {
// Base case of is 0 <= idx_i < size_i ?
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
return true;
}
......@@ -296,7 +275,7 @@ HOSTDEVICE inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
* \brief Compute exclusive prefix-multiply of a Dim.
*/
template <int i>
HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) {
Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) {
return Dim<i>(mul, ex_prefix_mul(src.tail, mul * src.head));
}
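// Worked example (illustrative values): ex_prefix_mul((2, 3, 4)) == (1, 2, 6),
// i.e. the column-major strides of a 2x3x4 extent, which is exactly what
// linearize above expects as its second argument.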
......@@ -304,7 +283,7 @@ HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) {
// Base case of ex_prefix_mul
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) {
inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) {
return Dim<0>();
}
///\endcond
......@@ -313,18 +292,18 @@ HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) {
* Add two dimensions together
*/
template <int i>
HOSTDEVICE Dim<i> dim_plus(const Dim<i> &a, const Dim<i> &b) {
Dim<i> dim_plus(const Dim<i> &a, const Dim<i> &b) {
return Dim<i>(a.head + b.head, dim_plus(a.tail, b.tail));
}
// Base case
template <>
HOSTDEVICE inline Dim<0> dim_plus(const Dim<0> &a, const Dim<0> &b) {
inline Dim<0> dim_plus(const Dim<0> &a, const Dim<0> &b) {
return Dim<0>();
}
template <int i>
HOSTDEVICE Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
return dim_plus(lhs, rhs);
}
......@@ -332,18 +311,18 @@ HOSTDEVICE Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
* Multiply two dimensions together
*/
template <int i>
HOSTDEVICE Dim<i> dim_mult(const Dim<i> &a, const Dim<i> &b) {
Dim<i> dim_mult(const Dim<i> &a, const Dim<i> &b) {
return Dim<i>(a.head * b.head, dim_mult(a.tail, b.tail));
}
// Base case
template <>
HOSTDEVICE inline Dim<0> dim_mult(const Dim<0> &a, const Dim<0> &b) {
inline Dim<0> dim_mult(const Dim<0> &a, const Dim<0> &b) {
return Dim<0>();
}
template <int i>
HOSTDEVICE Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
return dim_mult(lhs, rhs);
}
......@@ -358,7 +337,7 @@ HOSTDEVICE Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
*/
template <int i>
HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
int norm_stride = size.head == 1 ? 0 : stride.head;
return Dim<i>(norm_stride, normalize_strides(size.tail, stride.tail));
}
......@@ -366,8 +345,7 @@ HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
///\cond HIDDEN
template <>
HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size,
const Dim<0> &stride) {
inline Dim<0> normalize_strides(const Dim<0> &size, const Dim<0> &stride) {
return Dim<0>();
}
......@@ -382,7 +360,7 @@ HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size,
*/
template <typename... Args>
HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) {
Dim<sizeof...(Args)> make_dim(Args... idxes) {
return Dim<sizeof...(Args)>(idxes...);
}
......@@ -409,7 +387,7 @@ inline std::ostream &operator<<(std::ostream &os, const Dim<0> &d) {
}
template <int i>
HOST std::string Dim<i>::to_string() const {
std::string Dim<i>::to_string() const {
std::stringstream stream;
stream << *this;
......@@ -418,7 +396,7 @@ HOST std::string Dim<i>::to_string() const {
}
template <int D>
HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) {
Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) {
Dim<D> result;
for (int i = 0; i < D - 1; ++i) {
......
......@@ -42,23 +42,10 @@ std::ostream &operator<<(std::ostream &os, const LoD &lod) {
}
std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
// PADDLE_ENFORCE(t.type().hash_code() ==
// typeid(float).hash_code());
// if (!platform::is_cpu_place(t.place())) {
// LoDTensor tt;
// framework::TensorCopy(t, platform::CPUPlace(), &tt);
// platform::DeviceContextPool &pool =
// platform::DeviceContextPool::Instance(); auto &dev_ctx =
// *pool.Get(t.place()); dev_ctx.Wait();
//
// os << tt;
// return os;
// }
PADDLE_MOBILE_ENFORCE(t.type().hash_code() == typeid(float).hash_code(),
"t.type() is not float");
os << "dim: " << t.dims() << "\n";
os << "lod: " << t.lod() << "\n";
// only print first ten elements
int64_t size = t.numel() < 10 ? t.numel() : 10;
for (int64_t i = 0; i < size; ++i) {
......@@ -76,9 +63,9 @@ std::string LoDToString(const LoD &lod) {
LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin,
size_t elem_end) {
// PADDLE_ENFORCE_LT(level, in.size());
// PADDLE_ENFORCE_LT(elem_end, in[level].size());
PADDLE_MOBILE_ENFORCE(level < in.size(), "level >= in.size()");
PADDLE_MOBILE_ENFORCE(elem_end < in[level].size(),
"elem_end >= in[level].size()");
LoD res;
res.resize(in.size() - level);
// copy the first level
......@@ -211,8 +198,9 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
LoD sub_lod;
for (size_t level_idx = start_level; level_idx < lod.size(); ++level_idx) {
// PADDLE_ENFORCE_LE(start_idx, end_idx);
// PADDLE_ENFORCE_LT(end_idx, lod[level_idx].size());
PADDLE_MOBILE_ENFORCE(start_idx <= end_idx, "start_idx > end_idx");
PADDLE_MOBILE_ENFORCE(end_idx < lod[level_idx].size(),
"end_idx >= lod[level_idx].size()");
std::vector<size_t> level_lens;
for (size_t i = start_idx; i < end_idx; ++i) {
level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]);
......@@ -226,10 +214,9 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
}
void AppendLoD(LoD *lod, const LoD &lod_length) {
// PADDLE_ENFORCE(
// lod->empty() || lod->size() == lod_length.size(),
// "The lod_length should has the same size with the appended
// lod.");
PADDLE_MOBILE_ENFORCE(
lod->empty() || lod->size() == lod_length.size(),
"The lod_length should has the same size with the appended lod.");
if (lod->empty()) {
for (size_t i = 0; i < lod_length.size(); ++i) {
lod->emplace_back(1, 0); // size = 1, value = 0;
......
......@@ -25,9 +25,8 @@ template <typename Dtype>
struct OpInfo {
OpCreator<Dtype> creator_;
const OpCreator<Dtype> &Creator() const {
// PADDLE_ENFORCE_NOT_NULL(creator_,
// "Operator Creator has not been
// registered");
PADDLE_MOBILE_ENFORCE(creator_ != nullptr,
"Operator Creator has not been registered");
return creator_;
}
};
......@@ -48,17 +47,15 @@ class OpInfoMap {
}
void Insert(const std::string &type, const OpInfo<Dtype> &info) {
// PADDLE_ENFORCE(!Has(type), "Operator %s has been
// registered", type);
PADDLE_MOBILE_ENFORCE(!Has(type), "Operator %s has been registered",
type.c_str());
map_.insert({type, info});
}
const OpInfo<Dtype> &Get(const std::string &type) const {
auto op_info_ptr = GetNullable(type);
// PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not
// been
// registered",
// type);
PADDLE_MOBILE_ENFORCE(op_info_ptr != nullptr,
"Operator %s has not been registered", type.c_str());
return *op_info_ptr;
}
......
......@@ -12,10 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <sstream>
#include "framework/operator.h"
#include "framework/program/program-optimize/node.h"
#include "framework/operator.h"
namespace paddle_mobile {
......
......@@ -76,7 +76,6 @@ void Scope::DeleteScope(Scope *scope) const {
auto it = std::find(kids_.begin(), kids_.end(), scope);
kids_.erase(it);
delete scope;
// deferent
}
void Scope::EraseVars(const std::vector<std::string> &var_names) {
......@@ -104,14 +103,6 @@ void Scope::Rename(const std::string &origin_name,
vars_[new_name] = origin_it->second;
vars_.erase(origin_it);
}
//
// std::string Scope::Rename(const std::string& origin_name)
// const {
// auto var_name = string::Sprintf("%p.%d", this,
// vars_.size());
// Rename(origin_name, var_name);
// return var_name;
// }
Variable *Scope::FindVarLocally(const std::string &name) const {
auto it = vars_.find(name);
......
......@@ -39,9 +39,6 @@ void TensorCopy(const Tensor &src, Tensor *dst) {
}
void TensorCopySync(const Tensor &src, Tensor *dst) {
// VLOG(3) << "TensorCopySync " << src.dims() << " from " <<
// src.place()
// << " to " << dst_place;
src.check_memory_size();
dst->Resize(src.dims());
dst->set_layout(src.layout());
......@@ -69,41 +66,6 @@ struct AnyDTypeVisitor {
}
};
template <typename Predicate>
inline void AnyImpl(Predicate predicate, const Tensor &tensor,
framework::Tensor *out) {
VisitDataType(ToDataType(tensor.type()),
AnyDTypeVisitor<Predicate>(predicate, tensor, out));
}
template <typename Predicate>
struct AnyVisitor {
const framework::Tensor &tensor_;
Predicate predicate_;
AnyVisitor(const framework::Tensor &tensor, Predicate predicate)
: tensor_(tensor), predicate_(std::move(predicate)) {}
bool operator()(void) const {
framework::Tensor out;
out.Resize({1});
out.mutable_data<bool>();
AnyImpl(predicate_, tensor_, &out);
return this->GetResult(out);
}
bool GetResult(const framework::Tensor &out) const {
return *out.data<bool>();
}
};
template <typename Predicate>
inline bool Any(const framework::Tensor &tensor, Predicate predicate) {
AnyVisitor<Predicate> visitor(tensor, predicate);
// return platform::VisitPlace(visitor);
return visitor();
}
struct ContainsNANPredicate {
template <typename T>
auto operator()(const T &eigen_vec) const
......@@ -113,11 +75,6 @@ struct ContainsNANPredicate {
}
};
bool TensorContainsNAN(const framework::Tensor &tensor) {
ContainsNANPredicate predicate;
return Any(tensor, predicate);
}
struct ContainsInfPredicate {
template <typename T>
auto operator()(const T &eigen_vec) const
......@@ -127,11 +84,6 @@ struct ContainsInfPredicate {
}
};
bool TensorContainsInf(const framework::Tensor &tensor) {
ContainsInfPredicate predicate;
return Any(tensor, predicate);
}
struct DeserializedDataFunctor {
DeserializedDataFunctor(void **buf, Tensor *tensor)
: buf_(buf), tensor_(tensor) {}
......
......@@ -15,7 +15,6 @@ limitations under the License. */
#pragma once
#include <vector>
#include "memory/t_malloc.h"
#include "platform/data_type.h"
#include "tensor.h"
namespace paddle_mobile {
......
......@@ -14,8 +14,8 @@ limitations under the License. */
#pragma once
#include <memory.h>
#include <map>
#include <memory>
#include <string>
#include <vector>
......@@ -27,7 +27,7 @@ limitations under the License. */
namespace paddle_mobile {
template <typename Dtype, Precision P = Precision::FP32>
template <typename Dtype = CPU, Precision P = Precision::FP32>
class Loader {
public:
const framework::Program<Dtype, P> Load(const std::string &dirname,
......@@ -39,7 +39,7 @@ class Loader {
const std::string &file_path);
};
template <typename Dtype, Precision P = Precision::FP32>
template <typename Dtype = CPU, Precision P = Precision::FP32>
class Executor {
public:
typedef typename PrecisionTrait<P>::ptype Ptype;
......
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BATCHNORM_OP
#include "batchnorm_op.h"
namespace paddle_mobile {
......@@ -29,3 +31,5 @@ template class BatchNormOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(batch_norm);
REGISTER_OPERATOR(batch_norm, ops::BatchNormOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BATCHNORM_OP
#pragma once
#include <string>
......@@ -47,3 +49,5 @@ class BatchNormOp : public framework::OperatorWithKernel<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BOXCODER_OP
#include "operators/box_coder_op.h"
#include <vector>
namespace paddle_mobile {
......@@ -52,3 +54,5 @@ template class BoxCoderOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(box_coder);
REGISTER_OPERATOR(box_coder, ops::BoxCoderOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BOXCODER_OP
#pragma once
#include <string>
......@@ -50,3 +52,5 @@ class BoxCoderOp : public framework::OperatorWithKernel<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONCAT_OP
#include "concat_op.h"
namespace paddle_mobile {
......@@ -62,3 +64,5 @@ template class ConcatOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(concat);
REGISTER_OPERATOR(concat, ops::ConcatOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONCAT_OP
#pragma once
#include <string>
......@@ -45,3 +47,5 @@ class ConcatOp : public framework::OperatorWithKernel<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#include "operators/conv_op.h"
#include <vector>
#include "framework/data_type.h"
#include "framework/op_proto_maker.h"
#include "framework/op_registry.h"
......@@ -54,3 +55,5 @@ template class ConvOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(conv2d);
REGISTER_OPERATOR(conv2d, ops::ConvOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#pragma once
#include <string>
......@@ -53,3 +55,5 @@ inline int ConvOutputSize(int input_size, int filter_size, int dilation,
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef DEPTHWISECONV_OP
#include "operators/depthwise_conv_op.h"
#include <vector>
#include "framework/data_type.h"
#include "framework/op_proto_maker.h"
#include "framework/op_registry.h"
#include "operators/conv_op.h"
......@@ -55,3 +56,5 @@ template class DepthwiseConvOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(depthwise_conv2d);
REGISTER_OPERATOR(depthwise_conv2d, ops::DepthwiseConvOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef DEPTHWISECONV_OP
#pragma once
#include <string>
......@@ -47,3 +49,5 @@ class DepthwiseConvOp : public framework::OperatorWithKernel<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEADD_OP
#include "elementwise_add_op.h"
namespace paddle_mobile {
......@@ -29,3 +31,5 @@ template class ElementwiseAddOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(elementwise_add);
REGISTER_OPERATOR(elementwise_add, ops::ElementwiseAddOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEADD_OP
#pragma once
#include <string>
......@@ -46,3 +48,5 @@ class ElementwiseAddOp : public framework::OperatorWithKernel<DeviceType> {
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSIONCONVADD_OP
#include "operators/fusion_conv_add.h"
namespace paddle_mobile {
namespace operators {
......@@ -25,3 +27,5 @@ template class FushionConvAddOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(conv_add);
REGISTER_OPERATOR(conv_add, ops::FushionConvAddOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSIONCONVADD_OP
#pragma once
#include <string>
......@@ -66,3 +68,5 @@ class FushionConvAddOp : public framework::OperatorWithKernel<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,4 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONVADDRELU_OP
#include "fusion_conv_add_relu_op.h"
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONVADDRELU_OP
#pragma once
#include "framework/operator.h"
......@@ -49,3 +51,5 @@ class ConvAddReluOp {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_FC_OP
#include "operators/fusion_fc_op.h"
namespace paddle_mobile {
namespace operators {
......@@ -54,3 +56,5 @@ template class FushionFcOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(fc);
REGISTER_OPERATOR(fc, ops::FushionFcOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_FC_OP
#pragma once
#include <string>
......@@ -71,3 +73,5 @@ class FushionFcOp : public framework::OperatorWithKernel<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BATCHNORM_OP
#pragma once
#include "operators/kernel/batchnorm_kernel.h"
......@@ -91,3 +93,5 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef BOXCODER_OP
#include "operators/kernel/box_coder_kernel.h"
......@@ -135,3 +135,5 @@ void BoxCoderKernel<CPU, float>::Compute(const BoxCoderParam& param) const {
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef CONCAT_OP
#include "operators/kernel/concat_kernel.h"
......@@ -85,3 +85,5 @@ void ConcatKernel<CPU, float>::Compute(const ConcatParam &param) const {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#include "operators/kernel/conv_kernel.h"
namespace paddle_mobile {
......@@ -112,3 +114,5 @@ template class ConvKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef DEPTHWISECONV_OP
#include "operators/kernel/depthwise_conv_kernel.h"
#include "operators/kernel/conv_kernel.h"
......@@ -124,3 +126,5 @@ template class DepthwiseConvKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEADD_OP
#pragma once
#include "operators/kernel/elementwise_add_kernel.h"
......@@ -40,3 +42,5 @@ template class ElementwiseAddKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_FC_OP
#pragma once
#include "operators/kernel/fushion_fc_kernel.h"
......@@ -65,3 +67,5 @@ void FushionFcKernel<CPU, float>::Compute(const FushionFcParam &param) const {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef LRN_OP
#pragma once
#include "operators/kernel/lrn_kernel.h"
......@@ -42,3 +44,5 @@ template class LrnKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MUL_OP
#pragma once
#include "operators/kernel/mul_kernel.h"
......@@ -48,3 +50,5 @@ template class MulKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MULTICLASSNMS_OP
#pragma once
#include "operators/kernel/multiclass_nms_kernel.h"
......@@ -273,3 +275,5 @@ void MultiClassNMSKernel<CPU, float>::Compute(
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#include <operators/kernel/pool_kernel.h>
#include "common/log.h"
......@@ -73,3 +75,5 @@ void PoolKernel<CPU, float>::Compute(const PoolParam &param) const {
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PRIORBOX_OP
#pragma once
#include "operators/kernel/prior_box_kernel.h"
......@@ -143,3 +145,5 @@ void PriorBoxKernel<CPU, float>::Compute(const PriorBoxParam &param) const {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RELU_OP
#pragma once
#include "operators/kernel/relu_kernel.h"
......@@ -45,3 +47,5 @@ void ReluKernel<CPU, float>::Compute(const ReluParam &param) const {
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE_OP
#pragma once
#include "operators/kernel/reshape_kernel.h"
......@@ -49,3 +51,5 @@ void ReshapeKernel<CPU, float>::Compute(const ReshapeParam &param) const {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SIGMOID_OP
#include "../sigmoid_kernel.h"
#if __ARM_NEON
#include "../../math/math_func_neon.h"
......@@ -25,31 +27,19 @@ using framework::Tensor;
void sigmoid(const Tensor *X, Tensor *Y) {
#if __ARM_NEON
DLOG << "step1";
const float *input = X->data<float>();
DLOG << "step11";
float *output = Y->mutable_data<float>();
DLOG << "step2";
const DDim &dDim = X->dims();
DLOG << "step3";
int axis_index = 1;
if (dDim.size() < 4) {
axis_index = 0;
}
DLOG << "step4";
DDim outer_ddim =
paddle_mobile::framework::slice_ddim(dDim, 0, axis_index + 1);
DDim inner_ddim =
paddle_mobile::framework::slice_ddim(dDim, axis_index + 1, dDim.size());
DLOG << "step5";
int out_size = paddle_mobile::framework::product(outer_ddim);
int inner_size = paddle_mobile::framework::product(inner_ddim);
DLOG << "step6";
DLOG << "outsize=" << out_size;
DLOG << "innersize=" << inner_size;
......@@ -93,3 +83,5 @@ void SigmoidKernel<CPU, float>::Compute(const SigmoidParam &param) const {
template class SigmoidKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SOFTMAX_OP
#include "../softmax_kernel.h"
#include "../../math/softmax.h"
namespace paddle_mobile {
......@@ -29,3 +31,5 @@ void SoftmaxKernel<CPU, float>::Compute(const SoftmaxParam &param) const {
template class SoftmaxKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef TRANSPOSE_OP
#include "operators/kernel/transpose_kernel.h"
......@@ -70,3 +70,5 @@ void TransposeKernel<CPU, float>::Compute(const TransposeParam& param) const {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,9 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BATCHNORM_OP
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
#pragma once;
namespace paddle_mobile {
namespace operators {
......@@ -30,3 +33,5 @@ class BatchNormKernel
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BOXCODER_OP
#pragma once
#include <vector>
#include "framework/operator.h"
#include "operators/math/transform.h"
#include "operators/op_param.h"
#pragma once;
namespace paddle_mobile {
namespace operators {
......@@ -31,3 +33,5 @@ class BoxCoderKernel
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONCAT_OP
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
......@@ -29,3 +31,5 @@ class ConcatKernel : public framework::OpKernelBase<DeviceType, ConcatParam> {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#pragma once
#include <vector>
#include "framework/operator.h"
#include "operators/math/im2col.h"
......@@ -19,8 +23,6 @@ limitations under the License. */
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
#pragma once;
namespace paddle_mobile {
namespace operators {
......@@ -49,3 +51,5 @@ inline bool IsExpand(const std::vector<int64_t> &filter_dim,
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef DEPTHWISECONV_OP
#pragma once
#include "framework/operator.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
#pragma once;
namespace paddle_mobile {
namespace operators {
......@@ -32,3 +34,5 @@ class DepthwiseConvKernel : public OpKernelBase<DeviceType, ConvParam> {
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once;
#ifdef ELEMENTWISEADD_OP
#pragma once
#include "framework/operator.h"
#include "operators/math/elementwise_op_function.h"
......@@ -31,3 +33,5 @@ class ElementwiseAddKernel
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
namespace paddle_mobile {
namespace operators {
......@@ -22,3 +24,5 @@ namespace operators {
// template class ConvKernel<FPGA, float>;
}
} // namespace paddle_mobile
#endif
......@@ -12,12 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_FC_OP
#pragma once
#include "framework/operator.h"
#include "operators/math/math_function.h"
#include "operators/op_param.h"
#pragma once;
namespace paddle_mobile {
namespace operators {
......@@ -29,3 +31,5 @@ class FushionFcKernel
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,9 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef LRN_OP
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
#pragma once;
namespace paddle_mobile {
namespace operators {
......@@ -70,3 +73,5 @@ class LrnKernel : public framework::OpKernelBase<DeviceType, LrnParam> {
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#include "operators/kernel/conv_kernel.h"
namespace paddle_mobile {
......@@ -23,3 +25,5 @@ void ConvKernel<GPU_MALI, float>::Compute(const ConvParam &param) const {}
template class ConvKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,10 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MUL_OP
#pragma once
#include "framework/operator.h"
#include "operators/math/math_function.h"
#include "operators/op_param.h"
#pragma once;
namespace paddle_mobile {
namespace operators {
......@@ -29,3 +32,5 @@ class MulKernel : public framework::OpKernelBase<DeviceType, MulParam> {
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,10 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MULTICLASSNMS_OP
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
#pragma once;
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......@@ -28,3 +31,5 @@ class MultiClassNMSKernel
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#pragma once
#include "framework/operator.h"
......@@ -29,3 +31,5 @@ class PoolKernel : public OpKernelBase<DeviceType, PoolParam> {
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PRIORBOX_OP
#pragma once
#include <vector>
#include "framework/operator.h"
#include "operators/math/transform.h"
#include "operators/op_param.h"
#pragma once;
namespace paddle_mobile {
namespace operators {
......@@ -55,3 +57,5 @@ class PriorBoxKernel
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,10 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RELU_OP
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
#pragma once;
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......@@ -27,3 +30,5 @@ class ReluKernel : public framework::OpKernelBase<DeviceType, ReluParam> {
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,12 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <vector>
#ifdef RESHAPE_OP
#pragma once
#include <vector>
#include "framework/operator.h"
#include "operators/op_param.h"
#pragma once;
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......@@ -72,3 +74,5 @@ class ReshapeKernel : public framework::OpKernelBase<DeviceType, ReshapeParam> {
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SIGMOID_OP
#pragma once
#include "framework/operator.h"
......@@ -27,3 +29,5 @@ class SigmoidKernel : public OpKernelBase<DeviceType, SigmoidParam> {
};
} // namespace operators
} // namespace paddle_mobile
#endif
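The SigmoidKernel guarded above applies the elementwise logistic function. For reference only, a scalar sketch of that math (this is not the repo's implementation; tensor shape and layout handling are omitted):
#include <cmath>

// Scalar reference for elementwise sigmoid: y = 1 / (1 + exp(-x)).
inline void SigmoidRef(const float *x, float *y, int n) {
  for (int i = 0; i < n; ++i) {
    y[i] = 1.0f / (1.0f + std::exp(-x[i]));
  }
}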
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SOFTMAX_OP
#pragma once
#include "framework/operator.h"
......@@ -30,3 +32,5 @@ class SoftmaxKernel : public OpKernelBase<DeviceType, SoftmaxParam> {
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,13 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef TRANSPOSE_OP
#pragma once
#include <vector>
#include "framework/operator.h"
#include "operators/op_param.h"
#pragma once;
namespace paddle_mobile {
namespace operators {
......@@ -30,3 +32,5 @@ class TransposeKernel
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef LRN_OP
#include "lrn_op.h"
namespace paddle_mobile {
......@@ -29,3 +31,5 @@ template class LrnOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(lrn);
REGISTER_OPERATOR(lrn, ops::LrnOp);
#endif
......@@ -11,6 +11,9 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef LRN_OP
#pragma once
#include <string>
......@@ -45,3 +48,5 @@ class LrnOp : public framework::OperatorWithKernel<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -13,10 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/math/gemm.h"
#ifndef X86
#include <arm_neon.h>
#endif
namespace paddle_mobile {
namespace operators {
namespace math {
float ab[MR * NR];
// Copy a block of matrix A into contiguous memory (ColMajor)
void PackMatrixA(int m, int k, int paddingM, const float *A, int lda,
float *buffer) {
......@@ -170,17 +174,197 @@ void InnerKernel(int m, int n, int k, float alpha, const float *A, int lda,
}
// Compute a smaller 4 * 4 block of matrix C
#if defined(IOS)
void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
int ldb, float beta, float *C, int ldc, int mc, int nc) {
// init C
float32x4_t cv0 = vdupq_n_f32(0.0);
float32x4_t cv1 = vdupq_n_f32(0.0);
float32x4_t cv2 = vdupq_n_f32(0.0);
float32x4_t cv3 = vdupq_n_f32(0.0);
float32x4_t av;
float32x4_t bv;
float32x2_t av01;
float32x2_t av23;
for (int p = 0; p < k; p += 1) {
av = vld1q_f32(a);
bv = vld1q_f32(b);
av01 = vget_low_f32(av);
cv0 = vmlaq_lane_f32(cv0, bv, av01, 0);
cv1 = vmlaq_lane_f32(cv1, bv, av01, 1);
av23 = vget_high_f32(av);
cv2 = vmlaq_lane_f32(cv2, bv, av23, 0);
cv3 = vmlaq_lane_f32(cv3, bv, av23, 1);
a += MR;
b += NR;
}
float32x4x4_t cv = {cv0, cv1, cv2, cv3};
int i, j;
for (i = 0; i < mc; ++i) {
for (j = 0; j < nc; ++j) {
if (beta == 0.0) {
C(i, j) = 0.0;
} else if (beta != 1.0) {
C(i, j) *= beta;
}
if (j == 0) {
C(i, j) += alpha * vgetq_lane_f32(cv.val[i], 0);
} else if (j == 1) {
C(i, j) += alpha * vgetq_lane_f32(cv.val[i], 1);
} else if (j == 2) {
C(i, j) += alpha * vgetq_lane_f32(cv.val[i], 2);
} else if (j == 3) {
C(i, j) += alpha * vgetq_lane_f32(cv.val[i], 3);
}
}
}
}
#elif defined(ARMV7)
void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
int ldb, float beta, float *C, int ldc, int mc, int nc) {
int kc1 = k / 2, kc2 = k % 2;
int bytes_ldc = 4 * ldc;
int flag_alpha = (alpha == 1.0) ? 1 : 2;
int flag_beta;
if (beta == 0.0) {
flag_beta = 0;
} else if (beta == 1.0) {
flag_beta = 1;
} else {
flag_beta = 2;
}
asm volatile(
"vmov.f32 q10, #0.0 \n\t"
"vmov.f32 q11, #0.0 \n\t"
"vmov.f32 q12, #0.0 \n\t"
"vmov.f32 q13, #0.0 \n\t"
"subs %[kc1], %[kc1], #1 \n\t"
"blt end_kc1_%= \n\t"
"loop_kc1_%=: \n\t"
"vld1.32 {q0, q1}, [%[a]]! \n\t"
"vld1.32 {q2, q3}, [%[b]]! \n\t"
"vmla.f32 q10, q2, d0[0] \n\t"
"vmla.f32 q11, q2, d0[1] \n\t"
"vmla.f32 q12, q2, d1[0] \n\t"
"vmla.f32 q13, q2, d1[1] \n\t"
"vmla.f32 q10, q3, d2[0] \n\t"
"vmla.f32 q11, q3, d2[1] \n\t"
"vmla.f32 q12, q3, d3[0] \n\t"
"vmla.f32 q13, q3, d3[1] \n\t"
"subs %[kc1], %[kc1], #1 \n\t"
"bge loop_kc1_%= \n\t"
"end_kc1_%=: \n\t"
"subs %[kc2], %[kc2], #1 \n\t"
"blt end_kc2_%= \n\t"
"vld1.32 {q0}, [%[a]]! \n\t"
"vld1.32 {q1}, [%[b]]! \n\t"
"vmla.f32 q10, q1, d0[0] \n\t"
"vmla.f32 q11, q1, d0[1] \n\t"
"vmla.f32 q12, q1, d1[0] \n\t"
"vmla.f32 q13, q1, d1[1] \n\t"
"end_kc2_%=: \n\t"
"cmp %[mc], #4 \n\t"
"bne temp_%= \n\t"
"cmp %[nc], #4 \n\t"
"bne temp_%= \n\t"
"vmov.f32 d8[0], %[alpha] \n\t"
"vmov.f32 d8[1], %[beta] \n\t"
"cmp %[flag_alpha], #1 \n\t"
"bne alpha_%= \n\t"
"alpha_%=: \n\t"
"vmul.f32 q10, q10, d8[0] \n\t"
"vmul.f32 q11, q11, d8[0] \n\t"
"vmul.f32 q12, q12, d8[0] \n\t"
"vmul.f32 q13, q13, d8[0] \n\t"
"beta_%=: \n\t"
"cmp %[flag_beta], #0 \n\t"
"beq memory_%= \n\t"
"mov r4, %[C] \n\t"
"mov r6, %[bytes_ldc]\n\t"
"vld1.32 {q0}, [r4], r6 \n\t"
"vld1.32 {q1}, [r4], r6 \n\t"
"vld1.32 {q2}, [r4], r6 \n\t"
"vld1.32 {q3}, [r4] \n\t"
"cmp %[flag_beta], #1 \n\t"
"beq beta_eq1_%= \n\t"
"bne beta_ne1_%= \n\t"
"beta_eq1_%=: \n\t"
"vadd.f32 q10, q10, q0 \n\t"
"vadd.f32 q11, q11, q1 \n\t"
"vadd.f32 q12, q12, q2 \n\t"
"vadd.f32 q13, q13, q3 \n\t"
"b memory_%= \n\t"
"beta_ne1_%=: \n\t"
"vmla.f32 q10, q0, d8[1] \n\t"
"vmla.f32 q11, q1, d8[1] \n\t"
"vmla.f32 q12, q2, d8[1] \n\t"
"vmla.f32 q13, q3, d8[1] \n\t"
"memory_%=: \n\t"
"mov r5, %[C] \n\t"
"mov r6, %[bytes_ldc]\n\t"
"vst1.32 {q10}, [r5], r6 \n\t"
"vst1.32 {q11}, [r5], r6 \n\t"
"vst1.32 {q12}, [r5], r6 \n\t"
"vst1.32 {q13}, [r5] \n\t"
"b end_%= \n\t"
"temp_%=: \n\t"
"vst1.32 {q10, q11}, [%[ab]]!\n\t"
"vst1.32 {q12, q13}, [%[ab]] \n\t"
"end_%=: \n\t"
:
: [a] "r"(a), [b] "r"(b), [C] "r"(C), [ab] "r"(ab), [kc1] "r"(kc1),
[kc2] "r"(kc2), [mc] "r"(mc), [nc] "r"(nc), [alpha] "r"(alpha),
[beta] "r"(beta), [bytes_ldc] "r"(bytes_ldc),
[flag_alpha] "r"(flag_alpha), [flag_beta] "r"(flag_beta)
: "memory", "q0", "q1", "q2", "q3", "q4", "q10", "q11", "q12", "q13");
if (mc != MR || nc != NR) {
int i, j;
for (i = 0; i < mc; ++i) {
for (j = 0; j < nc; ++j) {
if (beta == 0.0) {
if (alpha != 1.0) {
C(i, j) = alpha * ab[i * MR + j];
} else {
C(i, j) = ab[i * MR + j];
}
} else {
if (beta != 1.0) {
C(i, j) *= beta;
}
if (alpha != 1.0) {
C(i, j) += alpha * ab[i * MR + j];
} else {
C(i, j) += ab[i * MR + j];
}
}
}
}
}
}
#else
void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
int ldb, float beta, float *C, int ldc, int mc, int nc) {
float c[16] = {0};
float reg_a0, reg_a1, reg_a2, reg_a3, reg_b0, reg_b1, reg_b2, reg_b3;
// // init C
// float32x4_t cv0 = vdup_n_f32(0.0);
// float32x4_t cv1 = vdup_n_f32(0.0);
// float32x4_t cv2 = vdup_n_f32(0.0);
// float32x4_t cv3 = vdup_n_f32(0.0);
for (int p = 0; p < k; p += 1) {
reg_b0 = *b++;
reg_b1 = *b++;
......@@ -232,6 +416,7 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
}
}
}
#endif
// 32-bit float matrix multiplication
void sgemm(int m, int n, int k, float alpha, const float *A, int lda,
......
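For reference, the AddDot4x4 variants above (NEON intrinsics, ARMv7 inline assembly, and the scalar fallback) all compute the same contract on packed panels: a 4 * 4 block of C = alpha * A_panel * B_panel + beta * C, where each k step stores MR (= 4) values of A and NR (= 4) values of B contiguously, which is why the kernels advance a by MR and b by NR per iteration. A plain scalar sketch of that contract, assuming the C(i, j) macro indexes a row-major C with leading dimension ldc (the unused lda/ldb parameters are dropped here):
// Reference-only sketch of the AddDot4x4 contract; not the repo's code.
void AddDot4x4Ref(int k, float alpha, const float *a, const float *b,
                  float beta, float *C, int ldc, int mc, int nc) {
  float acc[4][4] = {{0.0f}};  // 4x4 product of the packed panels
  for (int p = 0; p < k; ++p) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 4; ++j) {
        acc[i][j] += a[p * 4 + i] * b[p * 4 + j];
      }
    }
  }
  for (int i = 0; i < mc; ++i) {
    for (int j = 0; j < nc; ++j) {
      float out = alpha * acc[i][j];
      if (beta != 0.0f) {
        out += beta * C[i * ldc + j];  // beta == 0 ignores the old C entirely
      }
      C[i * ldc + j] = out;
    }
  }
}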
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#pragma once
#if __ARM_NEON
......@@ -25,3 +27,5 @@ static void Pool3x3Max() {
static void Pool3x3Avg() {
// TODO: implement with NEON
}
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#pragma once
#if __ARM_NEON
......@@ -25,3 +27,5 @@ static void Pool2x2Max() {
static void Pool2x2Avg() {
// TODO: implement with NEON
}
#endif
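Pool2x2Max() and Pool2x2Avg() above are still stubs. For reference only, a scalar sketch of what a 2 * 2, stride-2 max pool computes, assuming a single row-major H x W channel (the real layout, padding, and border handling are assumptions here, not taken from the repo):
#include <algorithm>

// Scalar sketch of 2x2 max pooling with stride 2; odd borders are ignored.
void Pool2x2MaxRef(const float *in, int h, int w, float *out) {
  const int oh = h / 2, ow = w / 2;
  for (int i = 0; i < oh; ++i) {
    for (int j = 0; j < ow; ++j) {
      const float *p = in + 2 * i * w + 2 * j;  // top-left of the 2x2 window
      out[i * ow + j] =
          std::max(std::max(p[0], p[1]), std::max(p[w], p[w + 1]));
    }
  }
}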
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#include "pooling.h"
#include <common/types.h>
......@@ -91,3 +93,5 @@ template class PoolFunctor<CPU, math::MaxPool<float>, float>;
} // namespace math
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#pragma once
#include "common/log.h"
......@@ -64,3 +66,5 @@ class PoolFunctor {
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -11,6 +11,9 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SOFTMAX_OP
#include "operators/math/softmax.h"
#include "common/types.h"
#if __ARM_NEON
......@@ -153,3 +156,4 @@ template class SoftmaxFuntor<CPU, float>;
} // namespace math
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SOFTMAX_OP
#pragma once
#include "framework/tensor.h"
namespace paddle_mobile {
......@@ -26,3 +27,4 @@ class SoftmaxFuntor {
} // namespace math
} // namespace operators
} // namespace paddle_mobile
#endif
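For reference, a scalar sketch of the per-row math behind SoftmaxFuntor (the repo's NEON path and axis/batch handling are omitted; this is not the project's code): subtract the row maximum for numerical stability, exponentiate, then normalize by the sum.
#include <cmath>

// Numerically stable scalar softmax over a single row of n values.
void SoftmaxRef(const float *x, float *y, int n) {
  float max_v = x[0];
  for (int i = 1; i < n; ++i) {
    if (x[i] > max_v) max_v = x[i];
  }
  float sum = 0.0f;
  for (int i = 0; i < n; ++i) {
    y[i] = std::exp(x[i] - max_v);
    sum += y[i];
  }
  for (int i = 0; i < n; ++i) {
    y[i] /= sum;
  }
}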
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MUL_OP
#include "mul_op.h"
namespace paddle_mobile {
......@@ -55,3 +57,5 @@ template class MulOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(mul);
REGISTER_OPERATOR(mul, ops::MulOp);
#endif
......@@ -11,6 +11,9 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MUL_OP
#pragma once
#include <string>
......@@ -45,3 +48,5 @@ class MulOp : public framework::OperatorWithKernel<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MULTICLASSNMS_OP
#include "operators/multiclass_nms_op.h"
namespace paddle_mobile {
namespace operators {
......@@ -39,3 +41,5 @@ template class MultiClassNMSOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(multiclass_nms);
REGISTER_OPERATOR(multiclass_nms, ops::MultiClassNMSOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MULTICLASSNMS_OP
#pragma once
#include <string>
......@@ -50,3 +52,5 @@ class MultiClassNMSOp : public framework::OperatorWithKernel<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -13,9 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "op_param.h"
namespace paddle_mobile {
namespace operators {
#ifdef CONV_OP
Print &operator<<(Print &printer, const ConvParam &conv_param) {
printer << "parameter of conv: "
<< "\n";
......@@ -36,5 +37,7 @@ Print &operator<<(Print &printer, const ConvParam &conv_param) {
printer << " output dims: " << conv_param.Output()->dims();
return printer;
}
#endif
} // namespace operators
} // namespace paddle_mobile
......@@ -191,6 +191,7 @@ class OpParam {
}
};
#ifdef CONV_OP
class ConvParam : OpParam {
public:
ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
......@@ -230,7 +231,9 @@ class ConvParam : OpParam {
};
Print &operator<<(Print &printer, const ConvParam &conv_param);
#endif
#ifdef ELEMENTWISEADD_OP
class ElementwiseAddParam : OpParam {
public:
ElementwiseAddParam(const VariableNameMap &inputs,
......@@ -258,6 +261,9 @@ class ElementwiseAddParam : OpParam {
int axis_;
};
#endif
#ifdef MUL_OP
class MulParam : OpParam {
public:
MulParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
......@@ -287,7 +293,9 @@ class MulParam : OpParam {
int x_num_col_dims_;
int y_num_col_dims_;
};
#endif
#ifdef CONCAT_OP
class ConcatParam : public OpParam {
public:
ConcatParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
......@@ -309,7 +317,9 @@ class ConcatParam : public OpParam {
Tensor *out_;
int axis_;
};
#endif
#ifdef LRN_OP
class LrnParam : public OpParam {
public:
LrnParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
......@@ -351,6 +361,9 @@ class LrnParam : public OpParam {
float k_;
string data_format_;
};
#endif
#ifdef BATCHNORM_OP
class BatchNormParam : OpParam {
public:
BatchNormParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
......@@ -399,6 +412,9 @@ class BatchNormParam : OpParam {
bool is_test_;
string data_format_;
};
#endif
#ifdef POOL_OP
class PoolParam : public OpParam {
public:
PoolParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
......@@ -442,6 +458,9 @@ class PoolParam : public OpParam {
bool gloabal_pooling_ = false;
};
#endif
#ifdef PRIORBOX_OP
class PriorBoxParam : public OpParam {
public:
PriorBoxParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
......@@ -503,7 +522,9 @@ class PriorBoxParam : public OpParam {
float step_h_;
float offset_;
};
#endif
#ifdef BOXCODER_OP
class BoxCoderParam : public OpParam {
public:
BoxCoderParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
......@@ -533,7 +554,9 @@ class BoxCoderParam : public OpParam {
Tensor *output_box_;
std::string code_type_;
};
#endif
#ifdef SOFTMAX_OP
class SoftmaxParam : public OpParam {
public:
SoftmaxParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
......@@ -549,7 +572,9 @@ class SoftmaxParam : public OpParam {
Tensor *input_x_;
Tensor *out_;
};
#endif
#ifdef SIGMOID_OP
class SigmoidParam : public OpParam {
public:
SigmoidParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
......@@ -565,6 +590,9 @@ class SigmoidParam : public OpParam {
Tensor *input_x_;
Tensor *out_;
};
#endif
#ifdef MULTICLASSNMS_OP
class MultiClassNMSParam : public OpParam {
public:
MultiClassNMSParam(const VariableNameMap &inputs,
......@@ -610,6 +638,7 @@ class MultiClassNMSParam : public OpParam {
float nms_eta_;
float score_threshold_;
};
#endif
class FeedParam : public OpParam {
public:
......@@ -646,6 +675,7 @@ class FetchParam : public OpParam {
Tensor *out_;
};
#ifdef TRANSPOSE_OP
class TransposeParam : public OpParam {
public:
TransposeParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
......@@ -666,7 +696,9 @@ class TransposeParam : public OpParam {
Tensor *out_;
vector<int> axis_;
};
#endif
#ifdef RESHAPE_OP
class ReshapeParam : public OpParam {
public:
ReshapeParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
......@@ -695,7 +727,9 @@ class ReshapeParam : public OpParam {
vector<int> shape_;
bool inplace_;
};
#endif
#ifdef RELU_OP
/*
 * @b The op layer instantiates this param and passes it to the kernel layer for use
* */
......@@ -715,7 +749,9 @@ class ReluParam : public OpParam {
Tensor *input_x_;
Tensor *out_;
};
#endif
#ifdef FUSION_FC_OP
class FushionFcParam : public OpParam {
public:
FushionFcParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
......@@ -751,6 +787,7 @@ class FushionFcParam : public OpParam {
int y_num_col_dims_;
int axis_;
};
#endif
} // namespace operators
} // namespace paddle_mobile
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#include "pool_op.h"
namespace paddle_mobile {
......@@ -57,3 +59,5 @@ template class PoolOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(pool2d);
REGISTER_OPERATOR(pool2d, ops::PoolOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#pragma once
#include <framework/operator.h>
......@@ -47,3 +49,5 @@ class PoolOp : public OperatorWithKernel<DeviceType> {
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PRIORBOX_OP
#include "operators/prior_box_op.h"
#include <vector>
namespace paddle_mobile {
......@@ -49,3 +51,5 @@ template class PriorBoxOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(prior_box);
REGISTER_OPERATOR(prior_box, ops::PriorBoxOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PRIORBOX_OP
#pragma once
#include <string>
......@@ -50,3 +52,5 @@ class PriorBoxOp : public framework::OperatorWithKernel<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RELU_OP
#include "operators/relu_op.h"
namespace paddle_mobile {
namespace operators {
......@@ -33,3 +35,5 @@ template class ReluOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(relu);
REGISTER_OPERATOR(relu, ops::ReluOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RELU_OP
#pragma once
#include <string>
......@@ -59,3 +61,5 @@ class ReluOp : public framework::OperatorWithKernel<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE_OP
#include "operators/reshape_op.h"
#include <vector>
namespace paddle_mobile {
......@@ -32,3 +34,5 @@ template class ReshapeOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(reshape);
REGISTER_OPERATOR(reshape, ops::ReshapeOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE_OP
#pragma once
#include <string>
......@@ -49,3 +51,5 @@ class ReshapeOp : public framework::OperatorWithKernel<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SIGMOID_OP
#include "operators/sigmoid_op.h"
namespace paddle_mobile {
......@@ -27,3 +29,5 @@ template class SigmoidOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(sigmoid);
REGISTER_OPERATOR(sigmoid, ops::SigmoidOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SIGMOID_OP
#pragma once
#include <framework/operator.h>
......@@ -47,3 +49,5 @@ class SigmoidOp : public framework::OperatorWithKernel<DeviceType> {
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SOFTMAX_OP
#include "operators/softmax_op.h"
namespace paddle_mobile {
......@@ -27,3 +29,5 @@ template class SoftmaxOp<CPU, float>;
namespace ops = paddle_mobile::operators;
USE_OP(softmax);
REGISTER_OPERATOR(softmax, ops::SoftmaxOp);
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SOFTMAX_OP
#pragma once
#include <framework/operator.h>
......@@ -47,3 +49,5 @@ class SoftmaxOp : public framework::OperatorWithKernel<DeviceType> {
};
} // namespace operators
} // namespace paddle_mobile
#endif
(Seven additional file diffs are collapsed in this view.)