diff --git a/CMakeLists.txt b/CMakeLists.txt
index f60846e98aa9ca36bd6bd68cccdda6e3d2ff616a..02e4ebb3c79e699c7a383abec6ca7d3c6f0dc87d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,10 +1,48 @@
 cmake_minimum_required(VERSION 3.0)
 project(paddle-mobile)
-add_definitions(-DPADDLE_MOBILE_DEBUG)
-add_definitions(-DENABLE_EXCEPTION)
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
-set(CMAKE_BUILD_TYPE RelWithDebInfo)
+# User-facing switches (all default to OFF).
+option(DEBUGING "enable debug mode" OFF)
+option(USE_OPENMP "openmp support" OFF)
+option(USE_EXCEPTION "use std exception" OFF)
+
+# DEBUGING drives the whole configuration: a Debug build with the
+# PADDLE_MOBILE_DEBUG logging macros, or a Release build that hides symbols.
+if (DEBUGING)
+    message(STATUS "debuging")
+    set(CMAKE_BUILD_TYPE Debug)
+    add_definitions(-DPADDLE_MOBILE_DEBUG)
+    if (ANDROID)
+        # Link liblog as a library instead of passing -llog through the compile flags.
+        link_libraries(log)
+    endif ()
+else ()
+    message(STATUS "releasing")
+    set(CMAKE_BUILD_TYPE Release)
+    # Compiler switches belong in add_compile_options, not add_definitions.
+    add_compile_options(-fvisibility=hidden -fvisibility-inlines-hidden)
+endif ()
+
+if (USE_EXCEPTION)  # ENABLE_EXCEPTION is the macro enforce.h tests before defining PaddleMobileException
+    add_definitions(-DENABLE_EXCEPTION)
+    add_compile_options(-fexceptions)  # compiler switch, so not an add_definitions() argument
+else ()
+    add_compile_options(-fno-exceptions)
+endif ()
+
+if(IS_MAC)  # first matching switch wins; macOS uses the same X86 macro as the fallback
+    add_definitions(-DX86)
+elseif(IS_IOS)
+    add_definitions(-DIOS)
+elseif(V7)  # V7 / V8 are plain variables expected from the command line (e.g. -DV7=true)
+    add_definitions(-DARMV7)
+elseif(V8)
+    add_definitions(-DARMV8)
+else ()  # no platform switch given: build for X86
+    add_definitions(-DX86)
+endif()
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")  # raises the language level from the previous -std=c++11
 set(CMAKE_VERBOSE_MAKEFILE ON)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY build)
@@ -14,26 +52,86 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
 file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c)
 file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
 
-# include headers
+if (NOT ANDROID)
+    # list(REMOVE_ITEM) matches exact entries and never expands wildcards, so glob the JNI sources first.
+    file(GLOB_RECURSE PADDLE_MOBILE_JNI_CC src/jni/*.cc src/jni/*.cpp src/jni/*.c)
+    list(REMOVE_ITEM PADDLE_MOBILE_CC "" ${PADDLE_MOBILE_JNI_CC})  # "" keeps the call valid when the glob is empty
+endif ()
+
 include_directories(src/)
 
-#include(ExternalProject)
-#ExternalProject_Add(openblas_proj
-#        GIT_REPOSITORY "https://github.com/xianyi/OpenBLAS.git"
-#        GIT_TAG "v0.2.20"
-#        SOURCE_DIR "openblas/"
-#        BUILD_IN_SOURCE 1
-#        CONFIGURE_COMMAND ""
-#        BUILD_COMMAND "make" "ONLY_CBLAS=1"
-#        INSTALL_COMMAND "make" "PREFIX=${CMAKE_BINARY_DIR}/" "install"
-#        )
-#set_target_properties(openblas_proj PROPERTIES EXCLUDE_FROM_ALL 1)
+if(USE_OPENMP)  # opt-in OpenMP support (option defaults to OFF)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
+    add_definitions(-DPADDLE_MOBILE_USE_OPENMP)  # macro that src/common/openmp-fix.cpp is guarded by
+endif()
+
+# Compile in only the operators the selected network needs; with no network switch set, use the full list.
+if (googlenet)
+    add_definitions(-DCONCAT_OP)
+    add_definitions(-DCONV_OP)
+    add_definitions(-DLRN_OP)
+    add_definitions(-DMUL_OP)
+    add_definitions(-DELEMENTWISEADD_OP)
+    add_definitions(-DFUSION_FC_OP)
+    add_definitions(-DPOOL_OP)
+    add_definitions(-DRELU_OP)
+elseif (mobilenet)
+    add_definitions(-DCONV_OP)
+    add_definitions(-DELEMENTWISEADD_OP)
+    add_definitions(-DRELU_OP)
+    add_definitions(-DSOFTMAX_OP)  # NOTE(review): was listed twice; confirm no other operator was intended
+    add_definitions(-DDEPTHWISECONV_OP)
+    add_definitions(-DBATCHNORM_OP)
+    add_definitions(-DPOOL_OP)
+    add_definitions(-DRESHAPE_OP)
+elseif (yolo)
+    add_definitions(-DBATCHNORM_OP)
+    add_definitions(-DCONV_OP)
+    add_definitions(-DRELU_OP)
+    add_definitions(-DELEMENTWISEADD_OP)
+elseif (squeezenet)
+    add_definitions(-DCONCAT_OP)
+    add_definitions(-DCONV_OP)
+    add_definitions(-DRELU_OP)
+    add_definitions(-DELEMENTWISEADD_OP)
+    add_definitions(-DPOOL_OP)
+    add_definitions(-DRESHAPE_OP)
+    add_definitions(-DSOFTMAX_OP)
+elseif(resnet)
+    add_definitions(-DCONV_OP)
+    add_definitions(-DBATCHNORM_OP)
+    add_definitions(-DELEMENTWISEADD_OP)
+    add_definitions(-DSOFTMAX_OP)
+    add_definitions(-DMUL_OP)
+    add_definitions(-DPOOL_OP)
+    add_definitions(-DRELU_OP)
+else ()
+    add_definitions(-DBATCHNORM_OP)
+    add_definitions(-DBOXCODER_OP)
+    add_definitions(-DCONCAT_OP)
+    add_definitions(-DCONV_OP)
+    add_definitions(-DDEPTHWISECONV_OP)
+    add_definitions(-DELEMENTWISEADD_OP)
+    add_definitions(-DFUSIONCONVADD_OP)
+    add_definitions(-DCONVADDRELU_OP)
+    add_definitions(-DFUSION_FC_OP)
+    add_definitions(-DLRN_OP)
+    add_definitions(-DMUL_OP)
+    add_definitions(-DMULTICLASSNMS_OP)
+    add_definitions(-DPOOL_OP)
+    add_definitions(-DPRIORBOX_OP)
+    add_definitions(-DRELU_OP)
+    add_definitions(-DRESHAPE_OP)
+    add_definitions(-DSIGMOID_OP)
+    add_definitions(-DSOFTMAX_OP)
+    add_definitions(-DTRANSPOSE_OP)
+endif()
+
 
-#add_dependencies(paddle-mobile openblas_proj)
+add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})  # one shared library from every globbed source and header
 
-# gen static
-ADD_LIBRARY(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
+if(DEBUGING)  # the test directory is only configured for debugging builds
+    add_subdirectory(test)
+endif()
 
-#add_dependencies(paddle-mobile openblas_proj)
 
-add_subdirectory(test)
diff --git a/build.sh b/build.sh
deleted file mode 100755
index dc31f3b47f47975309e46c063bee142d1cc1a14f..0000000000000000000000000000000000000000
--- a/build.sh
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/env bash
-
-build_for_mac() {
-    if [ ! `which brew` ]; then
-        echo "building failed! homebrew not found, please install homebrew."
-        return
-    fi
-    if [ ! `which cmake` ]; then
-        echo "installing cmake."
-        brew install cmake
-        if [ ! $? ]; then
-            echo "cmake install failed."
-            return
-        fi
-    fi
-    PLATFORM="x86"
-    MODE="Release"
-    CXX_FLAGS="-std=c++11 -O3 -s"
-    BUILD_DIR=build/release/"${PLATFORM}"
-    mkdir -p ${BUILD_DIR}/build
-
-    mkdir -p ${BUILD_DIR}/test
-    cp -r test/models ${BUILD_DIR}/test/models
-
-    cmake . \
-        -B"${BUILD_DIR}" \
-    	-DCMAKE_BUILD_TYPE="${MODE}" \
-    	-DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
-    	-DIS_MAC=true
-
-    cd ${BUILD_DIR}
-    make -j 8
-}
-
-build_for_android() {
-    if [ -z "${ANDROID_NDK}" ]; then
-        echo "ANDROID_NDK not found!"
-        exit -1
-    fi
-
-    PLATFORM="arm-v7a"
-#    PLATFORM="arm-v8a"
-
-    if [ "${PLATFORM}" = "arm-v7a" ]; then
-        ABI="armeabi-v7a with NEON"
-        ARM_PLATFORM="V7"
-        CXX_FLAGS="-O3 -std=c++11 -s -march=armv7-a -mfpu=neon -mfloat-abi=softfp -pie -fPIE -w -Wno-error=format-security -llog"
-    elif [ "${PLATFORM}" = "arm-v8a" ]; then
-        ABI="arm64-v8a"
-        ARM_PLATFORM="V8"
-        CXX_FLAGS="-O3 -std=c++11 -s -march=armv8-a  -pie -fPIE -w -Wno-error=format-security -llog"
-    else
-        echo "unknown platform!"
-        exit -1
-    fi
-
-    MODE="Release"
-    ANDROID_PLATFORM_VERSION="android-15"
-    TOOLCHAIN_FILE="./tools/android-cmake/android.toolchain.cmake"
-    ANDROID_ARM_MODE="arm"
-
-    cmake . \
-        -B"build/release/${PLATFORM}" \
-        -DANDROID_ABI="${ABI}" \
-        -DCMAKE_BUILD_TYPE="${MODE}" \
-        -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
-        -DANDROID_PLATFORM="${ANDROID_PLATFORM_VERSION}" \
-        -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
-        -DANDROID_STL=c++_static \
-        -DANDROID=true \
-        -D"${ARM_PLATFORM}"=true
-
-    cd "./build/release/${PLATFORM}"
-    make -j 8
-}
-
-build_for_ios() {
-    PLATFORM="ios"
-    MODE="Release"
-    BUILD_DIR=build/release/"${PLATFORM}"
-    TOOLCHAIN_FILE="./tools/ios-cmake/ios.toolchain.cmake"
-    C_FLAGS="-fobjc-abi-version=2 -fobjc-arc -isysroot ${CMAKE_OSX_SYSROOT}"
-    CXX_FLAGS="-fobjc-abi-version=2 -fobjc-arc -std=gnu++11 -stdlib=libc++ -isysroot ${CMAKE_OSX_SYSROOT}"
-    mkdir -p "${BUILD_DIR}"
-
-    cmake . \
-        -B"${BUILD_DIR}" \
-        -DCMAKE_BUILD_TYPE="${MODE}" \
-        -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
-        -DIOS_PLATFORM=OS \
-        -DCMAKE_C_FLAGS="${C_FLAGS}" \
-        -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
-        -DIS_IOS="true" \
-
-    cd "${BUILD_DIR}"
-    make -j 8
-}
-
-build_error() {
-    echo "unknown argument"
-}
-
-if [ $# -lt 1 ]; then
-	echo "error: target missing!"
-    echo "available targets: mac|linux|ios|android"
-    echo "sample usage: ./build.sh mac"
-else
-	if [ $1 = "mac" ]; then
-		build_for_mac
-	elif [ $1 = "linux" ]; then
-		build_for_linux
-	elif [ $1 = "android" ]; then
-		build_for_android
-	elif [ $1 = "ios" ]; then
-		build_for_ios
-	else
-		build_error
-	fi
-fi
diff --git a/src/common/enforce.h b/src/common/enforce.h
index 52bda2258a00c7444762fe8297380c1c7752dd42..51d2110e32433686d1b3353bc63b92a564a13e9d 100644
--- a/src/common/enforce.h
+++ b/src/common/enforce.h
@@ -17,8 +17,6 @@ limitations under the License. */
 #ifdef ENABLE_EXCEPTION
 #include <stdio.h>
 #include <exception>
-#include <sstream>
-#include <stdexcept>
 #include <string>
 
 #endif
@@ -32,12 +30,11 @@ struct PaddleMobileException : public std::exception {
 
   PaddleMobileException(const char *header, const char *detail,
                         const char *file, const int line) {
-    std::stringstream ss;
-    ss << exception_prefix << "| " << header << "\n";
-    ss << "| [in file] : " << file << " \n";
-    ss << "| [on line] : " << line << " \n";
-    ss << "| [detail]  : " << detail;
-    message = ss.str();
+    char text[1500];  // fixed-size scratch buffer; snprintf truncates anything longer
+    snprintf(text, sizeof(text),
+             "%s| %s \n| [in file] : %s\n| [on line] : %d\n| [detail]  : %s\n",
+             exception_prefix.c_str(), header, file, line, detail);
+    message.assign(text);
   }
   const char *what() const noexcept { return message.c_str(); }
 };
diff --git a/src/common/log.h b/src/common/log.h
index 052fb7df2ba74177205ef26cbebbc88c08e03e09..07afdb39d04f2bf3ba083f79e812fb951a6194be 100644
--- a/src/common/log.h
+++ b/src/common/log.h
@@ -20,11 +20,38 @@ limitations under the License. */
 #include <sstream>
 #include <string>
 #endif
+#ifdef ANDROID
+#include <android/log.h>
+#endif
 
 namespace paddle_mobile {
 
 #ifdef PADDLE_MOBILE_DEBUG
 
+#ifdef ANDROID
+
+extern const char *ANDROID_LOG_TAG;
+
+// Each ANDROIDLOG* macro writes the message to logcat and to stdout. The pair
+// of calls is wrapped in do { } while (0) so the macro expands to a single
+// statement and stays correct inside an unbraced if/else.
+#define ANDROIDLOG_IMPL(priority, ...)                           \
+  do {                                                           \
+    __android_log_print(priority, ANDROID_LOG_TAG, __VA_ARGS__); \
+    printf(__VA_ARGS__);                                         \
+  } while (0)
+#define ANDROIDLOGI(...) ANDROIDLOG_IMPL(ANDROID_LOG_INFO, __VA_ARGS__)
+#define ANDROIDLOGW(...) ANDROIDLOG_IMPL(ANDROID_LOG_WARNING, __VA_ARGS__)
+#define ANDROIDLOGD(...) ANDROIDLOG_IMPL(ANDROID_LOG_DEBUG, __VA_ARGS__)
+#define ANDROIDLOGE(...) ANDROIDLOG_IMPL(ANDROID_LOG_ERROR, __VA_ARGS__)
+#else
+#define ANDROIDLOGI(...)
+#define ANDROIDLOGW(...)
+#define ANDROIDLOGD(...)
+#define ANDROIDLOGE(...)
+
+#endif
+
 enum LogLevel {
   kNO_LOG,
   kLOG_ERROR,
@@ -122,6 +149,11 @@ struct ToLog {
 
 #else
 
+#define ANDROIDLOGI(...)  // presumably the non-PADDLE_MOBILE_DEBUG branch: the Android log macros expand to nothing
+#define ANDROIDLOGW(...)
+#define ANDROIDLOGD(...)
+#define ANDROIDLOGE(...)
+
 enum LogLevel {
   kNO_LOG,
   kLOG_ERROR,
diff --git a/src/common/log.cpp b/src/common/macros.h
similarity index 89%
rename from src/common/log.cpp
rename to src/common/macros.h
index dbc4554c5f2ef6ea2dc9ec76714277e8e24b0c8a..ee38f19c9285b369e48c550b67f6c397695e73cf 100644
--- a/src/common/log.cpp
+++ b/src/common/macros.h
@@ -12,6 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "log.h"
+#pragma once
 
-namespace paddle_mobile {}
+#define EXPORT __attribute__((visibility("default")))  // marks a symbol as exported when building with -fvisibility=hidden
diff --git a/src/platform/macros.h b/src/common/openmp-fix.cpp
similarity index 51%
rename from src/platform/macros.h
rename to src/common/openmp-fix.cpp
index ce133562cae0e4cd8720973c8f71ebca0e7e897d..8c31ef45c68227c612155e826e664367a7917501 100644
--- a/src/platform/macros.h
+++ b/src/common/openmp-fix.cpp
@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#pragma once
-
-// Disable the copy and assignment operator for a class.
-#ifndef DISABLE_COPY_AND_ASSIGN
-#define DISABLE_COPY_AND_ASSIGN(classname)          \
- private:                                           \
-  classname(const classname &) = delete;            \
-  classname(classname &&) = delete;                 \
-  classname &operator=(const classname &) = delete; \
-  classname &operator=(classname &&) = delete
+#ifdef PADDLE_MOBILE_USE_OPENMP
+/**
+ * android-ndk-r17 has a problem when linking with OpenMP: if paddle-mobile is
+ * built with -fopenmp but never calls any omp_* function itself, then after
+ * another binary links against libpaddle-mobile.so, omp_get_thread_num will
+ * not work in that binary (see test/common/test_openmp.cc). The detailed
+ * reason is still unclear, but this trick works. A better solution would be to
+ * find linker flags that force the omp_* functions to be linked, but I did not
+ * find out how to make that work.
+ */
+#include <omp.h>
+static int _ = omp_get_num_procs();  // the call is the workaround: it makes this library reference an omp_* function
 #endif
diff --git a/src/common/protobuf-c.c b/src/common/protobuf-c.c
index fd0e3d80a21282fe7bb600c2fdb174411fa315a3..1092e3f78b02a343d8c8965ea7b2d777a6fac9ae 100644
--- a/src/common/protobuf-c.c
+++ b/src/common/protobuf-c.c
@@ -711,47 +711,6 @@ static inline size_t uint32_pack(uint32_t value, uint8_t *out) {
   return rv;
 }
 
-/**
- * Pack a signed 32-bit integer and return the number of bytes written.
- * Negative numbers are encoded as two's complement 64-bit integers.
- *
- * \param value
- *      Value to encode.
- * \param[out] out
- *      Packed value.
- * \return
- *      Number of bytes written to `out`.
- */
-static inline size_t int32_pack(int32_t value, uint8_t *out) {
-  if (value < 0) {
-    out[0] = value | 0x80;
-    out[1] = (value >> 7) | 0x80;
-    out[2] = (value >> 14) | 0x80;
-    out[3] = (value >> 21) | 0x80;
-    out[4] = (value >> 28) | 0x80;
-    out[5] = out[6] = out[7] = out[8] = 0xff;
-    out[9] = 0x01;
-    return 10;
-  } else {
-    return uint32_pack(value, out);
-  }
-}
-
-/**
- * Pack a signed 32-bit integer using ZigZag encoding and return the number of
- * bytes written.
- *
- * \param value
- *      Value to encode.
- * \param[out] out
- *      Packed value.
- * \return
- *      Number of bytes written to `out`.
- */
-static inline size_t sint32_pack(int32_t value, uint8_t *out) {
-  return uint32_pack(zigzag32(value), out);
-}
-
 /**
  * Pack a 64-bit unsigned integer using base-128 varint encoding and return the
  * number of bytes written.
@@ -789,116 +748,6 @@ static size_t uint64_pack(uint64_t value, uint8_t *out) {
   return rv;
 }
 
-/**
- * Pack a 64-bit signed integer in ZigZag encoding and return the number of
- * bytes written.
- *
- * \param value
- *      Value to encode.
- * \param[out] out
- *      Packed value.
- * \return
- *      Number of bytes written to `out`.
- */
-static inline size_t sint64_pack(int64_t value, uint8_t *out) {
-  return uint64_pack(zigzag64(value), out);
-}
-
-/**
- * Pack a 32-bit quantity in little-endian byte order. Used for protobuf wire
- * types fixed32, sfixed32, float. Similar to "htole32".
- *
- * \param value
- *      Value to encode.
- * \param[out] out
- *      Packed value.
- * \return
- *      Number of bytes written to `out`.
- */
-static inline size_t fixed32_pack(uint32_t value, void *out) {
-#if !defined(WORDS_BIGENDIAN)
-  memcpy(out, &value, 4);
-#else
-  uint8_t *buf = out;
-
-  buf[0] = value;
-  buf[1] = value >> 8;
-  buf[2] = value >> 16;
-  buf[3] = value >> 24;
-#endif
-  return 4;
-}
-
-/**
- * Pack a 64-bit quantity in little-endian byte order. Used for protobuf wire
- * types fixed64, sfixed64, double. Similar to "htole64".
- *
- * \todo The big-endian impl is really only good for 32-bit machines, a 64-bit
- * version would be appreciated, plus a way to decide to use 64-bit math where
- * convenient.
- *
- * \param value
- *      Value to encode.
- * \param[out] out
- *      Packed value.
- * \return
- *      Number of bytes written to `out`.
- */
-static inline size_t fixed64_pack(uint64_t value, void *out) {
-#if !defined(WORDS_BIGENDIAN)
-  memcpy(out, &value, 8);
-#else
-  fixed32_pack(value, out);
-  fixed32_pack(value >> 32, ((char *)out) + 4);
-#endif
-  return 8;
-}
-
-/**
- * Pack a boolean value as an integer and return the number of bytes written.
- *
- * \todo Perhaps on some platforms *out = !!value would be a better impl, b/c
- * that is idiomatic C++ in some STL implementations.
- *
- * \param value
- *      Value to encode.
- * \param[out] out
- *      Packed value.
- * \return
- *      Number of bytes written to `out`.
- */
-static inline size_t boolean_pack(protobuf_c_boolean value, uint8_t *out) {
-  *out = value ? TRUE : FALSE;
-  return 1;
-}
-
-/**
- * Pack a NUL-terminated C string and return the number of bytes written. The
- * output includes a length delimiter.
- *
- * The NULL pointer is treated as an empty string. This isn't really necessary,
- * but it allows people to leave required strings blank. (See Issue #13 in the
- * bug tracker for a little more explanation).
- *
- * \param str
- *      String to encode.
- * \param[out] out
- *      Packed value.
- * \return
- *      Number of bytes written to `out`.
- */
-static inline size_t string_pack(const char *str, uint8_t *out) {
-  if (str == NULL) {
-    out[0] = 0;
-    return 1;
-  } else {
-    size_t len = strlen(str);
-    size_t rv = uint32_pack(len, out);
-    memcpy(out + rv, str, len);
-    return rv + len;
-  }
-}
-
 /**
  * Pack a ProtobufCBinaryData and return the number of bytes written. The output
  * includes a length delimiter.
@@ -918,30 +767,6 @@ static inline size_t binary_data_pack(const ProtobufCBinaryData *bd,
   return rv + len;
 }
 
-/**
- * Pack a ProtobufCMessage and return the number of bytes written. The output
- * includes a length delimiter.
- *
- * \param message
- *      ProtobufCMessage object to pack.
- * \param[out] out
- *      Packed message.
- * \return
- *      Number of bytes written to `out`.
- */
-static inline size_t prefixed_message_pack(const ProtobufCMessage *message,
-                                           uint8_t *out) {
-  if (message == NULL) {
-    out[0] = 0;
-    return 1;
-  } else {
-    size_t rv = protobuf_c_message_pack(message, out + 1);
-    uint32_t rv_packed_size = uint32_size(rv);
-    if (rv_packed_size != 1) memmove(out + rv_packed_size, out + 1, rv);
-    return uint32_pack(rv, out) + rv;
-  }
-}
-
 /**
  * Pack a field tag.
  *
@@ -963,143 +788,6 @@ static size_t tag_pack(uint32_t id, uint8_t *out) {
     return uint64_pack(((uint64_t)id) << 3, out);
 }
 
-/**
- * Pack a required field and return the number of bytes written.
- *
- * \param field
- *      Field descriptor.
- * \param member
- *      The field member.
- * \param[out] out
- *      Packed value.
- * \return
- *      Number of bytes written to `out`.
- */
-static size_t required_field_pack(const ProtobufCFieldDescriptor *field,
-                                  const void *member, uint8_t *out) {
-  size_t rv = tag_pack(field->id, out);
-
-  switch (field->type) {
-    case PROTOBUF_C_TYPE_SINT32:
-      out[0] |= PROTOBUF_C_WIRE_TYPE_VARINT;
-      return rv + sint32_pack(*(const int32_t *)member, out + rv);
-    case PROTOBUF_C_TYPE_ENUM:
-    case PROTOBUF_C_TYPE_INT32:
-      out[0] |= PROTOBUF_C_WIRE_TYPE_VARINT;
-      return rv + int32_pack(*(const int32_t *)member, out + rv);
-    case PROTOBUF_C_TYPE_UINT32:
-      out[0] |= PROTOBUF_C_WIRE_TYPE_VARINT;
-      return rv + uint32_pack(*(const uint32_t *)member, out + rv);
-    case PROTOBUF_C_TYPE_SINT64:
-      out[0] |= PROTOBUF_C_WIRE_TYPE_VARINT;
-      return rv + sint64_pack(*(const int64_t *)member, out + rv);
-    case PROTOBUF_C_TYPE_INT64:
-    case PROTOBUF_C_TYPE_UINT64:
-      out[0] |= PROTOBUF_C_WIRE_TYPE_VARINT;
-      return rv + uint64_pack(*(const uint64_t *)member, out + rv);
-    case PROTOBUF_C_TYPE_SFIXED32:
-    case PROTOBUF_C_TYPE_FIXED32:
-    case PROTOBUF_C_TYPE_FLOAT:
-      out[0] |= PROTOBUF_C_WIRE_TYPE_32BIT;
-      return rv + fixed32_pack(*(const uint32_t *)member, out + rv);
-    case PROTOBUF_C_TYPE_SFIXED64:
-    case PROTOBUF_C_TYPE_FIXED64:
-    case PROTOBUF_C_TYPE_DOUBLE:
-      out[0] |= PROTOBUF_C_WIRE_TYPE_64BIT;
-      return rv + fixed64_pack(*(const uint64_t *)member, out + rv);
-    case PROTOBUF_C_TYPE_BOOL:
-      out[0] |= PROTOBUF_C_WIRE_TYPE_VARINT;
-      return rv + boolean_pack(*(const protobuf_c_boolean *)member, out + rv);
-    case PROTOBUF_C_TYPE_STRING:
-      out[0] |= PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED;
-      return rv + string_pack(*(char *const *)member, out + rv);
-    case PROTOBUF_C_TYPE_BYTES:
-      out[0] |= PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED;
-      return rv +
-             binary_data_pack((const ProtobufCBinaryData *)member, out + rv);
-    case PROTOBUF_C_TYPE_MESSAGE:
-      out[0] |= PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED;
-      return rv + prefixed_message_pack(*(ProtobufCMessage *const *)member,
-                                        out + rv);
-  }
-  PROTOBUF_C__ASSERT_NOT_REACHED();
-  return 0;
-}
-
-/**
- * Pack a oneof field and return the number of bytes written. Only packs the
- * field that is selected by the case enum.
- *
- * \param field
- *      Field descriptor.
- * \param oneof_case
- *      Enum value that selects the field in the oneof.
- * \param member
- *      The field member.
- * \param[out] out
- *      Packed value.
- * \return
- *      Number of bytes written to `out`.
- */
-static size_t oneof_field_pack(const ProtobufCFieldDescriptor *field,
-                               uint32_t oneof_case, const void *member,
-                               uint8_t *out) {
-  if (oneof_case != field->id) {
-    return 0;
-  }
-  if (field->type == PROTOBUF_C_TYPE_MESSAGE ||
-      field->type == PROTOBUF_C_TYPE_STRING) {
-    const void *ptr = *(const void *const *)member;
-    if (ptr == NULL || ptr == field->default_value) return 0;
-  }
-  return required_field_pack(field, member, out);
-}
-
-/**
- * Pack an optional field and return the number of bytes written.
- *
- * \param field
- *      Field descriptor.
- * \param has
- *      Whether the field is set.
- * \param member
- *      The field member.
- * \param[out] out
- *      Packed value.
- * \return
- *      Number of bytes written to `out`.
- */
-static size_t optional_field_pack(const ProtobufCFieldDescriptor *field,
-                                  const protobuf_c_boolean has,
-                                  const void *member, uint8_t *out) {
-  if (field->type == PROTOBUF_C_TYPE_MESSAGE ||
-      field->type == PROTOBUF_C_TYPE_STRING) {
-    const void *ptr = *(const void *const *)member;
-    if (ptr == NULL || ptr == field->default_value) return 0;
-  } else {
-    if (!has) return 0;
-  }
-  return required_field_pack(field, member, out);
-}
-
-/**
- * Pack an unlabeled field and return the number of bytes written.
- *
- * \param field
- *      Field descriptor.
- * \param member
- *      The field member.
- * \param[out] out
- *      Packed value.
- * \return
- *      Number of bytes written to `out`.
- */
-static size_t unlabeled_field_pack(const ProtobufCFieldDescriptor *field,
-                                   const void *member, uint8_t *out) {
-  if (field_is_zeroish(field, member)) return 0;
-  return required_field_pack(field, member, out);
-}
-
 /**
  * Given a field type, return the in-memory size.
  *
@@ -1139,236 +827,6 @@ static inline size_t sizeof_elt_in_repeated_array(ProtobufCType type) {
   return 0;
 }
 
-/**
- * Pack an array of 32-bit quantities.
- *
- * \param[out] out
- *      Destination.
- * \param[in] in
- *      Source.
- * \param[in] n
- *      Number of elements in the source array.
- */
-static void copy_to_little_endian_32(void *out, const void *in,
-                                     const unsigned n) {
-#if !defined(WORDS_BIGENDIAN)
-  memcpy(out, in, n * 4);
-#else
-  unsigned i;
-  const uint32_t *ini = in;
-  for (i = 0; i < n; i++) fixed32_pack(ini[i], (uint32_t *)out + i);
-#endif
-}
-
-/**
- * Pack an array of 64-bit quantities.
- *
- * \param[out] out
- *      Destination.
- * \param[in] in
- *      Source.
- * \param[in] n
- *      Number of elements in the source array.
- */
-static void copy_to_little_endian_64(void *out, const void *in,
-                                     const unsigned n) {
-#if !defined(WORDS_BIGENDIAN)
-  memcpy(out, in, n * 8);
-#else
-  unsigned i;
-  const uint64_t *ini = in;
-  for (i = 0; i < n; i++) fixed64_pack(ini[i], (uint64_t *)out + i);
-#endif
-}
-
-/**
- * Get the minimum number of bytes required to pack a field value of a
- * particular type.
- *
- * \param type
- *      Field type.
- * \return
- *      Number of bytes.
- */
-static unsigned get_type_min_size(ProtobufCType type) {
-  if (type == PROTOBUF_C_TYPE_SFIXED32 || type == PROTOBUF_C_TYPE_FIXED32 ||
-      type == PROTOBUF_C_TYPE_FLOAT) {
-    return 4;
-  }
-  if (type == PROTOBUF_C_TYPE_SFIXED64 || type == PROTOBUF_C_TYPE_FIXED64 ||
-      type == PROTOBUF_C_TYPE_DOUBLE) {
-    return 8;
-  }
-  return 1;
-}
-
-/**
- * Get the packed size of an array of same field type.
- *
- * \param field
- *      Field descriptor.
- * \param count
- *      Number of elements of this type.
- * \param array
- *      The elements to get the size of.
- * \return
- *      Number of bytes required.
- */
-static size_t get_packed_payload_length(const ProtobufCFieldDescriptor *field,
-                                        unsigned count, const void *array) {
-  unsigned rv = 0;
-  unsigned i;
-
-  switch (field->type) {
-    case PROTOBUF_C_TYPE_SFIXED32:
-    case PROTOBUF_C_TYPE_FIXED32:
-    case PROTOBUF_C_TYPE_FLOAT:
-      return count * 4;
-    case PROTOBUF_C_TYPE_SFIXED64:
-    case PROTOBUF_C_TYPE_FIXED64:
-    case PROTOBUF_C_TYPE_DOUBLE:
-      return count * 8;
-    case PROTOBUF_C_TYPE_ENUM:
-    case PROTOBUF_C_TYPE_INT32: {
-      const int32_t *arr = (const int32_t *)array;
-      for (i = 0; i < count; i++) rv += int32_size(arr[i]);
-      break;
-    }
-    case PROTOBUF_C_TYPE_SINT32: {
-      const int32_t *arr = (const int32_t *)array;
-      for (i = 0; i < count; i++) rv += sint32_size(arr[i]);
-      break;
-    }
-    case PROTOBUF_C_TYPE_UINT32: {
-      const uint32_t *arr = (const uint32_t *)array;
-      for (i = 0; i < count; i++) rv += uint32_size(arr[i]);
-      break;
-    }
-    case PROTOBUF_C_TYPE_SINT64: {
-      const int64_t *arr = (const int64_t *)array;
-      for (i = 0; i < count; i++) rv += sint64_size(arr[i]);
-      break;
-    }
-    case PROTOBUF_C_TYPE_INT64:
-    case PROTOBUF_C_TYPE_UINT64: {
-      const uint64_t *arr = (const uint64_t *)array;
-      for (i = 0; i < count; i++) rv += uint64_size(arr[i]);
-      break;
-    }
-    case PROTOBUF_C_TYPE_BOOL:
-      return count;
-    default:
-      PROTOBUF_C__ASSERT_NOT_REACHED();
-  }
-  return rv;
-}
-
-/**
- * Pack an array of same field type to a virtual buffer.
- *
- * \param field
- *      Field descriptor.
- * \param count
- *      Number of elements of this type.
- * \param array
- *      The elements to get the size of.
- * \param[out] buffer
- *      Virtual buffer to append data to.
- * \return
- *      Number of bytes packed.
- */
-static size_t pack_buffer_packed_payload(const ProtobufCFieldDescriptor *field,
-                                         unsigned count, const void *array,
-                                         ProtobufCBuffer *buffer) {
-  uint8_t scratch[16];
-  size_t rv = 0;
-  unsigned i;
-
-  switch (field->type) {
-    case PROTOBUF_C_TYPE_SFIXED32:
-    case PROTOBUF_C_TYPE_FIXED32:
-    case PROTOBUF_C_TYPE_FLOAT:
-#if !defined(WORDS_BIGENDIAN)
-      rv = count * 4;
-      goto no_packing_needed;
-#else
-      for (i = 0; i < count; i++) {
-        unsigned len = fixed32_pack(((uint32_t *)array)[i], scratch);
-        buffer->append(buffer, len, scratch);
-        rv += len;
-      }
-      break;
-#endif
-    case PROTOBUF_C_TYPE_SFIXED64:
-    case PROTOBUF_C_TYPE_FIXED64:
-    case PROTOBUF_C_TYPE_DOUBLE:
-#if !defined(WORDS_BIGENDIAN)
-      rv = count * 8;
-      goto no_packing_needed;
-#else
-      for (i = 0; i < count; i++) {
-        unsigned len = fixed64_pack(((uint64_t *)array)[i], scratch);
-        buffer->append(buffer, len, scratch);
-        rv += len;
-      }
-      break;
-#endif
-    case PROTOBUF_C_TYPE_ENUM:
-    case PROTOBUF_C_TYPE_INT32:
-      for (i = 0; i < count; i++) {
-        unsigned len = int32_pack(((int32_t *)array)[i], scratch);
-        buffer->append(buffer, len, scratch);
-        rv += len;
-      }
-      break;
-    case PROTOBUF_C_TYPE_SINT32:
-      for (i = 0; i < count; i++) {
-        unsigned len = sint32_pack(((int32_t *)array)[i], scratch);
-        buffer->append(buffer, len, scratch);
-        rv += len;
-      }
-      break;
-    case PROTOBUF_C_TYPE_UINT32:
-      for (i = 0; i < count; i++) {
-        unsigned len = uint32_pack(((uint32_t *)array)[i], scratch);
-        buffer->append(buffer, len, scratch);
-        rv += len;
-      }
-      break;
-    case PROTOBUF_C_TYPE_SINT64:
-      for (i = 0; i < count; i++) {
-        unsigned len = sint64_pack(((int64_t *)array)[i], scratch);
-        buffer->append(buffer, len, scratch);
-        rv += len;
-      }
-      break;
-    case PROTOBUF_C_TYPE_INT64:
-    case PROTOBUF_C_TYPE_UINT64:
-      for (i = 0; i < count; i++) {
-        unsigned len = uint64_pack(((uint64_t *)array)[i], scratch);
-        buffer->append(buffer, len, scratch);
-        rv += len;
-      }
-      break;
-    case PROTOBUF_C_TYPE_BOOL:
-      for (i = 0; i < count; i++) {
-        unsigned len = boolean_pack(((protobuf_c_boolean *)array)[i], scratch);
-        buffer->append(buffer, len, scratch);
-        rv += len;
-      }
-      return count;
-    default:
-      PROTOBUF_C__ASSERT_NOT_REACHED();
-  }
-  return rv;
-
-#if !defined(WORDS_BIGENDIAN)
-no_packing_needed:
-  buffer->append(buffer, rv, array);
-  return rv;
-#endif
-}
-
 static inline int int_range_lookup(unsigned n_ranges,
                                    const ProtobufCIntRange *ranges, int value) {
   unsigned n;
@@ -2638,147 +2096,3 @@ protobuf_c_boolean protobuf_c_message_check(const ProtobufCMessage *message) {
 
 typedef void (*GenericHandler)(void *service, const ProtobufCMessage *input,
                                ProtobufCClosure closure, void *closure_data);
-void protobuf_c_service_invoke_internal(ProtobufCService *service,
-                                        unsigned method_index,
-                                        const ProtobufCMessage *input,
-                                        ProtobufCClosure closure,
-                                        void *closure_data) {
-  GenericHandler *handlers;
-  GenericHandler handler;
-
-  /*
-   * Verify that method_index is within range. If this fails, you are
-   * likely invoking a newly added method on an old service. (Although
-   * other memory corruption bugs can cause this assertion too.)
-   */
-  assert(method_index < service->descriptor->n_methods);
-
-  /*
-   * Get the array of virtual methods (which are enumerated by the
-   * generated code).
-   */
-  handlers = (GenericHandler *)(service + 1);
-
-  /*
-   * Get our method and invoke it.
-   * \todo Seems like handler == NULL is a situation that needs handling.
-   */
-  handler = handlers[method_index];
-  (*handler)(service, input, closure, closure_data);
-}
-
-void protobuf_c_service_generated_init(
-    ProtobufCService *service, const ProtobufCServiceDescriptor *descriptor,
-    ProtobufCServiceDestroy destroy) {
-  ASSERT_IS_SERVICE_DESCRIPTOR(descriptor);
-  service->descriptor = descriptor;
-  service->destroy = destroy;
-  service->invoke = protobuf_c_service_invoke_internal;
-  memset(service + 1, 0, descriptor->n_methods * sizeof(GenericHandler));
-}
-
-void protobuf_c_service_destroy(ProtobufCService *service) {
-  service->destroy(service);
-}
-
-/* --- querying the descriptors --- */
-
-const ProtobufCEnumValue *protobuf_c_enum_descriptor_get_value_by_name(
-    const ProtobufCEnumDescriptor *desc, const char *name) {
-  unsigned start = 0;
-  unsigned count;
-
-  if (desc == NULL || desc->values_by_name == NULL) return NULL;
-
-  count = desc->n_value_names;
-
-  while (count > 1) {
-    unsigned mid = start + count / 2;
-    int rv = strcmp(desc->values_by_name[mid].name, name);
-    if (rv == 0)
-      return desc->values + desc->values_by_name[mid].index;
-    else if (rv < 0) {
-      count = start + count - (mid + 1);
-      start = mid + 1;
-    } else
-      count = mid - start;
-  }
-  if (count == 0) return NULL;
-  if (strcmp(desc->values_by_name[start].name, name) == 0)
-    return desc->values + desc->values_by_name[start].index;
-  return NULL;
-}
-
-const ProtobufCEnumValue *protobuf_c_enum_descriptor_get_value(
-    const ProtobufCEnumDescriptor *desc, int value) {
-  int rv = int_range_lookup(desc->n_value_ranges, desc->value_ranges, value);
-  if (rv < 0) return NULL;
-  return desc->values + rv;
-}
-
-const ProtobufCFieldDescriptor *protobuf_c_message_descriptor_get_field_by_name(
-    const ProtobufCMessageDescriptor *desc, const char *name) {
-  unsigned start = 0;
-  unsigned count;
-  const ProtobufCFieldDescriptor *field;
-
-  if (desc == NULL || desc->fields_sorted_by_name == NULL) return NULL;
-
-  count = desc->n_fields;
-
-  while (count > 1) {
-    unsigned mid = start + count / 2;
-    int rv;
-    field = desc->fields + desc->fields_sorted_by_name[mid];
-    rv = strcmp(field->name, name);
-    if (rv == 0)
-      return field;
-    else if (rv < 0) {
-      count = start + count - (mid + 1);
-      start = mid + 1;
-    } else
-      count = mid - start;
-  }
-  if (count == 0) return NULL;
-  field = desc->fields + desc->fields_sorted_by_name[start];
-  if (strcmp(field->name, name) == 0) return field;
-  return NULL;
-}
-
-const ProtobufCFieldDescriptor *protobuf_c_message_descriptor_get_field(
-    const ProtobufCMessageDescriptor *desc, unsigned value) {
-  int rv = int_range_lookup(desc->n_field_ranges, desc->field_ranges, value);
-  if (rv < 0) return NULL;
-  return desc->fields + rv;
-}
-
-const ProtobufCMethodDescriptor *
-protobuf_c_service_descriptor_get_method_by_name(
-    const ProtobufCServiceDescriptor *desc, const char *name) {
-  unsigned start = 0;
-  unsigned count;
-
-  if (desc == NULL || desc->method_indices_by_name == NULL) return NULL;
-
-  count = desc->n_methods;
-
-  while (count > 1) {
-    unsigned mid = start + count / 2;
-    unsigned mid_index = desc->method_indices_by_name[mid];
-    const char *mid_name = desc->methods[mid_index].name;
-    int rv = strcmp(mid_name, name);
-
-    if (rv == 0) return desc->methods + desc->method_indices_by_name[mid];
-    if (rv < 0) {
-      count = start + count - (mid + 1);
-      start = mid + 1;
-    } else {
-      count = mid - start;
-    }
-  }
-  if (count == 0) return NULL;
-  if (strcmp(desc->methods[desc->method_indices_by_name[start]].name, name) ==
-      0)
-    return desc->methods + desc->method_indices_by_name[start];
-  return NULL;
-}
diff --git a/src/common/protobuf-c.h b/src/common/protobuf-c.h
index a04559d79a02cf11d2b8f5f168c34377c313bc9a..bd85695b868af6c7b91590196339bc4f7826a256 100644
--- a/src/common/protobuf-c.h
+++ b/src/common/protobuf-c.h
@@ -798,76 +798,6 @@ uint32_t protobuf_c_version_number(void);
  */
 #define PROTOBUF_C_MIN_COMPILER_VERSION 1000000
 
-/**
- * Look up a `ProtobufCEnumValue` from a `ProtobufCEnumDescriptor` by name.
- *
- * \param desc
- *      The `ProtobufCEnumDescriptor` object.
- * \param name
- *      The `name` field from the corresponding `ProtobufCEnumValue` object to
- *      match.
- * \return
- *      A `ProtobufCEnumValue` object.
- * \retval NULL
- *      If not found or if the optimize_for = CODE_SIZE option was set.
- */
-PROTOBUF_C__API
-const ProtobufCEnumValue *protobuf_c_enum_descriptor_get_value_by_name(
-    const ProtobufCEnumDescriptor *desc, const char *name);
-
-/**
- * Look up a `ProtobufCEnumValue` from a `ProtobufCEnumDescriptor` by numeric
- * value.
- *
- * \param desc
- *      The `ProtobufCEnumDescriptor` object.
- * \param value
- *      The `value` field from the corresponding `ProtobufCEnumValue` object to
- *      match.
- *
- * \return
- *      A `ProtobufCEnumValue` object.
- * \retval NULL
- *      If not found.
- */
-PROTOBUF_C__API
-const ProtobufCEnumValue *protobuf_c_enum_descriptor_get_value(
-    const ProtobufCEnumDescriptor *desc, int value);
-
-/**
- * Look up a `ProtobufCFieldDescriptor` from a `ProtobufCMessageDescriptor` by
- * the name of the field.
- *
- * \param desc
- *      The `ProtobufCMessageDescriptor` object.
- * \param name
- *      The name of the field.
- * \return
- *      A `ProtobufCFieldDescriptor` object.
- * \retval NULL
- *      If not found or if the optimize_for = CODE_SIZE option was set.
- */
-PROTOBUF_C__API
-const ProtobufCFieldDescriptor *protobuf_c_message_descriptor_get_field_by_name(
-    const ProtobufCMessageDescriptor *desc, const char *name);
-
-/**
- * Look up a `ProtobufCFieldDescriptor` from a `ProtobufCMessageDescriptor` by
- * the tag value of the field.
- *
- * \param desc
- *      The `ProtobufCMessageDescriptor` object.
- * \param value
- *      The tag value of the field.
- * \return
- *      A `ProtobufCFieldDescriptor` object.
- * \retval NULL
- *      If not found.
- */
-PROTOBUF_C__API
-const ProtobufCFieldDescriptor *protobuf_c_message_descriptor_get_field(
-    const ProtobufCMessageDescriptor *desc, unsigned value);
-
 /**
  * Determine the number of bytes required to store the serialised message.
  *
@@ -947,33 +877,6 @@ PROTOBUF_C__API
 void protobuf_c_message_init(const ProtobufCMessageDescriptor *descriptor,
                              void *message);
 
-/**
- * Free a service.
- *
- * \param service
- *      The service object to free.
- */
-PROTOBUF_C__API
-void protobuf_c_service_destroy(ProtobufCService *service);
-
-/**
- * Look up a `ProtobufCMethodDescriptor` by name.
- *
- * \param desc
- *      Service descriptor.
- * \param name
- *      Name of the method.
- *
- * \return
- *      A `ProtobufCMethodDescriptor` object.
- * \retval NULL
- *      If not found or if the optimize_for = CODE_SIZE option was set.
- */
-PROTOBUF_C__API
-const ProtobufCMethodDescriptor *
-protobuf_c_service_descriptor_get_method_by_name(
-    const ProtobufCServiceDescriptor *desc, const char *name);
-
 /**
  * Initialise a `ProtobufCBufferSimple` object.
  */
@@ -1011,18 +914,6 @@ PROTOBUF_C__API
 void protobuf_c_buffer_simple_append(ProtobufCBuffer *buffer, size_t len,
                                      const unsigned char *data);
 
-PROTOBUF_C__API
-void protobuf_c_service_generated_init(
-    ProtobufCService *service, const ProtobufCServiceDescriptor *descriptor,
-    ProtobufCServiceDestroy destroy);
-
-PROTOBUF_C__API
-void protobuf_c_service_invoke_internal(ProtobufCService *service,
-                                        unsigned method_index,
-                                        const ProtobufCMessage *input,
-                                        ProtobufCClosure closure,
-                                        void *closure_data);
-
 /**@}*/
 
 PROTOBUF_C__END_DECLS
diff --git a/src/common/type_define.h b/src/common/type_define.h
index 63665bf7933f773b2b3de40ade9c700e3e93e6a9..c26cdd91e0694d44cca9443503d3e263ee21f201 100644
--- a/src/common/type_define.h
+++ b/src/common/type_define.h
@@ -16,7 +16,6 @@ limitations under the License. */
 
 #include <map>
 #include <string>
-#include <unordered_set>
 #include <vector>
 #include "framework/attribute.h"
 #include "framework/scope.h"
@@ -40,13 +39,6 @@ using OpCreator = std::function<framework::OperatorBase<Dtype> *(
     const framework::AttributeMap & /*attrs*/,
     std::shared_ptr<framework::Scope> /*scope*/)>;
 
-using GradOpMakerFN =
-    std::function<std::vector<std::unique_ptr<framework::OpDesc>>(
-        const framework::OpDesc &,
-        const std::unordered_set<std::string> & /*no_grad_set*/,
-        std::unordered_map<std::string, std::string> * /*grad_to_var*/,
-        const std::vector<framework::BlockDesc *> &grad_block)>;
-
 using InferVarTypeFN = std::function<void(const framework::OpDesc & /*op_desc*/,
                                           framework::BlockDesc * /*block*/)>;
 
diff --git a/src/common/types.h b/src/common/types.h
index 952ee90cedd9fccfa006761467caa35fcceb3ad4..092c251552ef8007a4000f6b84dce1bbc4059b91 100644
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -16,7 +16,6 @@ limitations under the License. */
 
 #include <string>
 #include <unordered_map>
-#include <utility>
 
 namespace paddle_mobile {
 enum class Precision : int { FP32 = 0 };
@@ -80,6 +79,7 @@ static const std::string G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
 static const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU =
     "fusion_conv_add_relu";
 static const std::string G_OP_TYPE_FC = "fc";
+static const std::string G_OP_TYPE_CONV_ADD = "conv_add";
 static const std::string G_OP_TYPE_LRN = "lrn";
 static const std::string G_OP_TYPE_MUL = "mul";
 static const std::string G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms";
@@ -116,5 +116,6 @@ static std::unordered_map<
         {G_OP_TYPE_PRIOR_BOX, {{"Image", "Input"}, {"Boxes", "Variances"}}},
         {G_OP_TYPE_MULTICLASS_NMS, {{"BBoxes", "Scores"}, {"Out"}}},
         {G_OP_TYPE_FC, {{"X", "Y", "Z"}, {"Out"}}},
-        {G_OP_TYPE_RESHAPE, {{"X"}, {"Out"}}}};
+        {G_OP_TYPE_RESHAPE, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_DEPTHWISE_CONV, {{"Input"}, {"Output"}}}};
 }  // namespace paddle_mobile
diff --git a/src/common/variant.cpp b/src/common/variant.cpp
deleted file mode 100644
index 6bbf34eae933d69d00517c723326111901444ab0..0000000000000000000000000000000000000000
--- a/src/common/variant.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
diff --git a/src/common/variant.h b/src/common/variant.h
index c198ff511c2e90e0387238aede9f1373f0379d3c..7fbf0ec0772f102165770dc9c8e053f469965f10 100644
--- a/src/common/variant.h
+++ b/src/common/variant.h
@@ -12,8 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <iostream>
-
+#include "common/enforce.h"
 #include "common/log.h"
 
 #pragma once
@@ -57,15 +56,11 @@ class RawData {
   char data[size];
   RawData() {}
   RawData(const RawData &raw_data) { strcpy(data, raw_data.data); }
-  //      void operator=(const RawData &raw_data){
-  //        strcpy(data, raw_data.data);
-  //      }
 };
 
 template <typename... Ts>
 struct Variant {
   Variant(const Variant &variant) {
-    //        std::cout << " 赋值构造函数 " << std::endl;
     type_id = variant.type_id;
     data = variant.data;
   }
@@ -87,8 +82,7 @@ struct Variant {
     if (type_id == typeid(T).hash_code()) {
       return *const_cast<T *>(reinterpret_cast<const T *>(&data));
     } else {
-      //      std::cout << " bad cast in variant " << std::endl;
-      throw std::bad_cast();
+      PADDLE_MOBILE_THROW_EXCEPTION(" bad cast in variant ");
     }
   }
 
diff --git a/src/framework/attribute.cpp b/src/framework/attribute.cpp
index 01b0ed523c2ccf125c4bb81d3d50ff5e4b289c7e..8b150f4e9e6aa3ccc30f13f661ff9cd6be79ae7a 100644
--- a/src/framework/attribute.cpp
+++ b/src/framework/attribute.cpp
@@ -17,14 +17,8 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace framework {
 
-/*
- * Variant<int, float, std::string, std::vector<int>, std::vector<float>,
-          std::vector<std::string>, bool, std::vector<bool>, BlockDesc *,
-          int64_t>
- * */
-
 struct PrintVistor : Vistor<Print &> {
-  PrintVistor(Print &printer) : printer_(printer) {}
+  explicit PrintVistor(Print &printer) : printer_(printer) {}
   template <typename T>
   Print &operator()(const T &value) {
     printer_ << value;
diff --git a/src/framework/attribute.h b/src/framework/attribute.h
index b77d94521e8be9bdfdfd00ca1628bdefc60d688d..3b6608cf03e7f786ad8c087dc869516cb6220edb 100644
--- a/src/framework/attribute.h
+++ b/src/framework/attribute.h
@@ -14,7 +14,10 @@ limitations under the License. */
 
 #pragma once
 
+#include <string>
 #include <unordered_map>
+#include <vector>
+
 #include "common/enforce.h"
 #include "common/log.h"
 #include "common/variant.h"
@@ -22,28 +25,15 @@ limitations under the License. */
 
 namespace paddle_mobile {
 namespace framework {
+using std::string;
+using std::vector;
 
 class BlockDesc;
 
 class Attribute {
  public:
-  /*
-   *  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT = 0,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOAT = 1,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRING = 2,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INTS = 3,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOATS = 4,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRINGS = 5,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEAN = 6,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS = 7,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK = 8,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG = 9
-    PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE)
-   *
-   * */
   static Attribute GetAttrValue(
       PaddleMobile__Framework__Proto__OpDesc__Attr *attr_desc) {
-    //    std::cout << "begin get attr value" << std::endl;
     Attribute attr;
     switch (attr_desc->type) {
       case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEAN: {
@@ -63,35 +53,35 @@ class Attribute {
         break;
       }
       case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS: {
-        std::vector<bool> val(attr_desc->n_bools);
+        vector<bool> val(attr_desc->n_bools);
         for (int i = 0; i < attr_desc->n_bools; ++i) {
           val[i] = attr_desc->bools[i];
         }
-        attr.Set<std::vector<bool>>(val);
+        attr.Set<vector<bool>>(val);
         break;
       }
       case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INTS: {
-        std::vector<int> val(attr_desc->n_ints);
+        vector<int> val(attr_desc->n_ints);
         for (int i = 0; i < attr_desc->n_ints; ++i) {
           val[i] = attr_desc->ints[i];
         }
-        attr.Set<std::vector<int>>(val);
+        attr.Set<vector<int>>(val);
         break;
       }
       case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOATS: {
-        std::vector<float> val(attr_desc->n_floats);
+        vector<float> val(attr_desc->n_floats);
         for (int i = 0; i < attr_desc->n_floats; ++i) {
           val[i] = attr_desc->floats[i];
         }
-        attr.Set<std::vector<float>>(val);
+        attr.Set<vector<float>>(val);
         break;
       }
       case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRINGS: {
-        std::vector<std::string> val(attr_desc->n_strings);
+        vector<string> val(attr_desc->n_strings);
         for (int i = 0; i < attr_desc->n_strings; ++i) {
           val[i] = attr_desc->strings[i];
         }
-        attr.Set<std::vector<std::string>>(val);
+        attr.Set<vector<string>>(val);
         break;
       }
       case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG: {
@@ -122,47 +112,41 @@ class Attribute {
       return vistor(attr.variant_.Get<int>());
     } else if (attr.variant_.TypeId() == typeid(float).hash_code()) {
       return vistor(attr.variant_.Get<float>());
-    } else if (attr.variant_.TypeId() == typeid(std::string).hash_code()) {
-      return vistor(attr.variant_.Get<std::string>());
-    } else if (attr.variant_.TypeId() == typeid(std::vector<int>).hash_code()) {
-      return vistor(attr.variant_.Get<std::vector<int>>());
-    } else if (attr.variant_.TypeId() ==
-               typeid(std::vector<float>).hash_code()) {
-      return vistor(attr.variant_.Get<std::vector<float>>());
-    } else if (attr.variant_.TypeId() ==
-               typeid(std::vector<std::string>).hash_code()) {
-      return vistor(attr.variant_.Get<std::vector<std::string>>());
+    } else if (attr.variant_.TypeId() == typeid(string).hash_code()) {
+      return vistor(attr.variant_.Get<string>());
+    } else if (attr.variant_.TypeId() == typeid(vector<int>).hash_code()) {
+      return vistor(attr.variant_.Get<vector<int>>());
+    } else if (attr.variant_.TypeId() == typeid(vector<float>).hash_code()) {
+      return vistor(attr.variant_.Get<vector<float>>());
+    } else if (attr.variant_.TypeId() == typeid(vector<string>).hash_code()) {
+      return vistor(attr.variant_.Get<vector<string>>());
     } else if (attr.variant_.TypeId() == typeid(bool).hash_code()) {
       return vistor(attr.variant_.Get<bool>());
-    } else if (attr.variant_.TypeId() ==
-               typeid(std::vector<bool>).hash_code()) {
-      return vistor(attr.variant_.Get<std::vector<bool>>());
+    } else if (attr.variant_.TypeId() == typeid(vector<bool>).hash_code()) {
+      return vistor(attr.variant_.Get<vector<bool>>());
     } else if (attr.variant_.TypeId() == typeid(int64_t).hash_code()) {
       return vistor(attr.variant_.Get<int64_t>());
     } else {
-      throw std::bad_exception();
+      PADDLE_MOBILE_THROW_EXCEPTION("type not support");
     }
   }
 
  private:
-  Variant<int, float, std::string, std::vector<int>, std::vector<float>,
-          std::vector<std::string>, bool, std::vector<bool>, BlockDesc *,
-          int64_t>
+  Variant<int, float, string, vector<int>, vector<float>, vector<string>, bool,
+          vector<bool>, BlockDesc *, int64_t>
       variant_;
 };
 
-using AttributeMap = std::unordered_map<std::string, Attribute>;
+using AttributeMap = std::unordered_map<string, Attribute>;
 
 class AttrReader {
  public:
   explicit AttrReader(const AttributeMap &attrs) : attrs_(attrs) {}
 
   template <typename T>
-  inline T Get(const std::string &name) const {
-    //          PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should
-    //          be in
-    //          AttributeMap",
-    //                         name);
+  inline T Get(const string &name) const {
+    PADDLE_MOBILE_ENFORCE(attrs_.count(name) != 0,
+                          "%s should  be in AttributeMap", name.c_str());
     return ((Attribute)attrs_.at(name)).Get<T>();
   }
 
diff --git a/src/framework/data_layout.h b/src/framework/data_layout.h
index 72c16c36733c0660ae2cf46de31031370eed444a..3b31445707a887a2715afd0b9e7192ad76724351 100644
--- a/src/framework/data_layout.h
+++ b/src/framework/data_layout.h
@@ -15,7 +15,6 @@ limitations under the License. */
 #pragma once
 
 #include <cctype>
-#include <iostream>
 #include <string>
 
 namespace paddle_mobile {
@@ -40,7 +39,7 @@ inline DataLayout StringToDataLayout(const std::string &str) {
   } else if (s == "ANYLAYOUT") {
     return DataLayout::kAnyLayout;
   } else {
-    //    std::cout << "Unknown storage order string: %s", s;
+    PADDLE_MOBILE_THROW_EXCEPTION("Unknown storage order string: %s", s.c_str());
   }
 }
 
@@ -54,14 +53,8 @@ inline std::string DataLayoutToString(const DataLayout &data_layout) {
       return "ANY_LAYOUT";
     default:
       break;
-      //      std::cout << "unknown DataLayou %d", data_layout;
   }
 }
 
-inline std::ostream &operator<<(std::ostream &out, const DataLayout &l) {
-  out << DataLayoutToString(l);
-  return out;
-}
-
 }  // namespace framework
 }  // namespace paddle_mobile
diff --git a/src/framework/data_transform.cpp b/src/framework/data_transform.cpp
deleted file mode 100644
index a6be4d2fcbbc6e0dd2adb9f71d644b2bd60d4259..0000000000000000000000000000000000000000
--- a/src/framework/data_transform.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include "framework/data_transform.h"
-
-namespace paddle_mobile {
-namespace framework {
-
-static void PassTensorData(Tensor *from, Tensor *to) {
-  to->ShareDataWith(*from);
-  *from = Tensor();
-}
-
-void DataTransform(const OpKernelType &expected_kernel_type,
-                   const OpKernelType &kernel_type_for_var,
-                   const Tensor &input_tensor, Tensor *output_tensor) {
-  bool transformed = false;
-  Tensor in;
-  in.ShareDataWith(input_tensor);
-  Tensor out;
-
-  //  // do layout transform
-  //  if (NeedTransformLayout(expected_kernel_type.data_layout_,
-  //                          kernel_type_for_var.data_layout_)) {
-  //    TransDataLayout(kernel_type_for_var, expected_kernel_type, in,
-  //    &out);
-  //    transformed = true;
-  //    PassTensorData(&out, &in);
-  //  }
-  //
-  //  // do data type transform
-  //  if (expected_kernel_type.data_type_ !=
-  //  kernel_type_for_var.data_type_) {
-  //    TransDataType(kernel_type_for_var, expected_kernel_type, in,
-  //    &out);
-  //    transformed = true;
-  //    PassTensorData(&out, &in);
-  //  }
-  //
-  //  // do device transform
-  //  if (!platform::is_same_place(kernel_type_for_var.place_,
-  //                               expected_kernel_type.place_)) {
-  //    TransDataDevice(in, expected_kernel_type.place_, &out);
-  //    transformed = true;
-  //    PassTensorData(&out, &in);
-  //  }
-  //
-  //  PADDLE_ENFORCE(transformed, "No transform is applied, please
-  //  check!");
-  // get output data
-  output_tensor->ShareDataWith(in);
-}
-
-void CopyVariableWithTensor(const Variable &in_var, const Tensor &tensor,
-                            Variable *out_var) {
-  //  if (in_var.IsType<LoDTensor>()) {
-  //    auto& in_lod_tensor = in_var.Get<LoDTensor>();
-  //    auto* tran_lod_tensor = out_var.GetMutable<LoDTensor>();
-  //    tran_lod_tensor->set_lod(in_lod_tensor.lod());
-  //    tran_lod_tensor->set_layout(in_lod_tensor.layout());
-  //    tran_lod_tensor->ShareDataWith(tensor);
-  //  } else if (in_var.IsType<SelectedRows>()) {
-  //    auto& in_selected_rows = in_var.Get<SelectedRows>();
-  //    auto* trans_selected_rows =
-  //    out_var.GetMutable<SelectedRows>();
-  //    trans_selected_rows->set_height(in_selected_rows.height());
-  //    trans_selected_rows->set_rows(in_selected_rows.rows());
-  //    trans_selected_rows->mutable_value()->ShareDataWith(tensor);
-  //  } else {
-  //    PADDLE_THROW("unknown var type");
-  //  }
-}
-
-}  // namespace framework
-}  // namespace paddle_mobile
diff --git a/src/framework/data_transform.h b/src/framework/data_transform.h
deleted file mode 100644
index b3947985d8b09e183c690b4d51093c2ae96e7d80..0000000000000000000000000000000000000000
--- a/src/framework/data_transform.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <functional>
-#include <utility>
-#include <vector>
-
-#include "framework/op_kernel_type.h"
-#include "framework/selected_rows.h"
-#include "framework/tensor.h"
-#include "framework/variable.h"
-
-namespace paddle_mobile {
-namespace framework {
-
-void DataTransform(const OpKernelType &expected_kernel_type,
-                   const OpKernelType &kernel_type_for_var,
-                   const Tensor &input_tensor, Tensor *out);
-
-void CopyVariableWithTensor(const Variable &in_var, const Tensor &tensor,
-                            Variable *out_var);
-
-}  // namespace framework
-}  // namespace paddle_mobile
diff --git a/src/framework/data_type.h b/src/framework/data_type.h
deleted file mode 100644
index ddfc0dcc4adf8e5897f5f4ea67f9514889863f32..0000000000000000000000000000000000000000
--- a/src/framework/data_type.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-namespace paddle_mobile {
-namespace framework {
-
-//    inline proto::VarType::Type ToDataType(std::type_index type) {
-//        using namespace paddle_mobile::framework::proto;
-//        if (typeid(float).hash_code() == type.hash_code()) {
-//            return proto::VarType::FP32;
-//        } else if (typeid(double).hash_code() == type.hash_code()) {
-//            return proto::VarType::FP64;
-//        } else if (typeid(int).hash_code() == type.hash_code()) {
-//            return proto::VarType::INT32;
-//        } else if (typeid(int64_t).hash_code() == type.hash_code()) {
-//            return proto::VarType::INT64;
-//        } else if (typeid(bool).hash_code() == type.hash_code()) {
-//            return proto::VarType::BOOL;
-//        } else {
-////            PADDLE_THROW("Not supported");
-//        }
-//    }
-}  // namespace framework
-}  // namespace paddle_mobile
diff --git a/src/framework/ddim.cpp b/src/framework/ddim.cpp
index db6f2cd6aba92fec6a42839c0e3198ac749807b0..3a4a3abb7cd4c632251e6f0190e32c99dd232c01 100644
--- a/src/framework/ddim.cpp
+++ b/src/framework/ddim.cpp
@@ -63,9 +63,6 @@ void make_ddim(DDim &ddim, const int64_t *dims, int n) {
       ddim = make_dim<9>(dims);
       break;
     default:
-      //      std::cout << "Dynamic dimensions must have between [1,
-      //      9]
-      //      dimensions.";
       break;
   }
 }
@@ -133,9 +130,6 @@ int64_t DDim::operator[](int idx) const {
 int DDim::size() const { return arity(*this); }
 
 bool DDim::operator==(DDim d) const {
-  //  if (var.which() != d.getVar().which()) {
-  //    return false;
-  //  } else {
   std::vector<int64_t> v1 = vectorize(*this);
   std::vector<int64_t> v2 = vectorize(d);
 
@@ -157,7 +151,7 @@ DDim DDim::operator+(DDim d) const {
 
   std::vector<int64_t> v3;
 
-  assert(v1.size() == v2.size());
+  PADDLE_MOBILE_ENFORCE(v1.size() == v2.size(), "v1.size() != v2.size()");
 
   for (unsigned int i = 0; i < v1.size(); i++) {
     v3.push_back(v1[i] + v2[i]);
@@ -172,7 +166,7 @@ DDim DDim::operator*(DDim d) const {
 
   std::vector<int64_t> v3;
 
-  assert(v1.size() == v2.size());
+  PADDLE_MOBILE_ENFORCE(v1.size() == v2.size(), "v1.size() != v2.size()");
 
   for (unsigned int i = 0; i < v1.size(); i++) {
     v3.push_back(v1[i] * v2[i]);
@@ -183,7 +177,7 @@ DDim DDim::operator*(DDim d) const {
 
 int64_t get(const DDim &ddim, int idx) { return ddim[idx]; }
 
-void set(DDim &ddim, int idx, int value) { ddim[idx] = value; }
+void set(DDim *ddim, int idx, int value) { (*ddim)[idx] = value; }
 
 /// @cond HIDDEN
 struct VectorizeVisitor : Vistor<void> {
@@ -235,13 +229,10 @@ struct SliceVectorizeVisitor : Vistor<void> {
 
   SliceVectorizeVisitor(std::vector<int64_t> &v, int b, int e)
       : vector(v), begin(b), end(e) {
-    //    PADDLE_ENFORCE(begin < end,
-    //                   "Begin index must be less than end index in
-    //                   ddim
-    //                   slice.");
-    //    PADDLE_ENFORCE(begin >= 0,
-    //                   "Begin index can't be less than zero in
-    //                   ddim slice.");
+    PADDLE_MOBILE_ENFORCE(
+        begin < end, "Begin index must be less than end index in ddim slice.");
+    PADDLE_MOBILE_ENFORCE(begin >= 0,
+                          "Begin index can't be less than zero in ddim slice.");
   }
 
   template <int S>
@@ -267,9 +258,7 @@ DDim slice_ddim(const DDim &ddim, int begin, int end) {
   std::vector<int64_t> vec;
   vec.reserve(end - begin);
   SliceVectorizeVisitor visitor(vec, begin, end);
-  //  boost::apply_visitor(visitor, dim);
   DDim::ApplyVistor(visitor, ddim);
-  //  visitor(ddim.var.Get<Dim<4>>());
   return make_ddim(vec);
 }
 
@@ -287,31 +276,19 @@ struct ArityVisitor : Vistor<int> {
 int arity(const DDim &d) {
   ArityVisitor arityVisitor = ArityVisitor();
   return DDim::ApplyVistor(arityVisitor, d);
-  //  return arityVisitor(d.var.Get<Dim<4>>());
-  //  return boost::apply_visitor(ArityVisitor(), d); }
 }
-/// \cond HIDDEN
-
-/// \endcond
 
-struct OSVistor : Vistor<std::ostream &> {
-  OSVistor(std::ostream &os) : os_(os) {}
-
-  template <int D>
-  std::ostream &operator()(Dim<D> dim) const {
-    return os_ << dim;
+#ifdef PADDLE_MOBILE_DEBUG
+Print &operator<<(Print &printer, const DDim &ddim) {
+  for (int j = 0; j < ddim.size(); ++j) {
+    printer << ddim[j] << " ";
   }
 
- private:
-  std::ostream &os_;
-};
-
-std::ostream &operator<<(std::ostream &os, const DDim &ddim) {
-  auto vistor = OSVistor(os);
-  DDim::ApplyVistor(vistor, ddim);
-  return os;
+  return printer;
 }
 
+#endif
+
 DDim::DDim(std::initializer_list<int64_t> init_list) {
   *this = make_ddim(init_list);
 }
diff --git a/src/framework/ddim.h b/src/framework/ddim.h
index 88039b2e0a57b4f79247129d1d95e4d5954da6c6..c1d917dff612de3a42168c47d0bacd3ac7bdd3ad 100644
--- a/src/framework/ddim.h
+++ b/src/framework/ddim.h
@@ -14,10 +14,9 @@ limitations under the License. */
 
 #pragma once
 
-#include <assert.h>
 #include <initializer_list>
-#include <stdexcept>
 #include <vector>
+#include "common/enforce.h"
 #include "common/variant.h"
 #include "dim.h"
 
@@ -58,9 +57,7 @@ struct DDim {
     } else if (d.var.TypeId() == typeid(Dim<9>).hash_code()) {
       return vistor(d.var.Get<Dim<9>>());
     } else {
-      printf(" dim not support  \n");
-      throw std::bad_exception();
-      //        return typename Vistor::type_t();
+      PADDLE_MOBILE_THROW_EXCEPTION(" dim not support")
     }
   }
 
@@ -83,17 +80,6 @@ struct DDim {
 
   int64_t operator[](int idx) const;
 
-  //  template <typename Visitor>
-  //  typename Visitor::result_type apply_visitor(Visitor& visitor) {
-  //    return var.apply_visitor(visitor);
-  //  }
-  //
-  //  template <typename Visitor>
-  //  typename Visitor::result_type apply_visitor(Visitor& visitor)
-  //  const {
-  //    return var.apply_visitor(visitor);
-  //  }
-
   DDimVar getVar() { return var; }
 
   bool operator==(DDim d) const;
@@ -126,7 +112,7 @@ DDim make_ddim(std::initializer_list<int64_t> dims);
 
 int64_t get(const DDim &dim, int idx);
 
-void set(DDim &dim, int idx, int val);
+void set(DDim *dim, int idx, int val);
 
 std::vector<int64_t> vectorize(const DDim &ddim);
 
@@ -151,8 +137,6 @@ DDim slice_ddim(const DDim &dim, int begin, int end);
 
 int arity(const DDim &ddim);
 
-std::ostream &operator<<(std::ostream &, const DDim &);
-
 // Reshape a tensor to a matrix. The matrix's first dimension(column
 // length)
 // will be the product of tensor's first `num_col_dims` dimensions.
@@ -163,5 +147,9 @@ DDim flatten_to_1d(const DDim &src);
 DDim stride(const DDim &ddim);
 
 DDim stride_numel(const DDim &ddim);
+
+#ifdef PADDLE_MOBILE_DEBUG
+Print &operator<<(Print &printer, const DDim &ddim);
+#endif
 }  // namespace framework
 }  // namespace paddle_mobile
diff --git a/src/framework/dim.h b/src/framework/dim.h
index 6740386c057d6e3a3466219170073cf65b29e03e..38e62df99519c3e869dc0fd2ae71beed28370122 100644
--- a/src/framework/dim.h
+++ b/src/framework/dim.h
@@ -14,13 +14,7 @@ limitations under the License. */
 
 #pragma once
 
-#include <iostream>
-#include <sstream>
-#include <stdexcept>
-#include <type_traits>
-
-#include "platform/hostdevice.h"
-
+#include "common/enforce.h"
 namespace paddle_mobile {
 namespace framework {
 
@@ -30,42 +24,35 @@ struct Dim {
   static constexpr int dimensions = i;
 
   template <typename... Args>
-  HOSTDEVICE Dim(int64_t _head, Args... _tail) : head(_head), tail(_tail...) {
+  Dim(int64_t _head, Args... _tail) : head(_head), tail(_tail...) {
     static_assert(sizeof...(_tail) == i - 1,
                   "Dim initialized with the wrong number of parameters");
   }
 
-  HOSTDEVICE
   Dim(int64_t _head, const Dim<i - 1> &_tail) : head(_head), tail(_tail) {}
 
-  HOSTDEVICE
   Dim() : head(0), tail() {}
 
   /** Construct a Dim from a linear index and size.  Uses Fortran
    * order
    * indexing. */
-  HOSTDEVICE
   Dim(int64_t idx, const Dim<i> &size)
       : head(idx % size.head), tail(idx / size.head, size.tail) {}
 
   /** Construct a Dim with each dimension set to the given index */
-  HOSTDEVICE
   Dim(int64_t idx) : head(idx), tail(idx) {}
 
-  HOSTDEVICE
   bool operator==(const Dim<i> &o) const {
     return (head == o.head) && (tail == o.tail);
   }
 
-  HOSTDEVICE
   bool operator!=(const Dim<i> &o) const { return !(*this == o); }
 
-  HOSTDEVICE
   int64_t &operator[](int idx);
-  HOSTDEVICE
+
   int64_t operator[](int idx) const;
 
-  HOST std::string to_string() const;
+  std::string to_string() const;
 
   int64_t head;
   Dim<i - 1> tail;
@@ -76,32 +63,22 @@ template <>
 struct Dim<0> {
   static constexpr int dimensions = 0;
 
-  HOSTDEVICE
   Dim(int64_t _head) {}
 
-  HOSTDEVICE
   Dim() {}
 
-  HOSTDEVICE
   Dim(int idx, const Dim<0> &size) {
-#ifndef __CUDA_ARCH__
     if (idx > 0) {
-      throw std::invalid_argument("Index out of range.");
+      PADDLE_MOBILE_THROW_EXCEPTION("Index out of range.")
     }
-#else
-    PADDLE_ASSERT(idx == 0);
-#endif
   }
 
-  HOSTDEVICE
   bool operator==(const Dim<0> &o) const { return true; }
 
-  HOSTDEVICE
   bool operator!=(const Dim<0> &o) const { return false; }
 
-  HOSTDEVICE
   int64_t &operator[](int idx);
-  HOSTDEVICE
+
   int64_t operator[](int idx) const;
 };
 
@@ -112,12 +89,12 @@ template <int i>
 struct DimGetter {
   // Return a copy if Dim is const
   template <typename D>
-  HOSTDEVICE static int64_t impl(const D &d) {
+  static int64_t impl(const D &d) {
     return DimGetter<i - 1>::impl(d.tail);
   }
   // Return a reference if Dim is mutable
   template <typename D>
-  HOSTDEVICE static int64_t &impl(D &d) {
+  static int64_t &impl(D &d) {
     return DimGetter<i - 1>::impl(d.tail);
   }
 };
@@ -127,25 +104,22 @@ template <>
 struct DimGetter<0> {
   // Return a copy if Dim is const
   template <typename D>
-  HOSTDEVICE static int64_t impl(const D &d) {
+  static int64_t impl(const D &d) {
     return d.head;
   }
   // Return a reference if Dim is mutable
   template <typename D>
-  HOSTDEVICE static int64_t &impl(D &d) {
+  static int64_t &impl(D &d) {
     return d.head;
   }
 };
 
 template <int D>
-HOSTDEVICE int64_t &indexer(Dim<D> &dim, int idx) {
-#ifndef __CUDA_ARCH__
+int64_t &indexer(Dim<D> &dim, int idx) {
   if (idx < 0) {
-    throw std::invalid_argument("Tried to access a negative dimension");
+    PADDLE_MOBILE_THROW_EXCEPTION("Tried to access a negative dimension")
   }
-#else
-  PADDLE_ASSERT(idx >= 0);
-#endif
+
   if (idx == 0) {
     return dim.head;
   }
@@ -153,31 +127,15 @@ HOSTDEVICE int64_t &indexer(Dim<D> &dim, int idx) {
 }
 
 template <>
-HOSTDEVICE int64_t &indexer<0>(Dim<0> &dim, int idx) {
-#ifndef __CUDA_ARCH__
-  throw std::invalid_argument("Invalid index");
-#else
-  PADDLE_ASSERT(false);
-#if CUDA_VERSION < 8000
-  // On CUDA versions previous to 8.0, only __shared__ variables
-  // could be declared as static in the device code.
-  int64_t head = 0;
-#else
-  static int64_t head = 0;
-#endif
-  return head;
-#endif
+int64_t &indexer<0>(Dim<0> &dim, int idx) {
+  PADDLE_MOBILE_THROW_EXCEPTION("Invalid index")
 }
 
 template <int D>
-HOSTDEVICE int64_t indexer(const Dim<D> &dim, int idx) {
-#ifndef __CUDA_ARCH__
+int64_t indexer(const Dim<D> &dim, int idx) {
   if (idx < 0) {
-    throw std::invalid_argument("Tried to access a negative dimension");
+    PADDLE_MOBILE_THROW_EXCEPTION("Tried to access a negative dimension")
   }
-#else
-  PADDLE_ASSERT(idx >= 0);
-#endif
   if (idx == 0) {
     return dim.head;
   }
@@ -185,102 +143,84 @@ HOSTDEVICE int64_t indexer(const Dim<D> &dim, int idx) {
 }
 
 template <>
-HOSTDEVICE int64_t indexer<0>(const Dim<0> &dim, int idx) {
-#ifndef __CUDA_ARCH__
-  throw std::invalid_argument("Invalid index");
-#else
-  PADDLE_ASSERT(false);
-#if CUDA_VERSION < 8000
-  // On CUDA versions previous to 8.0, only __shared__ variables
-  // could be declared as static in the device code.
-  int64_t head = 0;
-#else
-  static int64_t head = 0;
-#endif
-  return head;
-#endif
+int64_t indexer<0>(const Dim<0> &dim, int idx) {
+  PADDLE_MOBILE_THROW_EXCEPTION("Invalid index")
 }
 
 }  // namespace
 // Static access to constant Dim
 template <int i, int l>
-HOSTDEVICE int64_t get(const Dim<l> &d) {
+int64_t get(const Dim<l> &d) {
   return DimGetter<i>::impl(d);
 }
 
 // Static access to mutable Dim
 template <int i, int l>
-HOSTDEVICE int64_t &get(Dim<l> &d) {
+int64_t &get(Dim<l> &d) {
   return DimGetter<i>::impl(d);
 }
 
 // Dynamic access to constant Dim
 template <int l>
-HOSTDEVICE int64_t Dim<l>::operator[](int i) const {
+int64_t Dim<l>::operator[](int i) const {
   //  std::cout << "l: " << l << std::endl;
   return indexer(*this, i);
 }
 
 // Dynamic access to mutable Dim
 template <int l>
-HOSTDEVICE int64_t &Dim<l>::operator[](int i) {
+int64_t &Dim<l>::operator[](int i) {
   return indexer(*this, i);
 }
 
 // Dynamic access to constant Dim
-inline HOSTDEVICE int64_t Dim<0>::operator[](int i) const {
-  return indexer(*this, i);
-}
+inline int64_t Dim<0>::operator[](int i) const { return indexer(*this, i); }
 
 // Dynamic access to mutable Dim
-inline HOSTDEVICE int64_t &Dim<0>::operator[](int i) {
-  return indexer(*this, i);
-}
+inline int64_t &Dim<0>::operator[](int i) { return indexer(*this, i); }
 
 // Dynamic access to constant Dim
 // without std::enable_if will try to instantiate this on get<0>(d)
 template <int l>
-HOSTDEVICE typename std::enable_if<(l > 0), int64_t>::type get(const Dim<l> &d,
-                                                               int i) {
+typename std::enable_if<(l > 0), int64_t>::type get(const Dim<l> &d, int i) {
   return d[i];
 }
 
 // Dynamic access to mutable Dim
 template <int l>
-HOSTDEVICE typename std::enable_if<(l > 0), int64_t &>::type get(Dim<l> &d,
-                                                                 int i) {
+typename std::enable_if<(l > 0), int64_t &>::type get(Dim<l> &d, int i) {
   return d[i];
 }
 
 // Dot product of two dims
 template <int i>
-HOSTDEVICE int64_t linearize(const Dim<i> &a, const Dim<i> &b) {
+int64_t linearize(const Dim<i> &a, const Dim<i> &b) {
   return a.head * b.head + linearize(a.tail, b.tail);
 }
 
 // Base case dot product of two Dims
 // Notice it is inline because it is no longer a template
 template <>
-HOSTDEVICE inline int64_t linearize(const Dim<0> &a, const Dim<0> &b) {
+inline int64_t linearize(const Dim<0> &a, const Dim<0> &b) {
   return 0;
 }
 
 // Product of a Dim
 template <int i>
-HOSTDEVICE int64_t product(const Dim<i> &a, int prod = 1) {
+int64_t product(const Dim<i> &a, int prod = 1) {
   return prod * a.head * product(a.tail);
 }
 
 // Base case product of a Dim
 // Notice it is inline because it is no longer a template
 template <>
-HOSTDEVICE inline int64_t product(const Dim<0> &a, int prod) {
+inline int64_t product(const Dim<0> &a, int prod) {
   return prod;
 }
 
 // Is 0 <= idx_i < size_i for all i?
 template <int i>
-HOSTDEVICE bool contained(const Dim<i> &idx, const Dim<i> &size) {
+bool contained(const Dim<i> &idx, const Dim<i> &size) {
   return ((0 <= idx.head) && (idx.head < size.head) &&
           contained(idx.tail, size.tail));
 }
@@ -288,7 +228,7 @@ HOSTDEVICE bool contained(const Dim<i> &idx, const Dim<i> &size) {
 // Base case of is 0 <= idx_i < size_i ?
 // Notice it is inline because it is no longer a template
 template <>
-HOSTDEVICE inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
+inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
   return true;
 }
 
@@ -296,7 +236,7 @@ HOSTDEVICE inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
  * \brief Compute exclusive prefix-multiply of a Dim.
  */
 template <int i>
-HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) {
+Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) {
   return Dim<i>(mul, ex_prefix_mul(src.tail, mul * src.head));
 }
 
@@ -304,7 +244,7 @@ HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) {
 // Base case of ex_prefix_mul
 // Notice it is inline because it is no longer a template
 template <>
-HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) {
+inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) {
   return Dim<0>();
 }
 ///\endcond
@@ -313,18 +253,18 @@ HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) {
  * Add two dimensions together
  */
 template <int i>
-HOSTDEVICE Dim<i> dim_plus(const Dim<i> &a, const Dim<i> &b) {
+Dim<i> dim_plus(const Dim<i> &a, const Dim<i> &b) {
   return Dim<i>(a.head + b.head, dim_plus(a.tail, b.tail));
 }
 
 // Base case
 template <>
-HOSTDEVICE inline Dim<0> dim_plus(const Dim<0> &a, const Dim<0> &b) {
+inline Dim<0> dim_plus(const Dim<0> &a, const Dim<0> &b) {
   return Dim<0>();
 }
 
 template <int i>
-HOSTDEVICE Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
+Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
   return dim_plus(lhs, rhs);
 }
 
@@ -332,18 +272,18 @@ HOSTDEVICE Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
  * Multiply two dimensions together
  */
 template <int i>
-HOSTDEVICE Dim<i> dim_mult(const Dim<i> &a, const Dim<i> &b) {
+Dim<i> dim_mult(const Dim<i> &a, const Dim<i> &b) {
   return Dim<i>(a.head * b.head, dim_mult(a.tail, b.tail));
 }
 
 // Base case
 template <>
-HOSTDEVICE inline Dim<0> dim_mult(const Dim<0> &a, const Dim<0> &b) {
+inline Dim<0> dim_mult(const Dim<0> &a, const Dim<0> &b) {
   return Dim<0>();
 }
 
 template <int i>
-HOSTDEVICE Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
+Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
   return dim_mult(lhs, rhs);
 }
 
@@ -358,7 +298,7 @@ HOSTDEVICE Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
  */
 
 template <int i>
-HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
+Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
   int norm_stride = size.head == 1 ? 0 : stride.head;
   return Dim<i>(norm_stride, normalize_strides(size.tail, stride.tail));
 }
@@ -366,8 +306,7 @@ HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
 ///\cond HIDDEN
 
 template <>
-HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size,
-                                           const Dim<0> &stride) {
+inline Dim<0> normalize_strides(const Dim<0> &size, const Dim<0> &stride) {
   return Dim<0>();
 }
 
@@ -382,54 +321,9 @@ HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size,
  */
 
 template <typename... Args>
-HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) {
+Dim<sizeof...(Args)> make_dim(Args... idxes) {
   return Dim<sizeof...(Args)>(idxes...);
 }
 
-// Allows us to output a Dim
-// XXX For some reason, overloading fails to resolve this correctly
-template <int i>
-typename std::enable_if<(i > 1), std::ostream &>::type operator<<(
-    std::ostream &os, const Dim<i> &d) {
-  os << d.head << ", " << d.tail;
-  return os;
-}
-
-// Base case that allows us to output a Dim
-// XXX I wish this could be an overload instead of a template
-template <int i>
-typename std::enable_if<(i == 1), std::ostream &>::type operator<<(
-    std::ostream &os, const Dim<i> &d) {
-  os << d.head;
-  return os;
-}
-
-inline std::ostream &operator<<(std::ostream &os, const Dim<0> &d) {
-  return os;
-}
-
-template <int i>
-HOST std::string Dim<i>::to_string() const {
-  std::stringstream stream;
-
-  stream << *this;
-
-  return stream.str();
-}
-
-template <int D>
-HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) {
-  Dim<D> result;
-
-  for (int i = 0; i < D - 1; ++i) {
-    result[i] = linear_index % extents[i];
-    linear_index /= extents[i];
-  }
-
-  result[D - 1] = linear_index;
-
-  return result;
-}
-
 }  // namespace framework
 }  // namespace paddle_mobile
diff --git a/src/framework/lod_tensor.cpp b/src/framework/lod_tensor.cpp
index 48c2c46989e2d477ed0a005f25a8252da0955f13..0a57d29a0c05c009299d43b3b9f5a59b2c3dc341 100644
--- a/src/framework/lod_tensor.cpp
+++ b/src/framework/lod_tensor.cpp
@@ -13,72 +13,55 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "lod_tensor.h"
-#include <stdint.h>
-#include <string.h>
-#include <algorithm>
-#include <iterator>
 
 namespace paddle_mobile {
 namespace framework {
 
-std::ostream &operator<<(std::ostream &os, const LoD &lod) {
-  os << "{";
-  for (auto &v : lod) {
-    os << "{";
-    bool is_first = true;
-    for (auto &i : v) {
-      if (is_first) {
-        os << i;
-        is_first = false;
-      } else {
-        os << ", " << i;
-      }
-    }
-    os << "}";
-  }
-  os << "}";
-
-  return os;
-}
-
-std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
-  //  PADDLE_ENFORCE(t.type().hash_code() ==
-  //  typeid(float).hash_code());
-
-  //  if (!platform::is_cpu_place(t.place())) {
-  //    LoDTensor tt;
-  //    framework::TensorCopy(t, platform::CPUPlace(), &tt);
-  //    platform::DeviceContextPool &pool =
-  //    platform::DeviceContextPool::Instance(); auto &dev_ctx =
-  //    *pool.Get(t.place()); dev_ctx.Wait();
-  //
-  //    os << tt;
-  //    return os;
-  //  }
-
-  os << "dim: " << t.dims() << "\n";
-  os << "lod: " << t.lod() << "\n";
-
-  // only print first ten elements
-  int64_t size = t.numel() < 10 ? t.numel() : 10;
-  for (int64_t i = 0; i < size; ++i) {
-    os << t.data<float>()[i] << " ";
-  }
-
-  return os;
-}
-
-std::string LoDToString(const LoD &lod) {
-  std::ostringstream stream;
-  stream << lod;
-  return stream.str();
-}
+// std::ostream &operator<<(std::ostream &os, const LoD &lod) {
+//  os << "{";
+//  for (auto &v : lod) {
+//    os << "{";
+//    bool is_first = true;
+//    for (auto &i : v) {
+//      if (is_first) {
+//        os << i;
+//        is_first = false;
+//      } else {
+//        os << ", " << i;
+//      }
+//    }
+//    os << "}";
+//  }
+//  os << "}";
+//
+//  return os;
+//}
+//
+// std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
+//  PADDLE_MOBILE_ENFORCE(t.type().hash_code() == typeid(float).hash_code(),
+//                        "t.type() is not float");
+//  os << "dim: " << t.dims() << "\n";
+//  os << "lod: " << t.lod() << "\n";
+//  // only print first ten elements
+//  int64_t size = t.numel() < 10 ? t.numel() : 10;
+//  for (int64_t i = 0; i < size; ++i) {
+//    os << t.data<float>()[i] << " ";
+//  }
+//
+//  return os;
+//}
+
+// std::string LoDToString(const LoD &lod) {
+//  std::ostringstream stream;
+//  stream << lod;
+//  return stream.str();
+//}
 
 LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin,
                  size_t elem_end) {
-  //  PADDLE_ENFORCE_LT(level, in.size());
-  //  PADDLE_ENFORCE_LT(elem_end, in[level].size());
-
+  PADDLE_MOBILE_ENFORCE(level < in.size(), "level >= in.size()");
+  PADDLE_MOBILE_ENFORCE(elem_end < in[level].size(),
+                        "elem_end >= in[level].size()");
   LoD res;
   res.resize(in.size() - level);
   // copy the first level
@@ -152,7 +135,7 @@ bool CheckLoD(const LoD &in, int tensor_height) {
           if (a < b) return true;
           return false;
         })) {
-      std::cout << "ascending error";
+      PADDLE_MOBILE_THROW_EXCEPTION("ascending error")
       return false;
     }
   }
@@ -211,8 +194,9 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
   LoD sub_lod;
 
   for (size_t level_idx = start_level; level_idx < lod.size(); ++level_idx) {
-    //    PADDLE_ENFORCE_LE(start_idx, end_idx);
-    //    PADDLE_ENFORCE_LT(end_idx, lod[level_idx].size());
+    PADDLE_MOBILE_ENFORCE(start_idx <= end_idx, "start_idx > end_idx");
+    PADDLE_MOBILE_ENFORCE(end_idx < lod[level_idx].size(),
+                          "end_idx >= lod[level_idx].size()");
     std::vector<size_t> level_lens;
     for (size_t i = start_idx; i < end_idx; ++i) {
       level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]);
@@ -226,10 +210,9 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
 }
 
 void AppendLoD(LoD *lod, const LoD &lod_length) {
-  //  PADDLE_ENFORCE(
-  //      lod->empty() || lod->size() == lod_length.size(),
-  //      "The lod_length should has the same size with the appended
-  //      lod.");
+  PADDLE_MOBILE_ENFORCE(
+      lod->empty() || lod->size() == lod_length.size(),
+      "The lod_length should has the same size with the appended lod.");
   if (lod->empty()) {
     for (size_t i = 0; i < lod_length.size(); ++i) {
       lod->emplace_back(1, 0);  // size = 1, value = 0;
diff --git a/src/framework/lod_tensor.h b/src/framework/lod_tensor.h
index bab3db1389610e7ed5db1a387004bdf95267867f..3b34d664d3608dd361ed7c7bb549870284adcc33 100644
--- a/src/framework/lod_tensor.h
+++ b/src/framework/lod_tensor.h
@@ -16,7 +16,6 @@ limitations under the License. */
 
 #include <memory>
 #include <string>
-#include <utility>
 #include <vector>
 #include "tensor.h"
 #include "tensor_util.h"
diff --git a/src/framework/op_info.h b/src/framework/op_info.h
index 7475d155232e31cf00dab6273200f5bc4671f2e9..16b3487955ce05721e6e3f3e79b6d8ebd180e020 100644
--- a/src/framework/op_info.h
+++ b/src/framework/op_info.h
@@ -25,9 +25,8 @@ template <typename Dtype>
 struct OpInfo {
   OpCreator<Dtype> creator_;
   const OpCreator<Dtype> &Creator() const {
-    //    PADDLE_ENFORCE_NOT_NULL(creator_,
-    //                            "Operator Creator has not been
-    //                            registered");
+    PADDLE_MOBILE_ENFORCE(creator_ != nullptr,
+                          "Operator Creator has not been registered");
     return creator_;
   }
 };
@@ -48,17 +47,15 @@ class OpInfoMap {
   }
 
   void Insert(const std::string &type, const OpInfo<Dtype> &info) {
-    //    PADDLE_ENFORCE(!Has(type), "Operator %s has been
-    //    registered", type);
+    PADDLE_MOBILE_ENFORCE(!Has(type), "Operator %s has been registered",
+                          type.c_str());
     map_.insert({type, info});
   }
 
   const OpInfo<Dtype> &Get(const std::string &type) const {
     auto op_info_ptr = GetNullable(type);
-    //    PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not
-    //    been
-    //    registered",
-    //                            type);
+    PADDLE_MOBILE_ENFORCE(op_info_ptr != nullptr,
+                          "Operator %s has not been registered", type.c_str());
     return *op_info_ptr;
   }
 
diff --git a/src/framework/operator.h b/src/framework/operator.h
index cb27985244a1dd9e92a54edce9f15fd3d8defaad..d9b74e8887944774aefadf5ead0a74b1f7eac79d 100644
--- a/src/framework/operator.h
+++ b/src/framework/operator.h
@@ -16,7 +16,6 @@ limitations under the License. */
 
 #include <map>
 #include <string>
-#include <utility>
 #include <vector>
 
 #include "common/enforce.h"
@@ -27,7 +26,6 @@ limitations under the License. */
 #include "framework/op_info.h"
 #include "framework/op_kernel_type.h"
 #include "framework/op_registry.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/block_desc.h"
 #include "framework/program/program-optimize/node.h"
 #include "framework/scope.h"
@@ -52,7 +50,7 @@ static T *GetVarValue(const string &key, const VariableNameMap &var_map,
 }
 
 template <typename Dtype>
-class OperatorBase : PaddleMobileObject {
+class OperatorBase {
  public:
   /*
    *  @b op 基类的实例化方法, op 获取到了 输入、参数以及提前分配好的输出 tensor
@@ -121,7 +119,7 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
  * @b 所有kernel的父类
  * */
 template <typename Dtype, typename P>
-class OpKernelBase : PaddleMobileObject {
+class OpKernelBase {
  public:
   /*
    * @b 所有kernel 需实现 Compute 方法
@@ -139,14 +137,16 @@ class OpKernelBase : PaddleMobileObject {
       std::shared_ptr<::paddle_mobile::framework::Scope> scope)                \
       : parent_cls<Dtype, T>(type, inputs, outputs, attrs, scope) {}
 
-class FusionOpMatcher : PaddleMobileObject {
+class FusionOpMatcher {
  public:
   FusionOpMatcher() {}
 
   virtual std::string Type() = 0;
 
-  virtual void FolderNodes(Node *node) {
-    node->Folder(node_.Depth(), Type(), {});
+  virtual void FolderNodes(
+      Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(), {}, removed_nodes);
   }
 
   virtual Node &BeginNode() { return node_; }
diff --git a/src/framework/paddle_mobile_object.cpp b/src/framework/paddle_mobile_object.cpp
deleted file mode 100644
index acf37a3c117af3bf2bc70aac624335399b4a796b..0000000000000000000000000000000000000000
--- a/src/framework/paddle_mobile_object.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle_mobile_object.h"
diff --git a/src/framework/program/block_desc.cpp b/src/framework/program/block_desc.cpp
index 21322f0825636a321b022220e535cad0e4b8cf41..4b45ab305bf0f353f017674773b5fc51203bfef8 100644
--- a/src/framework/program/block_desc.cpp
+++ b/src/framework/program/block_desc.cpp
@@ -17,13 +17,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace framework {
 
-std::vector<std::shared_ptr<VarDesc>> BlockDesc::Vars() const {
-  std::vector<std::shared_ptr<VarDesc>> res;
-  for (const auto &p : vars_) {
-    res.push_back(p.second);
-  }
-  return res;
-}
+std::vector<std::shared_ptr<VarDesc>> BlockDesc::Vars() const { return vars_; }
 
 std::vector<std::shared_ptr<OpDesc>> BlockDesc::Ops() const { return ops_; }
 
@@ -31,10 +25,14 @@ BlockDesc::BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc)
     : index_(desc->idx), parent_index_(desc->idx) {
   for (int i = 0; i < desc->n_vars; ++i) {
     PaddleMobile__Framework__Proto__VarDesc *var_desc = desc->vars[i];
-    vars_[std::string(var_desc->name)] =
-        std::shared_ptr<VarDesc>(new VarDesc(var_desc));
+    vars_.emplace_back(std::shared_ptr<VarDesc>(new VarDesc(var_desc)));
   }
 
+  std::sort(vars_.begin(), vars_.end(),
+            [](const auto &left, const auto &right) {
+              return left->Name() < right->Name();
+            });
+
   for (int j = 0; j < desc->n_ops; ++j) {
     PaddleMobile__Framework__Proto__OpDesc *op_desc = desc->ops[j];
     ops_.emplace_back(new framework::OpDesc(op_desc));
diff --git a/src/framework/program/block_desc.h b/src/framework/program/block_desc.h
index 84d7a90fc11ddf360eacb01be9456ced4a30dad8..dd33a274266cb503cea0b960c026276d90cea57a 100644
--- a/src/framework/program/block_desc.h
+++ b/src/framework/program/block_desc.h
@@ -15,14 +15,13 @@ limitations under the License. */
 #pragma once
 
 #include "framework/framework.pb-c.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/op_desc.h"
 #include "framework/program/var_desc.h"
 
 namespace paddle_mobile {
 namespace framework {
 
-class BlockDesc : PaddleMobileObject {
+class BlockDesc {
  public:
   friend class Node;
   friend class ProgramOptimize;
@@ -35,10 +34,9 @@ class BlockDesc : PaddleMobileObject {
       ops_.push_back(copy_op_desc);
     }
 
-    for (auto &var_desc : block_desc.vars_) {
-      std::shared_ptr<VarDesc> copy_var_desc =
-          std::make_shared<VarDesc>(*var_desc.second);
-      vars_[var_desc.first] = copy_var_desc;
+    // Copy-construct a new VarDesc for every entry in block_desc.vars_.
+    for (const auto &var_desc : block_desc.vars_) {
+      vars_.emplace_back(std::make_shared<VarDesc>(*var_desc));
     }
   }
 
@@ -64,7 +62,7 @@ class BlockDesc : PaddleMobileObject {
   bool multi_thread_;
   int parent_index_;
   std::vector<std::shared_ptr<OpDesc>> ops_;
-  std::unordered_map<std::string, std::shared_ptr<VarDesc>> vars_;
+  std::vector<std::shared_ptr<VarDesc>> vars_;
 };
 
 }  // namespace framework
diff --git a/src/framework/program/op_desc.h b/src/framework/program/op_desc.h
index 07b903085d5d9044b93e3e9309390c9a3976580d..4fdfac253f0525b288983e8bcf9c1b4eff8f393d 100644
--- a/src/framework/program/op_desc.h
+++ b/src/framework/program/op_desc.h
@@ -20,12 +20,11 @@ limitations under the License. */
 #include "common/log.h"
 #include "common/type_define.h"
 #include "framework/framework.pb-c.h"
-#include "framework/paddle_mobile_object.h"
 
 namespace paddle_mobile {
 namespace framework {
 
-class OpDesc : PaddleMobileObject {
+class OpDesc {
  public:
   friend class ProgramOptimize;
   friend class FusionOpMatcher;
diff --git a/src/framework/program/program-optimize/fusion_op_register.cpp b/src/framework/program/program-optimize/fusion_op_register.cpp
deleted file mode 100644
index 010585166cc0828612a48c128f8753338ef16ff9..0000000000000000000000000000000000000000
--- a/src/framework/program/program-optimize/fusion_op_register.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "fusion_op_register.h"
diff --git a/src/framework/program/program-optimize/node.cpp b/src/framework/program/program-optimize/node.cpp
index c165b6568aa37e850d9dfd4dfbec63f8c4d85b02..4ea45ec0a859ef8aa3ab4e34de8279e732706803 100644
--- a/src/framework/program/program-optimize/node.cpp
+++ b/src/framework/program/program-optimize/node.cpp
@@ -12,10 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <sstream>
-
-#include "framework/operator.h"
 #include "framework/program/program-optimize/node.h"
+#include "framework/operator.h"
 
 namespace paddle_mobile {
 
@@ -45,54 +43,13 @@ bool Node::operator==(const Node &in) {
   return true;
 }
 
-bool Node::CanSplit(std::unordered_set<std::string> complex_compute_set) {
-  bool split = false;
-  CanSplit(&split, false, 0, &complex_compute_set, this);
-  return split;
-}
-
-void Node::CanSplit(bool *split, bool spliting, int complex_count,
-                    std::unordered_set<std::string> *complex_compute_set,
-                    Node *pre_node) {
-  if (spliting) {
-    if (complex_compute_set->find(this->type_) != complex_compute_set->end()) {
-      complex_count++;
-    }
-  }
-
-  if (inputs_.size() > 1 && pre_node != inputs_.back()) {
-    return;
-  }
-  if (inputs_.size() > 1 && pre_node == inputs_.back()) {
-    if (complex_count > 1) {
-      *split = true;
-      return;
-    }
-  }
-
-  // multi output, to check
-  if (outputs_.size() > 1) {
-    spliting = true;
-    complex_compute_set = 0;
-  } else {
-    if (spliting == true && inputs_.size() > 0) {
-      spliting = false;
-    } else {
-    }
-  }
-
-  for (auto &output : outputs_) {
-    output->CanSplit(split, spliting, complex_count, complex_compute_set, this);
-  }
-}
-
-std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs(uint size) {
+std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs(int size) {
   std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
   OpDescs(size - 1, &op_descs);
   return op_descs;
 }
 
-void Node::OpDescs(uint index,
+void Node::OpDescs(int index,
                    std::vector<std::shared_ptr<framework::OpDesc>> *op_desc) {
   if (index == 0) {
     return;
@@ -103,107 +60,6 @@ void Node::OpDescs(uint index,
   }
 }
 
-void Node::OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
-                   Node *node, bool adding_thread, int thread_num) {
-  if (outputs_.size() > 1) {
-    adding_thread = false;
-  }
-
-  bool can_add_split = false;
-  // 如果当前节点有多个输出 并且 只有当前节点对应的 op_desc_ 输出数为 1 时支持
-  if (outputs_.size() > 1 &&
-      op_input_output_key[op_desc_->type_].second.size() == 1) {
-    can_add_split = true;
-
-    // 遍历当前节点的 output 节点
-    for (const auto &output : outputs_) {
-      // 不支持 output 有多个 output 的情况
-      if (output->outputs_.size() > 0) {
-        can_add_split = false;
-        break;
-      }
-
-      //与节点关联的 OpDesc
-      std::shared_ptr<framework::OpDesc> &op_desc = output->op_desc_;
-
-      //获取这个 op 的 inputs key 和 outputs key
-      auto inputs_and_outputs = op_input_output_key[op_desc->type_];
-
-      //判断现在 是否存在这个 op
-      //判断这个 output 和 input key 的 size 等于 1
-      if (op_input_output_key.find(op_desc->type_) !=
-              op_input_output_key.end() &&
-          inputs_and_outputs.first.size() == 1 &&
-          inputs_and_outputs.second.size() == 1) {
-        auto inputs_of_output = op_desc->Input(inputs_and_outputs.first[0]);
-        auto outputs_of_output = op_desc->Output(inputs_and_outputs.second[0]);
-
-        // 判断一下, 如果输入和输出没有同名, 是支持的
-        for (int i = 0; i < inputs_of_output.size(); ++i) {
-          std::string input_of_output = inputs_of_output[i];
-          for (int j = 0; j < outputs_of_output.size(); ++j) {
-            std::string output_of_output = outputs_of_output[j];
-            if (input_of_output == output_of_output) {
-              DLOG << "output的 output 包含 input" << input_of_output;
-              can_add_split = false;
-              break;
-            }
-          }
-        }
-      } else {  // 如果模型中包含没有的 op, 则不支持添加 split
-        DLOG << "找不到 这个 op 类型: " << output->op_desc_->type_;
-        can_add_split = false;
-      }
-    }
-  }
-
-  if (inputs_.size() > 1 && node != inputs_.back()) {
-    return;
-  } else if (inputs_.size() > 1 && node == inputs_.back()) {
-    adding_thread = false;
-    op_desc->push_back(this->op_desc_);
-  } else {
-    op_desc->push_back(this->op_desc_);
-  }
-  if (adding_thread) {
-    Attribute attr;
-    attr.Set<int>(thread_num);
-    this->op_desc_->attrs_["thread"] = attr;
-  }
-
-  if (can_add_split) {
-    adding_thread = true;
-    std::shared_ptr<OpDesc> split_op_desc = std::make_shared<OpDesc>();
-    split_op_desc->type_ = G_OP_TYPE_SPLIT;
-    auto outputs = this->op_desc_->Output(
-        op_input_output_key[this->op_desc_->Type()].second[0]);
-    split_op_desc->inputs_ = {
-        {op_input_output_key[G_OP_TYPE_SPLIT].first[0], outputs}};
-    auto &split_outputs =
-        split_op_desc->outputs_[op_input_output_key[G_OP_TYPE_SPLIT].second[0]];
-    for (const auto &output : outputs_) {
-      split_outputs.push_back(outputs[0]);
-    }
-    DLOG << "add split";
-    op_desc->push_back(split_op_desc);
-  }
-
-  for (int i = 0; i < outputs_.size(); ++i) {
-    auto &output = outputs_[i];
-    if (can_add_split) {
-      output->OpDescs(op_desc, this, adding_thread, i);
-    } else {
-      output->OpDescs(op_desc, this, adding_thread, thread_num);
-    }
-  }
-}
-
-std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs() {
-  std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
-  OpDescs(&op_descs, this, false, 0);
-  return op_descs;
-}
-
 std::shared_ptr<Node> Node::To(int size) {
   std::shared_ptr<Node> node = std::make_shared<Node>();
   this->To(size - 1, node);
@@ -224,24 +80,25 @@ void Node::To(int index, std::shared_ptr<Node> node) {
   }
 }
 
-uint Node::Depth(uint begin) {
-  uint depth = 0;
+int Node::Depth(int begin) {
+  int depth = 0;
   begin++;
   for (int i = 0; i < outputs_.size(); ++i) {
-    uint output_depth = outputs_[i]->Depth(begin);
+    int output_depth = outputs_[i]->Depth(begin);
     depth = output_depth > depth ? output_depth : depth;
   }
   return begin > depth ? begin : depth;
 }
 
 Node &Node::Folder(
-    uint size, std::string type,
-    std::map<std::string, std::pair<std::string, std::string>> change) {
+    int size, std::string type,
+    std::map<std::string, std::pair<std::string, std::string>> change,
+    std::vector<std::shared_ptr<Node>> *removed_nodes) {
   std::shared_ptr<framework::OpDesc> op_desc =
       std::make_shared<framework::OpDesc>();
   op_desc->inputs_ = this->op_desc_->inputs_;
   std::vector<std::shared_ptr<Node>> outputs;
-  this->Folder(op_desc, &outputs, size - 1, &change, this);
+  this->Folder(op_desc, &outputs, size - 1, &change, this, removed_nodes);
   this->outputs_ = outputs;
   this->type_ = type;
   this->op_desc_ = op_desc;
@@ -251,9 +108,9 @@ Node &Node::Folder(
 
 void Node::Folder(
     std::shared_ptr<framework::OpDesc> op_desc,
-    std::vector<std::shared_ptr<Node>> *outputs, uint index,
+    std::vector<std::shared_ptr<Node>> *outputs, int index,
     std::map<std::string, std::pair<std::string, std::string>> *change,
-    Node *begin_node) {
+    Node *begin_node, std::vector<std::shared_ptr<Node>> *removed_nodes) {
   if (change->find(this->type_) != change->end()) {
     auto change_pair = (*change)[this->type_];
     op_desc->GetInputs()[change_pair.second] =
@@ -266,7 +123,9 @@ void Node::Folder(
   if (index > 0) {
     --index;
     for (auto output : outputs_) {
-      output->Folder(op_desc, outputs, index, change, begin_node);
+      removed_nodes->push_back(output);
+      output->Folder(op_desc, outputs, index, change, begin_node,
+                     removed_nodes);
     }
   } else {
     for (auto &op_output : this->op_desc_->outputs_) {
@@ -285,7 +144,7 @@ void Node::Folder(
     }
   }
 }
-
+#ifdef PADDLE_MOBILE_DEBUG
 std::string Node::ToString(std::string blank, const Node *node) const {
   std::stringstream ss;
   ss << type_ << "-> \n";
@@ -316,6 +175,7 @@ Print &operator<<(Print &printer, const Node &node) {
   printer << node.ToString();
   return printer;
 }
+#endif
 
 }  // namespace framework
 }  // namespace paddle_mobile
diff --git a/src/framework/program/program-optimize/node.h b/src/framework/program/program-optimize/node.h
index 8ef26f897d2052db97780d7bdc23db1641fc4f6f..7236ffdd1782dfb39af73195da9b3756030c9117 100644
--- a/src/framework/program/program-optimize/node.h
+++ b/src/framework/program/program-optimize/node.h
@@ -14,20 +14,17 @@ limitations under the License. */
 
 #pragma once
 
+#include <cinttypes>
 #include <map>
 #include <string>
-#include <unordered_set>
-#include <utility>
 #include <vector>
-
 #include "common/log.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/op_desc.h"
 
 namespace paddle_mobile {
 namespace framework {
 
-class Node : PaddleMobileObject {
+class Node {
   friend class ProgramOptimize;
 
  public:
@@ -37,35 +34,34 @@ class Node : PaddleMobileObject {
       : op_desc_(op_desc), type_(op_desc->Type()) {}
   Node &operator>(std::shared_ptr<Node> node);
   bool operator==(const Node &in);
-  bool CanSplit(std::unordered_set<std::string> complex_compute_set);
+
+#ifdef PADDLE_MOBILE_DEBUG
   std::string ToString() const;
+  void Description();
+#endif
   std::shared_ptr<Node> To(int size);
-  uint Depth(uint begin = 0);
+  int Depth(int begin = 0);
   Node &Folder(
-      uint size, std::string type,
-      std::map<std::string, std::pair<std::string, std::string>> change_map);
-  std::vector<std::shared_ptr<framework::OpDesc>> OpDescs(uint size);
-  std::vector<std::shared_ptr<framework::OpDesc>> OpDescs();
+      int size, std::string type,
+      std::map<std::string, std::pair<std::string, std::string>> change_map,
+      std::vector<std::shared_ptr<Node>> *removed_nodes);
+  std::vector<std::shared_ptr<framework::OpDesc>> OpDescs(int size);
   std::shared_ptr<framework::OpDesc> OpDescOfNode() { return op_desc_; }
   std::string Type() { return type_; }
-  void Description();
 
  private:
-  void CanSplit(bool *split, bool spliting, int complex_count,
-                std::unordered_set<std::string> *complex_compute_set,
-                Node *pre_node);
-  void OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
-               Node *node, bool adding_thread, int thread_num);
-  void OpDescs(uint size,
+  void OpDescs(int size,
                std::vector<std::shared_ptr<framework::OpDesc>> *op_desc);
   void To(int index, std::shared_ptr<Node>);
   void Folder(
       std::shared_ptr<framework::OpDesc> op_desc,
-      std::vector<std::shared_ptr<Node>> *outputs, uint index,
+      std::vector<std::shared_ptr<Node>> *outputs, int index,
       std::map<std::string, std::pair<std::string, std::string>> *change,
-      Node *begin_node);
+      Node *begin_node, std::vector<std::shared_ptr<Node>> *removed_nodes);
   std::shared_ptr<framework::OpDesc> op_desc_;
+#ifdef PADDLE_MOBILE_DEBUG
   std::string ToString(std::string blank, const Node *node) const;
+#endif
   std::vector<std::shared_ptr<Node>> outputs_;
   std::vector<Node *> inputs_;
   std::string type_;
diff --git a/src/framework/program/program-optimize/program_optimize.cpp b/src/framework/program/program-optimize/program_optimize.cpp
index d9c3c51c3c8ab528d71d992b3710e981a5087729..15724523ded18e14cecf5d5aacf506992dadb3b4 100644
--- a/src/framework/program/program-optimize/program_optimize.cpp
+++ b/src/framework/program/program-optimize/program_optimize.cpp
@@ -31,6 +31,8 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
     std::unordered_map<std::string, std::vector<std::shared_ptr<Node>>>
         type_map;
 
+    std::vector<std::shared_ptr<Node>> nodes;
+
     std::shared_ptr<Node> begin_node;
     auto block = optimize_program->Block(i);
     //        DLOG << " ops size: " << block->Ops().size();
@@ -38,11 +40,13 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
       auto op = block->Ops()[j];
       auto op_type = op->Type();
       if (op_input_output_key.find(op->Type()) == op_input_output_key.end()) {
-        LOG(kLOG_ERROR) << "return null ";
+        LOG(kLOG_ERROR) << "has not support op return null "
+                        << " op type: " << op->Type();
         return nullptr;
       }
 
       std::shared_ptr<Node> node = std::make_shared<Node>(op);
+      nodes.push_back(node);
 
       //
       type_map[op->Type()].push_back(node);
@@ -87,21 +91,26 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
           //          DLOG << " match success " << " fusion node: \n" <<
           //          matcher->BeginNode() << "\nsub node: \n" << *sub_node;
           //          DLOG << "match node\n"<< *match_node;
-          matcher->FolderNodes(match_node.get());
-          //          DLOG << " after match node\n"<< *match_node;
-          //          match_node->Description();
 
-          //          DLOG << "begin node: \n" << *begin_node;
+          std::vector<std::shared_ptr<Node>> removed_nodes;
+          matcher->FolderNodes(match_node.get(), &removed_nodes);
+
+          // Folding may report a node more than once; erase(end()) is
+          // undefined, so only erase nodes that are still in the list.
+          for (const auto &removed_node : removed_nodes) {
+            auto it = std::find(nodes.begin(), nodes.end(), removed_node);
+            if (it != nodes.end()) nodes.erase(it);
+          }
         }
       }
     }
 
-    //    DLOG << "node: \n" << *begin_node;
-
     std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
-    //    bool can_splite = begin_node->CanSplit({G_OP_TYPE_CONV,
-    //    G_OP_TYPE_BATCHNORM, G_OP_TYPE_DEPTHWISE_CONV});
-    GenerateOps(&op_descs, begin_node.get());
+    for (int m = 0; m < nodes.size(); ++m) {
+      auto &node = nodes[m];
+      op_descs.push_back(node->op_desc_);
+    }
+    //    GenerateOps(&op_descs, begin_node.get());
     block->ops_ = op_descs;
   }
 
@@ -118,6 +127,14 @@ void ProgramOptimize::GenerateOps(
     Node *current_node) {
   if (current_node->inputs_.size() > 1 &&
       input_node != current_node->inputs_.back()) {
+    DLOG << " current type " << current_node->type_;
+
+    DLOG << " inputs size of current node > 0 ";
+
+    for (int i = 0; i < current_node->inputs_.size(); ++i) {
+      DLOG << " input i: " << current_node->inputs_[i]->type_;
+    }
+
     return;
   } else if (current_node->inputs_.size() > 1 &&
              input_node == current_node->inputs_.back()) {
diff --git a/src/framework/program/program.cpp b/src/framework/program/program.cpp
deleted file mode 100644
index 83e389917c76df50ea0380795b36ff012da01568..0000000000000000000000000000000000000000
--- a/src/framework/program/program.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-namespace paddle_mobile {
-namespace framework {}
-}  // namespace paddle_mobile
diff --git a/src/framework/program/program.h b/src/framework/program/program.h
index 3a9cbfc1d9e3f3d099bcfeac32260613fc4dccc3..bb82fa7334a7d1941734dcd846c8e66befdbdd10 100644
--- a/src/framework/program/program.h
+++ b/src/framework/program/program.h
@@ -15,7 +15,6 @@ limitations under the License. */
 #pragma once
 
 #include "common/types.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/program_desc.h"
 #include "framework/scope.h"
 
@@ -23,12 +22,14 @@ namespace paddle_mobile {
 namespace framework {
 
 template <typename Dtype, Precision P = Precision::FP32>
-class Program : PaddleMobileObject {
+class Program {
  public:
   std::shared_ptr<ProgramDesc> originProgram;
   std::shared_ptr<ProgramDesc> optimizeProgram;
   std::shared_ptr<Scope> scope;
   std::string model_path;
+  std::string para_path;
+  bool is_commbine = false;
 
  private:
 };
diff --git a/src/framework/program/program_desc.h b/src/framework/program/program_desc.h
index 6aa7dd44ce5880d8b1db0e2b3ffad2e0bd31d46e..5c87f565e13df1564343b43150a5696c3adaca39 100644
--- a/src/framework/program/program_desc.h
+++ b/src/framework/program/program_desc.h
@@ -18,13 +18,12 @@ limitations under the License. */
 
 #include "common/types.h"
 #include "framework/framework.pb-c.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/block_desc.h"
 
 namespace paddle_mobile {
 namespace framework {
 
-class ProgramDesc : PaddleMobileObject {
+class ProgramDesc {
  public:
   friend class Node;
   friend class ProgramOptimize;
diff --git a/src/framework/program/var_desc.h b/src/framework/program/var_desc.h
index 5ab2fc56178b8d48d2dfb637817eca13b53677d5..f6f04f2c7026166e1024dcc1a4b2a233deac649b 100644
--- a/src/framework/program/var_desc.h
+++ b/src/framework/program/var_desc.h
@@ -14,40 +14,14 @@ limitations under the License. */
 
 #pragma once
 
+#include <string>
+
 #include "framework/framework.pb-c.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/tensor_desc.h"
 
 namespace paddle_mobile {
 namespace framework {
 
-/*
-
-PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL = 0,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT16 = 1,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT32 = 2,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT64 = 3,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP16 = 4,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP32 = 5,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP64 = 6,
-
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_TENSOR = 7,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__SELECTED_ROWS = 8,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FEED_MINIBATCH = 9,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FETCH_LIST = 10,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__STEP_SCOPES = 11,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_RANK_TABLE = 12,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_TENSOR_ARRAY = 13,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__PLACE_LIST = 14,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__READER = 15,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__CHANNEL = 16,
-
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__RAW = 17,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__TUPLE = 18
-
-
-                                                                 */
-
 class VarDesc {
  public:
   VarDesc(const VarDesc &var_desc) {
@@ -56,14 +30,6 @@ class VarDesc {
     this->persistable_ = var_desc.persistable_;
     this->tensor_desc_ = var_desc.tensor_desc_;
     this->type_ = var_desc.type_;
-    /*
-     *
-     *  std::string name_;
-  bool persistable_;
-  TensorDesc tensor_desc_;
-  VarType_Type type_;
-  VarType_Type data_type_;
-     * */
   }
   VarDesc(PaddleMobile__Framework__Proto__VarDesc *desc) {
     type_ = (VarType_Type)desc->type->type;
@@ -102,39 +68,6 @@ class VarDesc {
 
   const TensorDesc &Tensor_desc() const { return tensor_desc_; }
 
-  //  const proto::VarType::ChannelDesc &channel_desc() const {
-  //    switch (desc_.type().type()) {
-  //      case proto::VarType::CHANNEL:
-  //        return desc_.type().channel();
-  //      default:
-  //        break;
-  //    }
-  //  }
-
-  //  proto::VarType::Type GetDataType() const {
-  //    switch (desc_.type().type()) {
-  //      case proto::VarType::CHANNEL:
-  //        return channel_desc().data_type();
-  //        break;
-  //      default:
-  //        return tensor_desc().data_type();
-  //    }
-  //  }
-
-  //  template <typename T>
-  //  std::vector<T> RepeatedToVector(
-  //      const google::protobuf::RepeatedField<T> &repeated_field) const {
-  //    std::vector<T> ret;
-  //    ret.reserve(repeated_field.size());
-  //    std::copy(repeated_field.begin(), repeated_field.end(),
-  //              std::back_inserter(ret));
-  //    return ret;
-  //  }
-
-  //  std::vector<int64_t> GetShape() const {
-  //    return this->RepeatedToVector(tensor_desc().dims());
-  //  }
-
  private:
   std::string name_;
   bool persistable_;
diff --git a/src/framework/scope.cpp b/src/framework/scope.cpp
index c5ee2d39fa7a7bf4c1c7b1c2f3fb8f1e92f4e455..2f7ff247b846f0a5f3e59c5c2f317a59598fc643 100644
--- a/src/framework/scope.cpp
+++ b/src/framework/scope.cpp
@@ -22,7 +22,6 @@ namespace paddle_mobile {
 namespace framework {
 
 Scope &Scope::NewScope() const {
-  std::unique_lock<std::mutex> lock(mutex_);
   kids_.push_back(new Scope(this));
   return *kids_.back();
 }
@@ -72,11 +71,9 @@ std::vector<std::string> Scope::LocalVarNames() const {
 }
 
 void Scope::DeleteScope(Scope *scope) const {
-  std::unique_lock<std::mutex> lock(mutex_);
   auto it = std::find(kids_.begin(), kids_.end(), scope);
-  kids_.erase(it);
+  if (it != kids_.end()) kids_.erase(it);  // erase(end()) is undefined
   delete scope;
-  // deferent
 }
 
 void Scope::EraseVars(const std::vector<std::string> &var_names) {
@@ -104,14 +101,6 @@ void Scope::Rename(const std::string &origin_name,
   vars_[new_name] = origin_it->second;
   vars_.erase(origin_it);
 }
-//
-//            std::string Scope::Rename(const std::string& origin_name)
-//            const {
-//                auto var_name = string::Sprintf("%p.%d", this,
-//                vars_.size());
-//                Rename(origin_name, var_name);
-//                return var_name;
-//            }
 
 Variable *Scope::FindVarLocally(const std::string &name) const {
   auto it = vars_.find(name);
diff --git a/src/framework/scope.h b/src/framework/scope.h
index 8b194654f61d7502184b45c7eb07d655b70784dc..d714f61af3bd443c09fcef7aacee2416b90b5e02 100644
--- a/src/framework/scope.h
+++ b/src/framework/scope.h
@@ -14,17 +14,16 @@ limitations under the License. */
 
 #pragma once
 
-#include <list>           //std::list
-#include <mutex>          //std::mutex
-#include <unordered_map>  //std::unordered_map
+#include <list>
+#include <unordered_map>
 #include "variable.h"
 
 namespace paddle_mobile {
 namespace framework {
 class Scope {
  public:
-  Scope() {}
-  ~Scope() {}
+  Scope() = default;
+  ~Scope() = default;
 
   Scope &NewScope() const;
 
@@ -70,8 +69,6 @@ class Scope {
   mutable std::unordered_map<std::string, Variable *> vars_;
   mutable std::list<Scope *> kids_;
   Scope const *parent_{nullptr};
-
-  mutable std::mutex mutex_;
 };
 }  // namespace framework
 }  // namespace paddle_mobile
diff --git a/src/framework/selected_rows.h b/src/framework/selected_rows.h
deleted file mode 100644
index f59bd1aabfb7a0571b484fa21375acb4cb8254d3..0000000000000000000000000000000000000000
--- a/src/framework/selected_rows.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <vector>
-
-#include "lod_tensor.h"
-#include "tensor.h"
-
-namespace paddle_mobile {
-namespace framework {
-
-class SelectedRows {
- public:
-  SelectedRows(const std::vector<int64_t> &rows, const int64_t &height)
-      : rows_(rows), height_(height) {
-    value_.reset(new Tensor());
-  }
-
-  SelectedRows() {
-    height_ = 0;
-    value_.reset(new Tensor());
-  }
-
-  const Tensor &value() const { return *value_; }
-
-  Tensor *mutable_value() { return value_.get(); }
-
-  int64_t height() const { return height_; }
-
-  void set_height(int64_t height) { height_ = height; }
-
-  const std::vector<int64_t> &rows() const { return rows_; }
-
-  std::vector<int64_t> *mutable_rows() { return &rows_; }
-
-  void set_rows(const std::vector<int64_t> &rows) { rows_ = rows; }
-
-  /**
-   * get the index of id in rows
-   */
-  int64_t index(int64_t id) const {
-    auto it = std::find(rows_.begin(), rows_.end(), id);
-    //    PADDLE_ENFORCE(it != rows_.end(), "id should be in rows");
-    return static_cast<int64_t>(std::distance(rows_.begin(), it));
-  }
-
-  DDim GetCompleteDims() const {
-    std::vector<int64_t> dims = vectorize(value_->dims());
-    dims[0] = height_;
-    return make_ddim(dims);
-  }
-
- private:
-  // Notice: rows can be duplicate. We can have {0, 4, 7, 0, 5, 7, 9}
-  // here.
-  // SelectedRows are simply concated when adding together. Until a
-  // SelectedRows add a Tensor, will the duplicate rows be handled.
-  std::vector<int64_t> rows_;
-  std::unique_ptr<Tensor> value_{nullptr};
-  int64_t height_;
-};
-
-}  // namespace framework
-}  // namespace paddle_mobile
diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index 674edd67733ef8d0520d28f5c131e9da6746ad17..a5f9afebdd1c68d1858679a22d001d42a745c62d 100644
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -14,14 +14,15 @@ limitations under the License. */
 
 #pragma once
 
-#include <common/enforce.h>
 #include <cstdint>
 #include <cstring>
 #include <memory>
 #include <type_traits>
 #include <typeindex>
 #include <vector>
+#include "common/enforce.h"
 
+#include "common/enforce.h"
 #include "framework/data_layout.h"
 #include "framework/ddim.h"
 #include "memory/t_malloc.h"
diff --git a/src/framework/tensor_util.cpp b/src/framework/tensor_util.cpp
index 23b775b095d04c46764791f9f8438f2b888263bd..465502cb19173e26361905752e76e75c15229893 100644
--- a/src/framework/tensor_util.cpp
+++ b/src/framework/tensor_util.cpp
@@ -13,137 +13,19 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "tensor_util.h"
-#include <algorithm>
-#include <limits>
-#include <vector>
 
 namespace paddle_mobile {
 namespace framework {
 
 void TensorCopy(const Tensor &src, Tensor *dst) {
-  //  VLOG(3) << "TensorCopy " << src.dims() << " from " <<
-  //  src.place() << " to
-  //  "
-  //          << dst_place;
   src.check_memory_size();
-
   dst->Resize(src.dims());
   dst->set_layout(src.layout());
   auto src_ptr = src.data<void>();
-
   auto dst_ptr = dst->mutable_data(src.type());
-
   auto size = src.numel() * SizeOfType(src.type());
-
   memory::Copy(dst_ptr, src_ptr, size);
 }
 
-void TensorCopySync(const Tensor &src, Tensor *dst) {
-  //  VLOG(3) << "TensorCopySync " << src.dims() << " from " <<
-  //  src.place()
-  //          << " to " << dst_place;
-  src.check_memory_size();
-  dst->Resize(src.dims());
-  dst->set_layout(src.layout());
-  auto src_ptr = src.data<void>();
-  auto dst_ptr = dst->mutable_data(src.type());
-  auto size = src.numel() * SizeOfType(src.type());
-  memory::Copy(dst_ptr, src_ptr, size);
-}
-
-template <typename Predicate>
-struct AnyDTypeVisitor {
-  Predicate predicate_;
-  const Tensor &tensor_;
-  Tensor *out_;
-
-  AnyDTypeVisitor(Predicate predicate, const Tensor &tensor, Tensor *out)
-      : predicate_(predicate), tensor_(tensor), out_(out) {}
-
-  template <typename T>
-  void operator()() const {
-    //    auto t = EigenVector<T>::Flatten(tensor_);
-    //    auto o = EigenScalar<bool>::From(*out_);
-    // return any of predicate_(t) is true.
-    //    o.device(*ctx_.eigen_device()) = predicate_(t).any();
-  }
-};
-
-template <typename Predicate>
-inline void AnyImpl(Predicate predicate, const Tensor &tensor,
-                    framework::Tensor *out) {
-  VisitDataType(ToDataType(tensor.type()),
-                AnyDTypeVisitor<Predicate>(predicate, tensor, out));
-}
-
-template <typename Predicate>
-struct AnyVisitor {
-  const framework::Tensor &tensor_;
-  Predicate predicate_;
-
-  AnyVisitor(const framework::Tensor &tensor, Predicate predicate)
-      : tensor_(tensor), predicate_(std::move(predicate)) {}
-
-  bool operator()(void) const {
-    framework::Tensor out;
-    out.Resize({1});
-    out.mutable_data<bool>();
-    AnyImpl(predicate_, tensor_, &out);
-    return this->GetResult(out);
-  }
-
-  bool GetResult(const framework::Tensor &out) const {
-    return *out.data<bool>();
-  }
-};
-
-template <typename Predicate>
-inline bool Any(const framework::Tensor &tensor, Predicate predicate) {
-  AnyVisitor<Predicate> visitor(tensor, predicate);
-  //  return platform::VisitPlace(visitor);
-  return visitor();
-}
-
-struct ContainsNANPredicate {
-  template <typename T>
-  auto operator()(const T &eigen_vec) const
-      -> decltype(std::declval<T>().isnan()) {
-    // Cast eigen_vector to vector of bool. true if is inf.
-    return eigen_vec.isnan();
-  }
-};
-
-bool TensorContainsNAN(const framework::Tensor &tensor) {
-  ContainsNANPredicate predicate;
-  return Any(tensor, predicate);
-}
-
-struct ContainsInfPredicate {
-  template <typename T>
-  auto operator()(const T &eigen_vec) const
-      -> decltype(std::declval<T>().isinf()) {
-    // Cast eigen_vector to vector of bool. true if is inf.
-    return eigen_vec.isinf();
-  }
-};
-
-bool TensorContainsInf(const framework::Tensor &tensor) {
-  ContainsInfPredicate predicate;
-  return Any(tensor, predicate);
-}
-
-struct DeserializedDataFunctor {
-  DeserializedDataFunctor(void **buf, Tensor *tensor)
-      : buf_(buf), tensor_(tensor) {}
-
-  template <typename T>
-  void operator()() {
-    *buf_ = tensor_->mutable_data<T>();
-  }
-
-  void **buf_;
-  Tensor *tensor_;
-};
-
 }  // namespace framework
 }  // namespace paddle_mobile
diff --git a/src/framework/tensor_util.h b/src/framework/tensor_util.h
index 9af873d34a914b966a20c79a9c8f815309cba680..f888049b395e48b9d10cea731b092c899952e3d8 100644
--- a/src/framework/tensor_util.h
+++ b/src/framework/tensor_util.h
@@ -15,51 +15,12 @@ limitations under the License. */
 #pragma once
 #include <vector>
 #include "memory/t_malloc.h"
-#include "platform/data_type.h"
 #include "tensor.h"
 
 namespace paddle_mobile {
 namespace framework {
 
 void TensorCopy(const Tensor &src, Tensor *dst);
-void TensorCopySync(const Tensor &src, Tensor *dst);
-
-template <typename T>
-void TensorFromVector(const std::vector<T> &src, Tensor *dst);
-
-template <typename T>
-void TesnorToVector(const Tensor &src, std::vector<T> *dst);
-
-bool TensorContainsNAN(const framework::Tensor &tensor);
-bool TensorContainsInf(const framework::Tensor &tensor);
-
-void TensorToStream(std::ostream &os, const Tensor &tensor);
-void TensorFromStream(std::istream &is, Tensor *tensor);
-
-//
-// The implementation of template functions.
-//
-
-template <typename T>
-void TensorFromVector(const std::vector<T> &src, Tensor *dst) {
-  auto src_ptr = static_cast<const void *>(src.data());
-  dst->Resize({static_cast<int64_t>(src.size())});
-  auto dst_ptr = static_cast<void *>(dst->mutable_data<T>());
-  auto size = src.size() * sizeof(T);
-
-  memory::Copy(dst_ptr, src_ptr, size);
-}
-
-template <typename T>
-void TensorToVector(const Tensor &src, std::vector<T> *dst) {
-  auto src_ptr = static_cast<const void *>(src.data<T>());
-  auto size = src.numel() * sizeof(T);
-
-  dst->resize(src.numel());
-  auto dst_ptr = static_cast<void *>(dst->data());
-
-  memory::Copy(dst_ptr, src_ptr, size);
-}
 
 }  // namespace framework
 }  // namespace paddle_mobile
diff --git a/src/framework/var_type.h b/src/framework/var_type.h
deleted file mode 100644
index 5e132c73759bfa3a863023baf52df6ef41365047..0000000000000000000000000000000000000000
--- a/src/framework/var_type.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#include "framework.pb.h"
-#include "lod_tensor.h"
-#include "selected_rows.h"
-#include "variable.h"
-
-namespace paddle_mobile {
-namespace framework {
-inline proto::VarType::Type ToVarType(std::type_index type) {
-  if (type.hash_code() == typeid(LoDTensor).hash_code()) {
-    return proto::VarType_Type_LOD_TENSOR;
-  } else if (type.hash_code() == typeid(SelectedRows).hash_code()) {
-    return proto::VarType_Type_SELECTED_ROWS;
-  } else {
-    //    PADDLE_THROW("ToVarType:Unsupported type %s",
-    //    type.name());
-  }
-}
-
-}  // namespace framework
-}  // namespace paddle_mobile
diff --git a/src/framework/variable.h b/src/framework/variable.h
index 07cb6377e0c9ca89f828eded887b8d1da2d8aae6..e1527b3a331eb67c31aec5011bf84de3dc9bc247 100644
--- a/src/framework/variable.h
+++ b/src/framework/variable.h
@@ -14,19 +14,17 @@ limitations under the License. */
 
 #pragma once
 
-#include <iostream>
 #include <memory>
 #include <string>
 #include <typeindex>
 #include <typeinfo>
 #include "../common/variant.h"
-#include "paddle_mobile_object.h"
 
 namespace paddle_mobile {
 namespace framework {
 using std::string;
 
-class Variable : public PaddleMobileObject {
+class Variable {
  public:
   template <typename T>
   const T *Get() const {
diff --git a/src/io.cpp b/src/io/io.cpp
similarity index 64%
rename from src/io.cpp
rename to src/io/io.cpp
index 8f6a07f2dd1f8f2c2daa09f220bddc463c268e9e..9c34378d99e52e8e2919944a9319e8cc97d6b074 100644
--- a/src/io.cpp
+++ b/src/io/io.cpp
@@ -13,11 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "io.h"
-#include <fstream>
 #include <vector>
-#include "common/log.h"
 
 #include "common/enforce.h"
+#include "common/log.h"
 #include "framework/framework.pb-c.h"
 #include "framework/lod_tensor.h"
 #include "framework/operator.h"
@@ -30,16 +29,20 @@ limitations under the License. */
 namespace paddle_mobile {
 using framework::Variable;
 
-void ReadBinaryFile(const std::string &filename, std::string *contents) {
-  std::ifstream fin(filename, std::ios::in | std::ios::binary);
-  PADDLE_MOBILE_ENFORCE(fin.is_open(), "open file: %s failed",
+char *Get_binary_data(std::string filename) {
+  FILE *file = fopen(filename.c_str(), "rb");
+  PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
                         filename.c_str());
-  fin.seekg(0, std::ios::end);
-  contents->clear();
-  contents->resize(fin.tellg());
-  fin.seekg(0, std::ios::beg);
-  fin.read(&(contents->at(0)), contents->size());
-  fin.close();
+  fseek(file, 0, SEEK_END);  // seek to the end so ftell reports the size
+  long size = ftell(file);
+  PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
+  rewind(file);
+  char *data = new char[size];  // array allocation: release with delete[]
+  size_t bytes_read = fread(data, 1, size, file);
+  PADDLE_MOBILE_ENFORCE(bytes_read == size,
+                        "read binary file bytes do not match with fseek");
+  fclose(file);
+  return data;  // raw file bytes, no terminator appended
 }
 
 static size_t ReadBuffer(const char *file_name, uint8_t **out) {
@@ -66,110 +69,27 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
 }
 
 template <typename Dtype, Precision P>
-void Loader<Dtype, P>::LoadVar(framework::Variable *variable,
-                               const framework::VarDesc &var_desc,
-                               const std::string &file_path) {
-  auto tensor = variable->GetMutable<framework::LoDTensor>();
-  std::ifstream is(file_path);
-  PADDLE_MOBILE_ENFORCE(is.is_open(), "open file: %s failed",
-                        file_path.c_str());
-
-  std::fpos<mbstate_t> pos;
-  pos = is.tellg();  // save   current   position
-  is.seekg(0, std::ios::end);
-  is.seekg(pos);  // restore   saved   position
-
-  // 1. version
-  uint32_t version;
-  is.read(reinterpret_cast<char *>(&version), sizeof(version));
-
-  // 2 Lod information
-  uint64_t lod_level;
-  is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
-  auto &lod = *tensor->mutable_lod();
-  lod.resize(lod_level);
-  for (uint64_t i = 0; i < lod_level; ++i) {
-    uint64_t size;
-    is.read(reinterpret_cast<char *>(&size), sizeof(size));
-    std::vector<size_t> tmp(size / sizeof(size_t));
-    is.read(reinterpret_cast<char *>(tmp.data()),
-            static_cast<std::streamsize>(size));
-    for (auto j : tmp) {
-      LOG(kLOG_DEBUG1) << "    lod - " << j;
-    }
-    lod[i] = tmp;
-  }
-
-  // 3. tensor version
-  uint32_t tensor_version;
-  is.read(reinterpret_cast<char *>(&tensor_version), sizeof(tensor_version));
-
-  // 4. tensor desc
-  int32_t size;
-  is.read(reinterpret_cast<char *>(&size), sizeof(size));
-  std::unique_ptr<char[]> buf(new char[size]);
-  is.read(reinterpret_cast<char *>(buf.get()), size);
-
-  const framework::TensorDesc &desc = var_desc.Tensor_desc();
-
-  PaddleMobile__Framework__Proto__VarType__TensorDesc *tensor_desc = NULL;
-  //  void *v;
-  //  PaddleMobile__Framework__Proto__VarType__TensorDesc_Closure()(tensor_desc,
-  //  buf.get());
-
-  //  DLOG << "PaddleMobile__Framework__Proto__VarType__TensorDesc_Closure- " <<
-  //  tensor_desc;
-
-  //  framework::TensorDesc &tensor_desc = variable->
-  //  PaddleMobile__Framework__Proto__ProgramDesc *c_program;
-  //  uint8_t *proto_buf = NULL;
-  //  size_t read_size = ReadBuffer(file_path.c_str(), &proto_buf);
-  //  c_program = paddle_mobile__framework__proto__program_desc__unpack(NULL,
-  //  read_size, buf);
-
-  //  paddle_mobile__framework__proto__var_type__tensor_desc__init()
-
-  int memory_size = 1;
-  for (auto l : desc.Dims()) {
-    memory_size *= l;
-  }
-
-  tensor->Resize(framework::make_ddim(desc.Dims()));
-
-  void *memory = tensor;
-  int type_size = 0;
-  switch (desc.DataType()) {
-    case framework::VARTYPE_TYPE_FP16:
-      type_size = 2;
-      break;
-    case framework::VARTYPE_TYPE_FP32:
-      type_size = 4;
-      memory = tensor->mutable_data<float>();
-      break;
-    case framework::VARTYPE_TYPE_FP64:
-      type_size = 8;
-      break;
-    case framework::VARTYPE_TYPE_INT32:
-      type_size = 4;
-      break;
-    case framework::VARTYPE_TYPE_INT64:
-      type_size = 8;
-      break;
-    case framework::VARTYPE_TYPE_BOOL:
-      type_size = 1;
-      break;
-    default:
-      break;
-  }
-
-  is.read(static_cast<char *>(memory), memory_size * type_size);
-  is.close();
+const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
+    const std::string &dirname, bool optimize) {
+  auto program = this->LoadProgram(dirname + "/__model__", optimize);
+  program.model_path = dirname;  // per-variable files are read from here
+  return program;
 }
 
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-    const std::string &dirname, bool optimize) {
-  std::string model_filename = dirname + "/__model__";
+    const std::string &model_path, const std::string &para_path,
+    bool optimize) {
+  auto program = this->LoadProgram(model_path, optimize);
+  program.para_path = para_path;  // one file holding every parameter
+  program.is_commbine = true;  // Executor then runs InitCombineMemory()
+  return program;
+}
+
+template <typename Dtype, Precision P>
+const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
+    const std::string &model_path, bool optimize) {
+  std::string model_filename = model_path;
   PaddleMobile__Framework__Proto__ProgramDesc *c_program;
   uint8_t *buf = NULL;
   size_t read_size = ReadBuffer(model_filename.c_str(), &buf);
@@ -183,22 +103,16 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
   //
   DLOG << "n_ops: " << (*c_program->blocks)->n_ops;
   //
-  std::shared_ptr<framework::ProgramDesc> originProgramDesc =
-      std::make_shared<framework::ProgramDesc>(c_program);
+  auto originProgramDesc = std::make_shared<framework::ProgramDesc>(c_program);
 
   framework::Program<Dtype, P> program;
-  program.model_path = dirname;
   program.originProgram = originProgramDesc;
 
-  std::shared_ptr<framework::Scope> scope =
-      std::make_shared<framework::Scope>();
+  auto scope = std::make_shared<framework::Scope>();
   program.scope = scope;
-  originProgramDesc->Block(0);
 
   for (const auto &block : originProgramDesc->Blocks()) {
-    for (int i = 0; i < block->Vars().size(); ++i) {
-      std::shared_ptr<framework::VarDesc> var_desc = block->Vars()[i];
-      //      DLOG << "var name-- " << var_desc->Name();
+    for (auto var_desc : block->Vars()) {
       auto var = scope->Var(var_desc->Name());
 
       if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
@@ -221,6 +135,8 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
     }
   }
 
+  //  originProgramDesc->Description("program: ");
+
   if (optimize) {
     framework::ProgramOptimize program_optimize;
     program.optimizeProgram =
@@ -267,36 +183,38 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
       ops_of_block_[*block_desc.get()].push_back(op_base);
     }
   }
-  InitMemory();
+  if (program_.is_commbine) {
+    InitCombineMemory();
+  } else {
+    InitMemory();
+  }
 }
 
 template <typename Dtype, Precision P>
 void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
-                                    framework::LoDTensor *tensor,
-                                    const std::string &file_path) {
-  std::ifstream is(file_path);
-  PADDLE_MOBILE_ENFORCE(is.is_open(), "open file: %s failed",
-                        file_path.c_str());
-  std::fpos<mbstate_t> pos;
-  pos = is.tellg();  // save   current   position
-  is.seekg(0, std::ios::end);
-  is.seekg(pos);  // restore   saved   position
-
+                                    framework::LoDTensor *tensor, char *&data) {
   // 1. version
-  uint32_t version;
-  is.read(reinterpret_cast<char *>(&version), sizeof(version));
+  uint32_t version = *(uint32_t *)data;
+  data += sizeof(uint32_t);
 
   // 2 Lod information
-  uint64_t lod_level;
-  is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
+  uint64_t lod_level = *(uint64_t *)data;
+  data += sizeof(uint64_t);
+
   auto &lod = *tensor->mutable_lod();
   lod.resize(lod_level);
   for (uint64_t i = 0; i < lod_level; ++i) {
-    uint64_t size;
-    is.read(reinterpret_cast<char *>(&size), sizeof(size));
+    uint64_t size = *(uint64_t *)data;
+    data += sizeof(uint64_t);
+    DLOG << "lod size: " << i << size;
     std::vector<size_t> tmp(size / sizeof(size_t));
-    is.read(reinterpret_cast<char *>(tmp.data()),
-            static_cast<std::streamsize>(size));
+
+    for (int k = 0; k < tmp.size(); ++k) {
+      tmp[k] = *(size_t *)data;
+      DLOG << "tmp[k]: " << k << *(size_t *)data;
+      data += sizeof(size_t);
+    }
+
     for (auto j : tmp) {
       LOG(kLOG_DEBUG1) << "    lod - " << j;
     }
@@ -304,17 +222,20 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
   }
 
   // 3. tensor version
-  uint32_t tensor_version;
-  is.read(reinterpret_cast<char *>(&tensor_version), sizeof(tensor_version));
+  uint32_t tensor_version = *(uint32_t *)data;
+  data += sizeof(uint32_t);
 
   // 4. tensor desc
-  int32_t size;
-  is.read(reinterpret_cast<char *>(&size), sizeof(size));
+  int32_t size = *(int32_t *)data;
+  data += sizeof(int32_t);
+
   std::unique_ptr<char[]> buf(new char[size]);
-  is.read(reinterpret_cast<char *>(buf.get()), size);
+  for (int m = 0; m < size; ++m) {
+    buf.get()[m] = data[m];
+  }
+  data += (sizeof(char) * size);
 
   const framework::TensorDesc &desc = var_desc.Tensor_desc();
-
   int memory_size = 1;
   for (auto l : desc.Dims()) {
     memory_size *= l;
@@ -348,8 +269,10 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
       break;
   }
 
-  is.read(static_cast<char *>(memory), memory_size * type_size);
-  is.close();
+  for (int n = 0; n < memory_size * type_size; ++n) {
+    static_cast<char *>(memory)[n] = data[n];
+  }
+  data += (sizeof(char) * memory_size * type_size);
 }
 
 template <typename Dtype, Precision P>
@@ -362,8 +285,12 @@ void Executor<Dtype, P>::InitMemory() {
         if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
           continue;
         }
-        LoadMemory(*var_desc, tensor,
-                   program_.model_path + "/" + var_desc->Name());
+
+        char *origin_data =
+            Get_binary_data(program_.model_path + "/" + var_desc->Name());
+        char *data = origin_data;  // cursor that LoadMemory advances
+        LoadMemory(*var_desc, tensor, data);
+        delete[] origin_data;  // allocated with new char[] in Get_binary_data
       } else {
         if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
           auto tensor = var->template GetMutable<framework::LoDTensor>();
@@ -375,6 +302,30 @@ void Executor<Dtype, P>::InitMemory() {
   }
 }
 
+// Fills every persistable variable from the single combined parameter file
+// (program_.para_path); tensors are consumed back to back in block/var order.
+template <typename Dtype, Precision P>
+void Executor<Dtype, P>::InitCombineMemory() {
+  char *origin_data = Get_binary_data(program_.para_path);
+  char *data = origin_data;  // cursor that LoadMemory advances per tensor
+  for (const auto &block : to_predict_program_->Blocks()) {
+    for (const auto &var_desc : block->Vars()) {
+      auto var = program_.scope->Var(var_desc->Name());
+      if (var_desc->Persistable()) {
+        auto tensor = var->template GetMutable<framework::LoDTensor>();
+        if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
+          continue;
+        }
+        LoadMemory(*var_desc, tensor, data);
+      } else if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
+        auto tensor = var->template GetMutable<framework::LoDTensor>();
+        tensor->template mutable_data<Ptype>();
+      }
+    }
+  }
+  delete[] origin_data;  // allocated with new char[] in Get_binary_data
+}
+
 template <typename Dtype, Precision P>
 std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
     const framework::Tensor &t) {
diff --git a/src/io.h b/src/io/io.h
similarity index 62%
rename from src/io.h
rename to src/io/io.h
index ae99197baa97c07d2a883f8721d533b85ab7873a..fb18ca0cc1768f5cfe39acfcba7d0117a67e1de5 100644
--- a/src/io.h
+++ b/src/io/io.h
@@ -14,51 +14,73 @@ limitations under the License. */
 
 #pragma once
 
-#include <memory.h>
 #include <map>
+#include <memory>
 #include <string>
 #include <vector>
 
 #include "common/types.h"
 #include "framework/lod_tensor.h"
 #include "framework/operator.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/program.h"
 #include "framework/tensor.h"
 
 namespace paddle_mobile {
 
-template <typename Dtype, Precision P = Precision::FP32>
-class Loader : PaddleMobileObject {
+template <typename Dtype = CPU, Precision P = Precision::FP32>
+class Loader {
  public:
+  /*
+   * @b load a separate-format fluid model: "<dirname>/__model__" plus
+   * @b parameter files that are read from the same directory
+   * */
   const framework::Program<Dtype, P> Load(const std::string &dirname,
-                                          bool optimize = true);
+                                          bool optimize = false);
+
+  /*
+   * @b load a combined-format fluid model: one model file (model_path)
+   * @b and one file holding all parameters (para_path)
+   * */
+  const framework::Program<Dtype, P> Load(const std::string &model_path,
+                                          const std::string &para_path,
+                                          bool optimize = false);
 
  private:
-  void LoadVar(framework::Variable *variable,
-               const framework::VarDesc &var_desc,
-               const std::string &file_path);
+  const framework::Program<Dtype, P> LoadProgram(const std::string &model_path,
+                                                 bool optimize = false);
 };
 
-template <typename Dtype, Precision P = Precision::FP32>
+template <typename Dtype = CPU, Precision P = Precision::FP32>
 class Executor {
  public:
   typedef typename PrecisionTrait<P>::ptype Ptype;
 
+  /*
+   * @b init the executor with a program loaded by the Loader class;
+   * @b the constructor also initializes the parameter memory
+   * */
   Executor(const framework::Program<Dtype> p, int batch_size = 1,
            bool use_optimize = true);
 
+  /*
+   * @b to predict
+   * */
   std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t);
 
+  /*
+   * @b to predict with vector and dim
+   *
+   * @b predicts from the raw input values and the input's dimensions
+   * */
   std::vector<Ptype> Predict(const std::vector<Ptype> &input,
                              const std::vector<int64_t> &dims);
 
  protected:
   Executor() = default;
-
   void InitMemory();
   void LoadMemory(const framework::VarDesc var_desc,
-                  framework::LoDTensor *tensor, const std::string &file_path);
+                  framework::LoDTensor *tensor, char *&data);
+  void InitCombineMemory();
   framework::Program<Dtype> program_;
   int batch_size_ = 1;
   std::shared_ptr<framework::ProgramDesc> to_predict_program_;
diff --git a/src/jni/paddle_mobile_jni.cpp b/src/jni/paddle_mobile_jni.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f663b78fd490f2c9f0af525c7dabd2cc513c3a53
--- /dev/null
+++ b/src/jni/paddle_mobile_jni.cpp
@@ -0,0 +1,93 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef ANDROID
+
+#include "paddle_mobile_jni.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+namespace paddle_mobile {
+namespace jni {
+using framework::DDim;
+using framework::Program;
+using framework::Tensor;
+using paddle_mobile::CPU;
+using std::string;
+
+extern const char *ANDROID_LOG_TAG =
+    "paddle_mobile LOG built on " __DATE__ " " __TIME__;
+static Executor<CPU> *shared_executor_instance = nullptr;
+
+// toDo mutex lock
+// static std::mutex shared_mutex;
+
+Executor<CPU> *getExecutorInstance(const Program<CPU> p, int batch_size,
+                                   bool use_optimize) {
+  // Lazily build the shared executor; later calls reuse the first instance.
+  if (shared_executor_instance != nullptr) return shared_executor_instance;
+  shared_executor_instance = new Executor<CPU>(p, batch_size, use_optimize);
+  return shared_executor_instance;
+}
+
+string jstring2cppstring(JNIEnv *env, jstring jstr) {
+  const char *cstr = jstr ? env->GetStringUTFChars(jstr, 0) : nullptr;
+  string cppstr(cstr ? cstr : "");  // string(NULL) would be undefined
+  if (cstr) env->ReleaseStringUTFChars(jstr, cstr);
+  return cppstr;
+}
+
+JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
+                                                          jclass thiz,
+                                                          jstring modelPath) {
+  // Load the model with optimization on, then create or reuse the executor.
+  paddle_mobile::Loader<CPU> model_loader;
+  auto program = model_loader.Load(jstring2cppstring(env, modelPath), true);
+  shared_executor_instance = getExecutorInstance(program, 1, true);
+  return nullptr == shared_executor_instance ? JNI_FALSE : JNI_TRUE;
+}
+
+JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
+    JNIEnv *env, jclass thiz, jfloatArray buf) {
+  // Nothing to predict without an input array or a loaded model.
+  if (nullptr == buf || nullptr == shared_executor_instance) {
+    return NULL;
+  }
+  framework::DDim ddim = framework::make_ddim({1, 3, 224, 224});
+  const jsize input_len = static_cast<jsize>(framework::product(ddim));
+  if (env->GetArrayLength(buf) < input_len) {  // need a whole input image
+    return NULL;
+  }
+  framework::Tensor input;
+  input.Resize(ddim);
+  // Region copy: unlike GetFloatArrayElements, nothing needs releasing.
+  env->GetFloatArrayRegion(buf, 0, input_len, input.mutable_data<float>());
+  auto output = shared_executor_instance->Predict(input);
+  int count = output->numel();
+  jfloatArray result = env->NewFloatArray(count);
+  env->SetFloatArrayRegion(result, 0, count, output->data<float>());
+  return result;
+}
+
+JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_clear(JNIEnv *env,
+                                                       jclass thiz) {}
+
+}  // namespace jni
+}  // namespace paddle_mobile
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // ANDROID
diff --git a/src/jni/paddle_mobile_jni.h b/src/jni/paddle_mobile_jni.h
new file mode 100644
index 0000000000000000000000000000000000000000..a262d4070c37013977e869fa816d52d78fbfa485
--- /dev/null
+++ b/src/jni/paddle_mobile_jni.h
@@ -0,0 +1,51 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#ifdef ANDROID
+#include <jni.h>
+#include "common/log.h"
+#include "framework/tensor.h"
+#include "io/io.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+namespace paddle_mobile {
+namespace jni {
+/**
+ * load model & params of the net for android
+ */
+JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
+                                                          jclass thiz,
+                                                          jstring modelPath);
+
+/**
+ * object detection for android
+ */
+JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
+    JNIEnv *env, jclass thiz, jfloatArray buf);
+
+/**
+ * clear the data of the net when it is destroyed (android)
+ */
+JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_clear(JNIEnv *env,
+                                                       jclass thiz);
+}  // namespace jni
+}  // namespace paddle_mobile
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // ANDROID
diff --git a/src/operators/batchnorm_op.cpp b/src/operators/batchnorm_op.cpp
index 1f8a1698f4281174d2503650bde5deb0ef9825e9..0de5111d0625b45efd51c5afac989391631a3bed 100644
--- a/src/operators/batchnorm_op.cpp
+++ b/src/operators/batchnorm_op.cpp
@@ -12,7 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef BATCHNORM_OP
+
 #include "batchnorm_op.h"
+#include "framework/op_proto_maker.h"
+#include "framework/op_registry.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -29,3 +33,5 @@ template class BatchNormOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(batch_norm);
 REGISTER_OPERATOR(batch_norm, ops::BatchNormOp);
+
+#endif  // BATCHNORM_OP
diff --git a/src/operators/batchnorm_op.h b/src/operators/batchnorm_op.h
index 760466eeddcb472ed2a47625b786a021ce7c1ef5..ea774662abc093c36f75ad693aff579323becb23 100644
--- a/src/operators/batchnorm_op.h
+++ b/src/operators/batchnorm_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef BATCHNORM_OP
+
 #pragma once
 
 #include <string>
@@ -27,7 +29,7 @@ class BatchNormOp : public framework::OperatorWithKernel<DeviceType> {
  public:
   BatchNormOp(const string &type, const VariableNameMap &inputs,
               const VariableNameMap &outputs,
-              const framework::AttributeMap attrs,
+              const framework::AttributeMap &attrs,
               std::shared_ptr<framework::Scope> scope)
       : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                   scope),
@@ -47,3 +49,5 @@ class BatchNormOp : public framework::OperatorWithKernel<DeviceType> {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif  // BATCHNORM_OP
diff --git a/src/operators/box_coder_op.cpp b/src/operators/box_coder_op.cpp
index ca653b5711241e77a9df308922aedb0551b1103f..22d006a258ca0cd18b63dc72aed6a02405ff6e81 100644
--- a/src/operators/box_coder_op.cpp
+++ b/src/operators/box_coder_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef BOXCODER_OP
+
 #include "operators/box_coder_op.h"
 #include <vector>
 namespace paddle_mobile {
@@ -52,3 +54,5 @@ template class BoxCoderOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(box_coder);
 REGISTER_OPERATOR(box_coder, ops::BoxCoderOp);
+
+#endif  // BOXCODER_OP
diff --git a/src/operators/box_coder_op.h b/src/operators/box_coder_op.h
index a2203e1d89f8b5b6270c1576711a4c008d927e34..001ef20023a4500adec558e6f0bddb16a3c65551 100644
--- a/src/operators/box_coder_op.h
+++ b/src/operators/box_coder_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef BOXCODER_OP
+
 #pragma once
 
 #include <string>
@@ -30,7 +32,7 @@ class BoxCoderOp : public framework::OperatorWithKernel<DeviceType> {
  public:
   BoxCoderOp(const std::string &type, const VariableNameMap &inputs,
              const VariableNameMap &outputs,
-             const framework::AttributeMap attrs,
+             const framework::AttributeMap &attrs,
              std::shared_ptr<framework::Scope> scope)
       : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                   scope),
@@ -50,3 +52,5 @@ class BoxCoderOp : public framework::OperatorWithKernel<DeviceType> {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif  // BOXCODER_OP
diff --git a/src/operators/concat_op.cpp b/src/operators/concat_op.cpp
index 6744b47b7728558f95fad0435979841a73a7a6f6..26f5e7d4e48ee2c3402a821b49757b1b0914828a 100644
--- a/src/operators/concat_op.cpp
+++ b/src/operators/concat_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef CONCAT_OP
+
 #include "concat_op.h"
 
 namespace paddle_mobile {
@@ -62,3 +64,5 @@ template class ConcatOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(concat);
 REGISTER_OPERATOR(concat, ops::ConcatOp);
+
+#endif  // CONCAT_OP
diff --git a/src/operators/concat_op.h b/src/operators/concat_op.h
index 15160e20a403d73bb11e982f5a527454f26b5dd6..fff704e4d858a7c67a0e8331089d8e8d5d4639fb 100644
--- a/src/operators/concat_op.h
+++ b/src/operators/concat_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef CONCAT_OP
+
 #pragma once
 
 #include <string>
@@ -25,7 +27,7 @@ template <typename DeviceType, typename T>
 class ConcatOp : public framework::OperatorWithKernel<DeviceType> {
  public:
   ConcatOp(const string &type, const VariableNameMap &inputs,
-           const VariableNameMap &outputs, const framework::AttributeMap attrs,
+           const VariableNameMap &outputs, const framework::AttributeMap &attrs,
            std::shared_ptr<framework::Scope> scope)
       : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                   scope),
@@ -45,3 +47,5 @@ class ConcatOp : public framework::OperatorWithKernel<DeviceType> {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif  // CONCAT_OP
diff --git a/src/operators/conv_op.cpp b/src/operators/conv_op.cpp
index bfddcf14acbba016c4e4333e05fcc7dd6eebc509..c8ec33333f596a6c10491cfdb826f1dc54d69c6f 100644
--- a/src/operators/conv_op.cpp
+++ b/src/operators/conv_op.cpp
@@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef CONV_OP
+
 #include "operators/conv_op.h"
 #include <vector>
-#include "framework/data_type.h"
 #include "framework/op_proto_maker.h"
 #include "framework/op_registry.h"
 
@@ -23,30 +24,11 @@ namespace operators {
 
 template <typename Dtype, typename T>
 void ConvOp<Dtype, T>::InferShape() const {
-  //  std::cout << " begin get dims: " << std::endl;
-
   auto in_dims = param_.Input()->dims();
-
-  //  std::cout << " end get in dims: " << std::endl;
-
-  //  std::cout << " in_dims: " << in_dims << std::endl;
-
-  //  std::cout << " begin get Filter " << std::endl;
-
   auto filter_dims = param_.Filter()->dims();
-
-  //  std::cout << " end get Filter " << std::endl;
-
-  //  std::cout << " begin get Attrs " << std::endl;
-
   const std::vector<int> &strides = param_.Strides();
-
-  //  std::cout << " end get Attrs " << strides[0] << std::endl;
-
   std::vector<int> paddings = param_.Paddings();
-
   int groups = param_.Groups();
-
   std::vector<int> dilations = param_.Dilations();
 
   PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
@@ -73,3 +55,5 @@ template class ConvOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(conv2d);
 REGISTER_OPERATOR(conv2d, ops::ConvOp);
+
+#endif  // CONV_OP
diff --git a/src/operators/conv_op.h b/src/operators/conv_op.h
index f15f286b606db1403b0e0e609bfc38caac2c5105..0a26ce6c3f1ee005e982f10dcc3b38853124bdfb 100644
--- a/src/operators/conv_op.h
+++ b/src/operators/conv_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef CONV_OP
+
 #pragma once
 
 #include <string>
@@ -53,3 +55,5 @@ inline int ConvOutputSize(int input_size, int filter_size, int dilation,
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif  // CONV_OP
diff --git a/src/operators/depthwise_conv_op.cpp b/src/operators/depthwise_conv_op.cpp
index 2538298175c5ea40d7e44338caee853a73c089c4..87c9746b4dfa1e74fcf3733656b9b3b27a8740fb 100644
--- a/src/operators/depthwise_conv_op.cpp
+++ b/src/operators/depthwise_conv_op.cpp
@@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef DEPTHWISECONV_OP
+
 #include "operators/depthwise_conv_op.h"
 #include <vector>
-#include "framework/data_type.h"
 #include "framework/op_proto_maker.h"
 #include "framework/op_registry.h"
 #include "operators/conv_op.h"
@@ -55,3 +56,5 @@ template class DepthwiseConvOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(depthwise_conv2d);
 REGISTER_OPERATOR(depthwise_conv2d, ops::DepthwiseConvOp);
+
+#endif  // DEPTHWISECONV_OP
diff --git a/src/operators/depthwise_conv_op.h b/src/operators/depthwise_conv_op.h
index c47fa0ffcacd54a5ddf7280419ca1170173bde1b..37ba1b9ada32d75cb715dd86221758c71c6b1929 100644
--- a/src/operators/depthwise_conv_op.h
+++ b/src/operators/depthwise_conv_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef DEPTHWISECONV_OP
+
 #pragma once
 
 #include <string>
@@ -47,3 +49,5 @@ class DepthwiseConvOp : public framework::OperatorWithKernel<DeviceType> {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif  // DEPTHWISECONV_OP
diff --git a/src/operators/elementwise_add_op.cpp b/src/operators/elementwise_add_op.cpp
index 1eff80152bfb193fc8cd3866d63b1ae4d55f4b9c..ff2cd2598814cf9a270090213c0524c165c66ced 100644
--- a/src/operators/elementwise_add_op.cpp
+++ b/src/operators/elementwise_add_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef ELEMENTWISEADD_OP
+
 #include "elementwise_add_op.h"
 
 namespace paddle_mobile {
@@ -29,3 +31,5 @@ template class ElementwiseAddOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(elementwise_add);
 REGISTER_OPERATOR(elementwise_add, ops::ElementwiseAddOp);
+
+#endif  // ELEMENTWISEADD_OP
diff --git a/src/operators/elementwise_add_op.h b/src/operators/elementwise_add_op.h
index 7dd7e147a0630450c3ad9f830d661b2b92a5f995..62034b14edcbc9ec6ad44af59f6927b8cfa38aa2 100644
--- a/src/operators/elementwise_add_op.h
+++ b/src/operators/elementwise_add_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef ELEMENTWISEADD_OP
+
 #pragma once
 
 #include <string>
@@ -27,7 +29,7 @@ class ElementwiseAddOp : public framework::OperatorWithKernel<DeviceType> {
  public:
   ElementwiseAddOp(const string &type, const VariableNameMap &inputs,
                    const VariableNameMap &outputs,
-                   const framework::AttributeMap attrs,
+                   const framework::AttributeMap &attrs,
                    std::shared_ptr<framework::Scope> scope)
       : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                   scope),
@@ -46,3 +48,5 @@ class ElementwiseAddOp : public framework::OperatorWithKernel<DeviceType> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif  // ELEMENTWISEADD_OP
diff --git a/src/framework/program/var_desc.cpp b/src/operators/fusion_conv_add.cpp
similarity index 63%
rename from src/framework/program/var_desc.cpp
rename to src/operators/fusion_conv_add.cpp
index e54ae67b55c15540a0232dc6fdd97e70ae721ddb..fe380bddca585e434418513d5152c1df0426e80d 100644
--- a/src/framework/program/var_desc.cpp
+++ b/src/operators/fusion_conv_add.cpp
@@ -12,9 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "var_desc.h"
+#ifdef FUSIONCONVADD_OP
 
+#include "operators/fusion_conv_add.h"
 namespace paddle_mobile {
+namespace operators {
 
-namespace framework {}  // namespace framework
+template <typename Dtype, typename T>
+void FushionConvAddOp<Dtype, T>::InferShape() const {}  // empty stub for now
+template class FushionConvAddOp<CPU, float>;  // explicit CPU/float instantiation
+}  // namespace operators
 }  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+USE_OP(conv_add);
+REGISTER_OPERATOR(conv_add, ops::FushionConvAddOp);
+
+#endif
diff --git a/src/operators/fusion_conv_add.h b/src/operators/fusion_conv_add.h
new file mode 100644
index 0000000000000000000000000000000000000000..1e6a84b1dc157e0b58273bc5ff379ad079ed2860
--- /dev/null
+++ b/src/operators/fusion_conv_add.h
@@ -0,0 +1,72 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSIONCONVADD_OP
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "framework/operator.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
+
+namespace paddle_mobile {
+namespace operators {
+using std::string;
+using std::vector;
+// Pattern: a G_OP_TYPE_CONV node linked via operator> to an elementwise_add.
+class FusionConvAddMatcher : public framework::FusionOpMatcher {
+ public:
+  FusionConvAddMatcher() {
+    node_ = framework::Node(G_OP_TYPE_CONV);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD);
+  }
+
+  // Delegates to Node::Folder with this pattern's depth, Type() and key map.
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(),
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Y"}}}, removed_nodes);
+  }
+
+  std::string Type() { return G_OP_TYPE_CONV_ADD; }
+};
+
+template <typename DeviceType, typename T>
+class FushionConvAddOp : public framework::OperatorWithKernel<DeviceType> {
+ public:
+  FushionConvAddOp(const string &type, const VariableNameMap &inputs,
+                   const VariableNameMap &outputs,
+                   const framework::AttributeMap &attrs,
+                   std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
+                                                  scope) {}
+  // Empty body: RunImpl currently performs no work.
+  void RunImpl() const {}
+  // NOTE(review): base ctors are inherited in addition to the explicit one above.
+  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
+  void InferShape() const override;
+
+ protected:
+  //  FushionFcParam param_;
+};
+
+// static framework::FusionOpRegistrar fc_registrar(new FusionConvAddMatcher());
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/fusion_conv_add_relu_op.cpp b/src/operators/fusion_conv_add_relu_op.cpp
index 92f6fcf848f169eed141b1456c05e6fbd8ca9895..bf33db7d78e995c087478f947ece7038953fa42f 100644
--- a/src/operators/fusion_conv_add_relu_op.cpp
+++ b/src/operators/fusion_conv_add_relu_op.cpp
@@ -12,4 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef CONVADDRELU_OP
+
 #include "fusion_conv_add_relu_op.h"
+
+#endif
diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h
index e93c910d2b3132fb1894043a7c6aa3c8593dbb20..4825a01be95f31d11418fe114700aaaa248e0d7e 100644
--- a/src/operators/fusion_conv_add_relu_op.h
+++ b/src/operators/fusion_conv_add_relu_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef CONVADDRELU_OP
+
 #pragma once
 
 #include "framework/operator.h"
@@ -28,16 +30,18 @@ class FushionConvAddReluOpMatcher : public framework::FusionOpMatcher {
         std::make_shared<framework::Node>(G_OP_TYPE_RELU);
   }
 
-  void FolderNodes(framework::Node *node) {
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
     std::vector<std::shared_ptr<framework::OpDesc>> origin_descs =
         node->OpDescs(node_.Depth());
     node->Folder(node_.Depth(), Type(),
-                 {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}}, removed_nodes);
   }
   std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_RELU; }
 };
 
-class FusionFcOp {
+class ConvAddReluOp {
  public:
  private:
 };
@@ -47,3 +51,5 @@ class FusionFcOp {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/fusion_fc_op.cpp b/src/operators/fusion_fc_op.cpp
index 0f1be5c29fee1f741b773bbfa11b50b5aa49b8b7..8f639e212a1a922fb1a943d2582dd692e1bfabee 100644
--- a/src/operators/fusion_fc_op.cpp
+++ b/src/operators/fusion_fc_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef FUSION_FC_OP
+
 #include "operators/fusion_fc_op.h"
 namespace paddle_mobile {
 namespace operators {
@@ -54,3 +56,5 @@ template class FushionFcOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(fc);
 REGISTER_OPERATOR(fc, ops::FushionFcOp);
+
+#endif
diff --git a/src/operators/fusion_fc_op.h b/src/operators/fusion_fc_op.h
index 9019ef4d49641414682639b7a27cf93a20e43cf4..e5c5c04f6938499c1c39ca2e5120ef3084b9c1ad 100644
--- a/src/operators/fusion_fc_op.h
+++ b/src/operators/fusion_fc_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef FUSION_FC_OP
+
 #pragma once
 
 #include <string>
@@ -32,11 +34,13 @@ class FusionFcMatcher : public framework::FusionOpMatcher {
     node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD);
   }
 
-  void FolderNodes(framework::Node *node) {
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
     vector<std::shared_ptr<framework::OpDesc>> origin_descs =
         node->OpDescs(node_.Depth());
     node->Folder(node_.Depth(), Type(),
-                 {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}}, removed_nodes);
   }
 
   std::string Type() { return G_OP_TYPE_FC; }
@@ -47,7 +51,7 @@ class FushionFcOp : public framework::OperatorWithKernel<DeviceType> {
  public:
   FushionFcOp(const string &type, const VariableNameMap &inputs,
               const VariableNameMap &outputs,
-              const framework::AttributeMap attrs,
+              const framework::AttributeMap &attrs,
               std::shared_ptr<framework::Scope> scope)
       : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                   scope),
@@ -65,7 +69,9 @@ class FushionFcOp : public framework::OperatorWithKernel<DeviceType> {
   FushionFcParam param_;
 };
 
-static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
+// static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/batchnorm_kernel.cpp b/src/operators/kernel/arm/batchnorm_kernel.cpp
index e28bdd7147f300cb181ffc5e0aeebec412ec45e7..4327b7f3163f013f270ca4428227075e4883f96c 100644
--- a/src/operators/kernel/arm/batchnorm_kernel.cpp
+++ b/src/operators/kernel/arm/batchnorm_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef BATCHNORM_OP
+
 #pragma once
 
 #include "operators/kernel/batchnorm_kernel.h"
@@ -47,7 +49,7 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
   Tensor inv_std;
   auto inv_std_ptr = inv_std.mutable_data<float>(make_ddim({C}));
   if (C != variance->numel()) {
-    std::cout << "C must equal to variance.numel()" << std::endl;
+    DLOG << "C must equal to variance.numel()";
   }
   assert(C == variance->numel());
 
@@ -91,3 +93,5 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
 }
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/box_coder_kernel.cpp b/src/operators/kernel/arm/box_coder_kernel.cpp
index d604c3d2a8d7f7fb1c817397a61cb156f1d0f392..9654228911af77e751e4ef9d1b92fb92ae30591d 100644
--- a/src/operators/kernel/arm/box_coder_kernel.cpp
+++ b/src/operators/kernel/arm/box_coder_kernel.cpp
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#pragma once
+#ifdef BOXCODER_OP
 
 #include "operators/kernel/box_coder_kernel.h"
 
@@ -135,3 +135,5 @@ void BoxCoderKernel<CPU, float>::Compute(const BoxCoderParam& param) const {
 }
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/concat_kernel.cpp b/src/operators/kernel/arm/concat_kernel.cpp
index 705b698dbe9e9768713417f85ae2879df66acf9e..329677fb11e6ee2db74b5191586ac6157ede9697 100644
--- a/src/operators/kernel/arm/concat_kernel.cpp
+++ b/src/operators/kernel/arm/concat_kernel.cpp
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#pragma once
+#ifdef CONCAT_OP
 
 #include "operators/kernel/concat_kernel.h"
 
@@ -85,3 +85,5 @@ void ConcatKernel<CPU, float>::Compute(const ConcatParam &param) const {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/conv_kernel.cpp b/src/operators/kernel/arm/conv_kernel.cpp
index f04b8156c9d3c88520b1c74b60a20f41e7fedc98..546ae33407d4c5affd6459d4167ba5b373887f12 100644
--- a/src/operators/kernel/arm/conv_kernel.cpp
+++ b/src/operators/kernel/arm/conv_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef CONV_OP
+
 #include "operators/kernel/conv_kernel.h"
 
 namespace paddle_mobile {
@@ -112,3 +114,5 @@ template class ConvKernel<CPU, float>;
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/depthwise_conv_kernel.cpp b/src/operators/kernel/arm/depthwise_conv_kernel.cpp
index 1da52fa8d469bd81d043843d7bcca3a7b01f6663..6cd4538c4540ff11d91a6f49d088ad38f6d992e7 100644
--- a/src/operators/kernel/arm/depthwise_conv_kernel.cpp
+++ b/src/operators/kernel/arm/depthwise_conv_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef DEPTHWISECONV_OP
+
 #include "operators/kernel/depthwise_conv_kernel.h"
 #include "operators/kernel/conv_kernel.h"
 
@@ -124,3 +126,5 @@ template class DepthwiseConvKernel<CPU, float>;
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/elementwise_add_kernel.cpp b/src/operators/kernel/arm/elementwise_add_kernel.cpp
index f8d40ad17ff09d77c26a9f32a87190f1cdd6038a..02aabfe3ce0622df80c86906f45ab5cc688c7b12 100644
--- a/src/operators/kernel/arm/elementwise_add_kernel.cpp
+++ b/src/operators/kernel/arm/elementwise_add_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef ELEMENTWISEADD_OP
+
 #pragma once
 
 #include "operators/kernel/elementwise_add_kernel.h"
@@ -40,3 +42,5 @@ template class ElementwiseAddKernel<CPU, float>;
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/fushion_fc_kernel.cpp b/src/operators/kernel/arm/fushion_fc_kernel.cpp
index ebec90aa27154334488329d079b76d14630e3294..ea88252c21ab2f13f0564602ac9b922be521578b 100644
--- a/src/operators/kernel/arm/fushion_fc_kernel.cpp
+++ b/src/operators/kernel/arm/fushion_fc_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef FUSION_FC_OP
+
 #pragma once
 
 #include "operators/kernel/fushion_fc_kernel.h"
@@ -65,3 +67,5 @@ void FushionFcKernel<CPU, float>::Compute(const FushionFcParam &param) const {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/lrn_kernel.cpp b/src/operators/kernel/arm/lrn_kernel.cpp
index 47e64d487d72eb191e6b0ec8751c877363dd7b48..3e12b62508204b38150d7fcc82cef99f7617ba09 100644
--- a/src/operators/kernel/arm/lrn_kernel.cpp
+++ b/src/operators/kernel/arm/lrn_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef LRN_OP
+
 #pragma once
 
 #include "operators/kernel/lrn_kernel.h"
@@ -42,3 +44,5 @@ template class LrnKernel<CPU, float>;
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/mul_kernel.cpp b/src/operators/kernel/arm/mul_kernel.cpp
index f1eea3950cebe8d4c27b3481bf527e75f26c99aa..70bcac2461cdef535de8c9759ec10113e45b7ae2 100644
--- a/src/operators/kernel/arm/mul_kernel.cpp
+++ b/src/operators/kernel/arm/mul_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef MUL_OP
+
 #pragma once
 
 #include "operators/kernel/mul_kernel.h"
@@ -48,3 +50,5 @@ template class MulKernel<CPU, float>;
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/multiclass_nms_kernel.cpp b/src/operators/kernel/arm/multiclass_nms_kernel.cpp
index 61470ee31936f092e2f534c5534c1c78aaf5d44c..39f55dab38031db14b617e48eedb236eacd1b714 100644
--- a/src/operators/kernel/arm/multiclass_nms_kernel.cpp
+++ b/src/operators/kernel/arm/multiclass_nms_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef MULTICLASSNMS_OP
+
 #pragma once
 
 #include "operators/kernel/multiclass_nms_kernel.h"
@@ -273,3 +275,5 @@ void MultiClassNMSKernel<CPU, float>::Compute(
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/pool_kernel.cpp b/src/operators/kernel/arm/pool_kernel.cpp
index 6aa1b76058fdf8a9828321a23f26b1c17134d7c9..2809a802a6cf94c931e409aecfa0090139624a46 100644
--- a/src/operators/kernel/arm/pool_kernel.cpp
+++ b/src/operators/kernel/arm/pool_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef POOL_OP
+
 #include <operators/kernel/pool_kernel.h>
 #include "common/log.h"
 
@@ -73,3 +75,5 @@ void PoolKernel<CPU, float>::Compute(const PoolParam &param) const {
 }
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/prior_box_kernel.cpp b/src/operators/kernel/arm/prior_box_kernel.cpp
index fc61f43f3fe363c1f6d67f81ef37fb2d950f9717..e029c555d4d40745976be45b7a9c022eb62705c7 100644
--- a/src/operators/kernel/arm/prior_box_kernel.cpp
+++ b/src/operators/kernel/arm/prior_box_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef PRIORBOX_OP
+
 #pragma once
 
 #include "operators/kernel/prior_box_kernel.h"
@@ -143,3 +145,5 @@ void PriorBoxKernel<CPU, float>::Compute(const PriorBoxParam &param) const {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/relu_kernel.cpp b/src/operators/kernel/arm/relu_kernel.cpp
index 586d981175184e2da03f2949390932b888d67f4a..854fa1d185ddb002aa37a10ade0683d841af8793 100644
--- a/src/operators/kernel/arm/relu_kernel.cpp
+++ b/src/operators/kernel/arm/relu_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef RELU_OP
+
 #pragma once
 
 #include "operators/kernel/relu_kernel.h"
@@ -45,3 +47,5 @@ void ReluKernel<CPU, float>::Compute(const ReluParam &param) const {
 }
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/reshape_kernel.cpp b/src/operators/kernel/arm/reshape_kernel.cpp
index 7f7e80ece9f30631c109d0d27f4025e2617cec95..3d40309e97145e1df70f2a4191ee571c4a05627a 100644
--- a/src/operators/kernel/arm/reshape_kernel.cpp
+++ b/src/operators/kernel/arm/reshape_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef RESHAPE_OP
+
 #pragma once
 
 #include "operators/kernel/reshape_kernel.h"
@@ -49,3 +51,5 @@ void ReshapeKernel<CPU, float>::Compute(const ReshapeParam &param) const {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/sigmoid_kernel.cpp b/src/operators/kernel/arm/sigmoid_kernel.cpp
index 74bc29878019dfe52de94f6fef966a416e04cc72..20f275ff482d7073195d075c374e4a0969993714 100644
--- a/src/operators/kernel/arm/sigmoid_kernel.cpp
+++ b/src/operators/kernel/arm/sigmoid_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef SIGMOID_OP
+
 #include "../sigmoid_kernel.h"
 #if __ARM_NEON
 #include "../../math/math_func_neon.h"
@@ -25,35 +27,23 @@ using framework::Tensor;
 
 void sigmoid(const Tensor *X, Tensor *Y) {
 #if __ARM_NEON
-  DLOG << "step1";
   const float *input = X->data<float>();
-  DLOG << "step11";
-
   float *output = Y->mutable_data<float>();
-  DLOG << "step2";
-
   const DDim &dDim = X->dims();
-  DLOG << "step3";
-
   int axis_index = 1;
   if (dDim.size() < 4) {
     axis_index = 0;
   }
-  DLOG << "step4";
-
   DDim outer_ddim =
       paddle_mobile::framework::slice_ddim(dDim, 0, axis_index + 1);
   DDim inner_ddim =
       paddle_mobile::framework::slice_ddim(dDim, axis_index + 1, dDim.size());
-  DLOG << "step5";
-
   int out_size = paddle_mobile::framework::product(outer_ddim);
   int inner_size = paddle_mobile::framework::product(inner_ddim);
-  DLOG << "step6";
 
-#pragma omp parallel for
   DLOG << "outsize=" << out_size;
   DLOG << "innersize=" << inner_size;
+  #pragma omp parallel for
   for (int i = 0; i < out_size; ++i) {
     const float *input_outer_ptr = input + i * inner_size;
     float *output_outer_ptr = output + i * inner_size;
@@ -93,3 +83,5 @@ void SigmoidKernel<CPU, float>::Compute(const SigmoidParam &param) const {
 template class SigmoidKernel<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/softmax_kernel.cpp b/src/operators/kernel/arm/softmax_kernel.cpp
index 0a50fc0a0136b66df4f55c10decc84a541b52dce..542283242d09abfbad8830eb0b36136ed35a6ef6 100644
--- a/src/operators/kernel/arm/softmax_kernel.cpp
+++ b/src/operators/kernel/arm/softmax_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef SOFTMAX_OP
+
 #include "../softmax_kernel.h"
 #include "../../math/softmax.h"
 namespace paddle_mobile {
@@ -29,3 +31,5 @@ void SoftmaxKernel<CPU, float>::Compute(const SoftmaxParam &param) const {
 template class SoftmaxKernel<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/transpose_kernel.cpp b/src/operators/kernel/arm/transpose_kernel.cpp
index 92b5916ec40d53bb55c1cc4aaf0ce6ec9a9bfaeb..3ebe261fb8fe511022d6efbf4641898ef326319f 100644
--- a/src/operators/kernel/arm/transpose_kernel.cpp
+++ b/src/operators/kernel/arm/transpose_kernel.cpp
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#pragma once
+#ifdef TRANSPOSE_OP
 
 #include "operators/kernel/transpose_kernel.h"
 
@@ -70,3 +70,5 @@ void TransposeKernel<CPU, float>::Compute(const TransposeParam& param) const {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/batchnorm_kernel.h b/src/operators/kernel/batchnorm_kernel.h
index ebace43e1c559df1bf997d05f68db862d1ed3cb4..6c795b2d5e9e7e81fb25d4a1a6dd3ca13c04bd9b 100644
--- a/src/operators/kernel/batchnorm_kernel.h
+++ b/src/operators/kernel/batchnorm_kernel.h
@@ -12,9 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef BATCHNORM_OP
+
+#pragma once
+
 #include "framework/operator.h"
 #include "operators/op_param.h"
-#pragma once;
 
 namespace paddle_mobile {
 namespace operators {
@@ -30,3 +33,5 @@ class BatchNormKernel
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/box_coder_kernel.h b/src/operators/kernel/box_coder_kernel.h
index 2d350202d091563f668f9209a1540bb0a32b6ac3..1c612b373cd086fcd566fe69e71eb77e4d1a30b6 100644
--- a/src/operators/kernel/box_coder_kernel.h
+++ b/src/operators/kernel/box_coder_kernel.h
@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef BOXCODER_OP
+
+#pragma once
+
 #include <vector>
 
 #include "framework/operator.h"
 #include "operators/math/transform.h"
 #include "operators/op_param.h"
 
-#pragma once;
-
 namespace paddle_mobile {
 namespace operators {
 
@@ -31,3 +33,5 @@ class BoxCoderKernel
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/concat_kernel.h b/src/operators/kernel/concat_kernel.h
index d91fb84f015851074e317980f1fe9ff930e9e399..3b649974e8bb670b7ec81c61f185a2d8f9b24ad0 100644
--- a/src/operators/kernel/concat_kernel.h
+++ b/src/operators/kernel/concat_kernel.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef CONCAT_OP
+
 #pragma once
 #include "framework/operator.h"
 #include "operators/op_param.h"
@@ -29,3 +31,5 @@ class ConcatKernel : public framework::OpKernelBase<DeviceType, ConcatParam> {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/conv_kernel.h b/src/operators/kernel/conv_kernel.h
index d43a174ffdbf0ca6dbb39e463b8e97652c7b0daf..06c0c2c55629d9762cffa0b2c5572050b95bc771 100644
--- a/src/operators/kernel/conv_kernel.h
+++ b/src/operators/kernel/conv_kernel.h
@@ -12,6 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef CONV_OP
+
+#pragma once
+
 #include <vector>
 #include "framework/operator.h"
 #include "operators/math/im2col.h"
@@ -19,8 +23,6 @@ limitations under the License. */
 #include "operators/math/vol2col.h"
 #include "operators/op_param.h"
 
-#pragma once;
-
 namespace paddle_mobile {
 namespace operators {
 
@@ -49,3 +51,5 @@ inline bool IsExpand(const std::vector<int64_t> &filter_dim,
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/depthwise_conv_kernel.h b/src/operators/kernel/depthwise_conv_kernel.h
index 43ddfb25cd859a7e937577221215d8352b846bff..1ef76a573e27ff09fe7842ad78e9fe6042a742a1 100644
--- a/src/operators/kernel/depthwise_conv_kernel.h
+++ b/src/operators/kernel/depthwise_conv_kernel.h
@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef DEPTHWISECONV_OP
+
+#pragma once
+
 #include "framework/operator.h"
 #include "operators/math/im2col.h"
 #include "operators/math/math_function.h"
 #include "operators/math/vol2col.h"
 #include "operators/op_param.h"
 
-#pragma once;
-
 namespace paddle_mobile {
 namespace operators {
 
@@ -32,3 +34,5 @@ class DepthwiseConvKernel : public OpKernelBase<DeviceType, ConvParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/elementwise_add_kernel.h b/src/operators/kernel/elementwise_add_kernel.h
index 28b3bc29e593561d18512cbf1af947dd64cd9d87..7a2f92120105b9f9539937e00c392c0eb77e3830 100644
--- a/src/operators/kernel/elementwise_add_kernel.h
+++ b/src/operators/kernel/elementwise_add_kernel.h
@@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#pragma once;
+#ifdef ELEMENTWISEADD_OP
+
+#pragma once
 
 #include "framework/operator.h"
 #include "operators/math/elementwise_op_function.h"
@@ -31,3 +33,5 @@ class ElementwiseAddKernel
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/fpga/conv_kernel.cpp b/src/operators/kernel/fpga/conv_kernel.cpp
index a50a5c59bdaaa3829602049bf88bf41fa02af53c..21badb0d8eaf125a6e46bf3283adca90a175b984 100644
--- a/src/operators/kernel/fpga/conv_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef CONV_OP
+
 namespace paddle_mobile {
 namespace operators {
 
@@ -22,3 +24,5 @@ namespace operators {
 // template class ConvKernel<FPGA, float>;
 }
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/fushion_fc_kernel.h b/src/operators/kernel/fushion_fc_kernel.h
index 7597a7120d1840128810730ad3fab11fd01b10fa..aff8917664341d980fca67f846aa7e2926bdd534 100644
--- a/src/operators/kernel/fushion_fc_kernel.h
+++ b/src/operators/kernel/fushion_fc_kernel.h
@@ -12,12 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef FUSION_FC_OP
+
+#pragma once
+
 #include "framework/operator.h"
 #include "operators/math/math_function.h"
 #include "operators/op_param.h"
 
-#pragma once;
-
 namespace paddle_mobile {
 namespace operators {
 
@@ -29,3 +31,5 @@ class FushionFcKernel
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/lrn_kernel.h b/src/operators/kernel/lrn_kernel.h
index f5fd8313482a92aad0c01d3e0acc9dcfcc83f2d8..ca04a45572bd922baa936bc151f7730c16131f40 100644
--- a/src/operators/kernel/lrn_kernel.h
+++ b/src/operators/kernel/lrn_kernel.h
@@ -12,9 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef LRN_OP
+
+#pragma once
+
 #include "framework/operator.h"
 #include "operators/op_param.h"
-#pragma once;
 
 namespace paddle_mobile {
 namespace operators {
@@ -70,3 +73,5 @@ class LrnKernel : public framework::OpKernelBase<DeviceType, LrnParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/framework/paddle_mobile_object.h b/src/operators/kernel/mali/conv_kernel.cpp
similarity index 72%
rename from src/framework/paddle_mobile_object.h
rename to src/operators/kernel/mali/conv_kernel.cpp
index aff4b6c1f178ba70c756c49721ac9b34de82c71c..695f937880328e8c2ffed91a8beee23e9a72899a 100644
--- a/src/framework/paddle_mobile_object.h
+++ b/src/operators/kernel/mali/conv_kernel.cpp
@@ -12,21 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#pragma once
+#ifdef CONV_OP
 
-#include <string>
-#include "stdio.h"
+#include "operators/kernel/conv_kernel.h"
 
 namespace paddle_mobile {
+namespace operators {
 
-class PaddleMobileObject {
- public:
-  virtual std::string ToString() {
-    char address[128] = {0};
-    sprintf(address, "%p", this);
-    return std::string(address);
-  }
+template <>
+void ConvKernel<GPU_MALI, float>::Compute(const ConvParam &param) const {}
 
- private:
-};
+template class ConvKernel<GPU_MALI, float>;
+}  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/mul_kernel.h b/src/operators/kernel/mul_kernel.h
index 809c9b80b5ba0d610827d8fa5ff00d5ad7183ab9..4ca1df1af188b4e9b95644d0796a7968f873f6f4 100644
--- a/src/operators/kernel/mul_kernel.h
+++ b/src/operators/kernel/mul_kernel.h
@@ -12,10 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef MUL_OP
+
+#pragma once
+
 #include "framework/operator.h"
 #include "operators/math/math_function.h"
 #include "operators/op_param.h"
-#pragma once;
 
 namespace paddle_mobile {
 namespace operators {
@@ -29,3 +32,5 @@ class MulKernel : public framework::OpKernelBase<DeviceType, MulParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/multiclass_nms_kernel.h b/src/operators/kernel/multiclass_nms_kernel.h
index 4453197e5c866398bc6f8807ec921ff5638fbb71..82bafe2685423f8014d95b8fc875554567d2094a 100644
--- a/src/operators/kernel/multiclass_nms_kernel.h
+++ b/src/operators/kernel/multiclass_nms_kernel.h
@@ -12,10 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef MULTICLASSNMS_OP
+
+#pragma once
+
 #include "framework/operator.h"
-#include "operators/op_param.h"
 
-#pragma once;
+#include "operators/op_param.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -28,3 +31,5 @@ class MultiClassNMSKernel
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/pool_kernel.h b/src/operators/kernel/pool_kernel.h
index 5cb185dea6eaed0bbb50c5fd5d3450d4e92f18e7..2a7b0ec48edeb922d6701e6ce4a9b6a514bc58f7 100644
--- a/src/operators/kernel/pool_kernel.h
+++ b/src/operators/kernel/pool_kernel.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef POOL_OP
+
 #pragma once
 
 #include "framework/operator.h"
@@ -29,3 +31,5 @@ class PoolKernel : public OpKernelBase<DeviceType, PoolParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/prior_box_kernel.h b/src/operators/kernel/prior_box_kernel.h
index c3cd399bfe9fad86b45c33d947dbbb3e4f99bade..3e7c72a736ea56beb6cede1d5892675d6721163f 100644
--- a/src/operators/kernel/prior_box_kernel.h
+++ b/src/operators/kernel/prior_box_kernel.h
@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef PRIORBOX_OP
+
+#pragma once
+
 #include <vector>
 
 #include "framework/operator.h"
 #include "operators/math/transform.h"
 #include "operators/op_param.h"
 
-#pragma once;
-
 namespace paddle_mobile {
 namespace operators {
 
@@ -55,3 +57,5 @@ class PriorBoxKernel
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/relu_kernel.h b/src/operators/kernel/relu_kernel.h
index 83b4548f3e5421657ae6f79bd226e16e1aba7ffb..793268f35a78255f853c85d1af0d2ef0d3d328e5 100644
--- a/src/operators/kernel/relu_kernel.h
+++ b/src/operators/kernel/relu_kernel.h
@@ -12,10 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef RELU_OP
+
+#pragma once
+
 #include "framework/operator.h"
-#include "operators/op_param.h"
 
-#pragma once;
+#include "operators/op_param.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -27,3 +30,5 @@ class ReluKernel : public framework::OpKernelBase<DeviceType, ReluParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/reshape_kernel.h b/src/operators/kernel/reshape_kernel.h
index 7d5dcdf71de232b1c72180231731fcf76483b9e4..6b153e5fe3eba73f548fd1fc0ab9f95a5b390bf1 100644
--- a/src/operators/kernel/reshape_kernel.h
+++ b/src/operators/kernel/reshape_kernel.h
@@ -12,12 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <vector>
+#ifdef RESHAPE_OP
+
+#pragma once
 
+#include <vector>
 #include "framework/operator.h"
-#include "operators/op_param.h"
 
-#pragma once;
+#include "operators/op_param.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -72,3 +74,5 @@ class ReshapeKernel : public framework::OpKernelBase<DeviceType, ReshapeParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/sigmoid_kernel.h b/src/operators/kernel/sigmoid_kernel.h
index 8f5c787f3ff009ed1e334e61657d00454d6e4c0b..e901f02096c764537f268f628ccdc379f3a503e1 100644
--- a/src/operators/kernel/sigmoid_kernel.h
+++ b/src/operators/kernel/sigmoid_kernel.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef SIGMOID_OP
+
 #pragma once
 
 #include "framework/operator.h"
@@ -27,3 +29,5 @@ class SigmoidKernel : public OpKernelBase<DeviceType, SigmoidParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/softmax_kernel.h b/src/operators/kernel/softmax_kernel.h
index 5bdae46d288adef3c07c6b2735bdfe5e6ec0c1c3..2b2d753cf666a6eb58f70f2f43afbbefb3953d8b 100644
--- a/src/operators/kernel/softmax_kernel.h
+++ b/src/operators/kernel/softmax_kernel.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef SOFTMAX_OP
+
 #pragma once
 
 #include "framework/operator.h"
@@ -30,3 +32,5 @@ class SoftmaxKernel : public OpKernelBase<DeviceType, SoftmaxParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/transpose_kernel.h b/src/operators/kernel/transpose_kernel.h
index aa7d8902097df441eaa28ea8a74b5e9234f7daea..82d73ac82cd28edbd5b6fc349748293fd00fcf45 100644
--- a/src/operators/kernel/transpose_kernel.h
+++ b/src/operators/kernel/transpose_kernel.h
@@ -12,13 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef TRANSPOSE_OP
+
+#pragma once
+
 #include <vector>
 
 #include "framework/operator.h"
 #include "operators/op_param.h"
 
-#pragma once;
-
 namespace paddle_mobile {
 namespace operators {
 
@@ -30,3 +32,5 @@ class TransposeKernel
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/lrn_op.cpp b/src/operators/lrn_op.cpp
index cc89a034b4c43bcee7778cad0c16c614e74bb5fb..f072b22b063c6eb28cb5c0a183b51e6071c82bd3 100644
--- a/src/operators/lrn_op.cpp
+++ b/src/operators/lrn_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef LRN_OP
+
 #include "lrn_op.h"
 
 namespace paddle_mobile {
@@ -29,3 +31,5 @@ template class LrnOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(lrn);
 REGISTER_OPERATOR(lrn, ops::LrnOp);
+
+#endif
diff --git a/src/operators/lrn_op.h b/src/operators/lrn_op.h
index e5d98e1bb103307e1fae9c2460be19fe9d0f01a0..c0f7abba0bd095c7408787eda3b819a81fa2227e 100644
--- a/src/operators/lrn_op.h
+++ b/src/operators/lrn_op.h
@@ -11,6 +11,9 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+
+#ifdef LRN_OP
+
 #pragma once
 
 #include <string>
@@ -25,7 +28,7 @@ template <typename DeviceType, typename T>
 class LrnOp : public framework::OperatorWithKernel<DeviceType> {
  public:
   LrnOp(const string &type, const VariableNameMap &inputs,
-        const VariableNameMap &outputs, const framework::AttributeMap attrs,
+        const VariableNameMap &outputs, const framework::AttributeMap &attrs,
         std::shared_ptr<framework::Scope> scope)
       : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                   scope),
@@ -45,3 +48,5 @@ class LrnOp : public framework::OperatorWithKernel<DeviceType> {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/math/gemm.cpp b/src/operators/math/gemm.cpp
index 0c0ae8e3dd84f38218d03a761c58a664b927f161..fc243766bf9f8760178ac4efb0dfdd11a5742fa9 100644
--- a/src/operators/math/gemm.cpp
+++ b/src/operators/math/gemm.cpp
@@ -13,10 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "operators/math/gemm.h"
+#ifndef X86
+#include <arm_neon.h>
+#endif
 
 namespace paddle_mobile {
 namespace operators {
 namespace math {
+float ab[MR * NR];
 // 将A矩阵分块复制到连续内存(ColMajor)
 void PackMatrixA(int m, int k, int paddingM, const float *A, int lda,
                  float *buffer) {
@@ -170,17 +174,197 @@ void InnerKernel(int m, int n, int k, float alpha, const float *A, int lda,
 }
 
 // 计算一个更小的 4 * 4 的 C 矩阵分块
+#if defined(IOS)
+// NEON-intrinsic 4x4 micro-kernel (IOS build).
+// Accumulates the product of the packed panels a and b over k steps, then
+// sets C(i, j) = beta * C(i, j) + alpha * acc(i, j) for i < mc, j < nc.
+// a advances by MR and b by NR floats per step; lda and ldb are not used.
+void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
+               int ldb, float beta, float *C, int ldc, int mc, int nc) {
+  // One accumulator vector per output row, all starting at zero.
+  float32x4_t row0 = vdupq_n_f32(0.0);
+  float32x4_t row1 = vdupq_n_f32(0.0);
+  float32x4_t row2 = vdupq_n_f32(0.0);
+  float32x4_t row3 = vdupq_n_f32(0.0);
+
+  for (int step = 0; step < k; ++step) {
+    const float32x4_t a_col = vld1q_f32(a);
+    const float32x4_t b_row = vld1q_f32(b);
+    const float32x2_t a_lo = vget_low_f32(a_col);
+    const float32x2_t a_hi = vget_high_f32(a_col);
+
+    // Rank-1 update: row r += b_row * a_col[r].
+    row0 = vmlaq_lane_f32(row0, b_row, a_lo, 0);
+    row1 = vmlaq_lane_f32(row1, b_row, a_lo, 1);
+    row2 = vmlaq_lane_f32(row2, b_row, a_hi, 0);
+    row3 = vmlaq_lane_f32(row3, b_row, a_hi, 1);
+
+    a += MR;
+    b += NR;
+  }
+
+  // Spill the accumulators so the edge loop can index them with runtime
+  // row/column values (lane intrinsics need compile-time lane numbers).
+  float tile[16];
+  vst1q_f32(tile + 0, row0);
+  vst1q_f32(tile + 4, row1);
+  vst1q_f32(tile + 8, row2);
+  vst1q_f32(tile + 12, row3);
+
+  for (int ri = 0; ri < mc; ++ri) {
+    for (int ci = 0; ci < nc; ++ci) {
+      if (beta == 0.0) {
+        C(ri, ci) = 0.0;
+      } else if (beta != 1.0) {
+        C(ri, ci) *= beta;
+      }
+      if (ci < 4) {  // only four columns were accumulated
+        C(ri, ci) += alpha * tile[ri * 4 + ci];
+      }
+    }
+  }
+}
+#elif defined(ARMV7)
+// ARMv7 NEON assembly 4x4 micro-kernel.
+// Accumulates the product of the packed panels a and b over k steps in
+// q10-q13. A full tile is scaled and written to C inside the asm; a partial
+// tile is spilled to the file-level scratch buffer ab and merged into C by
+// the scalar loop at the end. Assumes MR == NR == 4; lda and ldb are unused.
+void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
+               int ldb, float beta, float *C, int ldc, int mc, int nc) {
+  int kc1 = k / 2, kc2 = k % 2;
+  int bytes_ldc = 4 * ldc;
+  int flag_alpha = (alpha == 1.0) ? 1 : 2;
+  int flag_beta = (beta == 0.0) ? 0 : ((beta == 1.0) ? 1 : 2);
+  int full_tile = (mc == MR && nc == NR) ? 1 : 0;
+  // The asm advances these pointers, so each is its own read-write operand.
+  float *c_load = C;
+  float *c_store = C;
+  float *ab_out = ab;
+  asm volatile(
+      // q10-q13 are the four row accumulators; start them at zero.
+      "vmov.f32   q10,    #0.0        \n\t"
+      "vmov.f32   q11,    #0.0        \n\t"
+      "vmov.f32   q12,    #0.0        \n\t"
+      "vmov.f32   q13,    #0.0        \n\t"
+
+      "subs       %[kc1], %[kc1], #1  \n\t"
+      "blt        end_kc1_%=          \n\t"
+      "loop_kc1_%=:                   \n\t"
+      "vld1.32    {q0, q1}, [%[a]]!   \n\t"
+      "vld1.32    {q2, q3}, [%[b]]!   \n\t"
+      "vmla.f32   q10, q2, d0[0]      \n\t"
+      "vmla.f32   q11, q2, d0[1]      \n\t"
+      "vmla.f32   q12, q2, d1[0]      \n\t"
+      "vmla.f32   q13, q2, d1[1]      \n\t"
+      "vmla.f32   q10, q3, d2[0]      \n\t"
+      "vmla.f32   q11, q3, d2[1]      \n\t"
+      "vmla.f32   q12, q3, d3[0]      \n\t"
+      "vmla.f32   q13, q3, d3[1]      \n\t"
+      "subs       %[kc1], %[kc1], #1  \n\t"
+      "bge        loop_kc1_%=         \n\t"
+      "end_kc1_%=:                    \n\t"
+
+      "subs       %[kc2], %[kc2], #1  \n\t"
+      "blt        end_kc2_%=          \n\t"
+      "vld1.32    {q0}, [%[a]]!       \n\t"
+      "vld1.32    {q1}, [%[b]]!       \n\t"
+      "vmla.f32   q10, q1, d0[0]      \n\t"
+      "vmla.f32   q11, q1, d0[1]      \n\t"
+      "vmla.f32   q12, q1, d1[0]      \n\t"
+      "vmla.f32   q13, q1, d1[1]      \n\t"
+      "end_kc2_%=:                    \n\t"
+
+      // Partial tiles leave through temp_ and are finished in C below.
+      "cmp        %[full_tile], #1    \n\t"
+      "bne        temp_%=             \n\t"
+
+      "vmov.f32   d8[0],    %[alpha]  \n\t"
+      "vmov.f32   d8[1],    %[beta]   \n\t"
+
+      // alpha == 1 needs no scaling.
+      "cmp        %[flag_alpha],  #1  \n\t"
+      "beq        beta_%=             \n\t"
+
+      "vmul.f32   q10, q10, d8[0]     \n\t"
+      "vmul.f32   q11, q11, d8[0]     \n\t"
+      "vmul.f32   q12, q12, d8[0]     \n\t"
+      "vmul.f32   q13, q13, d8[0]     \n\t"
+
+      "beta_%=:                       \n\t"
+      "cmp        %[flag_beta],   #0  \n\t"
+      "beq        memory_%=           \n\t"
+
+      "vld1.32    {q0}, [%[c_ld]], %[bytes_ldc] \n\t"
+      "vld1.32    {q1}, [%[c_ld]], %[bytes_ldc] \n\t"
+      "vld1.32    {q2}, [%[c_ld]], %[bytes_ldc] \n\t"
+      "vld1.32    {q3}, [%[c_ld]]     \n\t"
+      "cmp        %[flag_beta],   #1  \n\t"
+      "beq        beta_eq1_%=         \n\t"
+      "bne        beta_ne1_%=         \n\t"
+
+      "beta_eq1_%=:                   \n\t"
+      "vadd.f32   q10, q10, q0        \n\t"
+      "vadd.f32   q11, q11, q1        \n\t"
+      "vadd.f32   q12, q12, q2        \n\t"
+      "vadd.f32   q13, q13, q3        \n\t"
+      "b          memory_%=           \n\t"
+
+      "beta_ne1_%=:                   \n\t"
+      "vmla.f32   q10, q0, d8[1]      \n\t"
+      "vmla.f32   q11, q1, d8[1]      \n\t"
+      "vmla.f32   q12, q2, d8[1]      \n\t"
+      "vmla.f32   q13, q3, d8[1]      \n\t"
+
+      "memory_%=:                     \n\t"
+      "vst1.32    {q10}, [%[c_st]], %[bytes_ldc] \n\t"
+      "vst1.32    {q11}, [%[c_st]], %[bytes_ldc] \n\t"
+      "vst1.32    {q12}, [%[c_st]], %[bytes_ldc] \n\t"
+      "vst1.32    {q13}, [%[c_st]]    \n\t"
+      "b          end_%=              \n\t"
+
+      "temp_%=:                       \n\t"
+      "vst1.32    {q10, q11}, [%[ab]]!\n\t"
+      "vst1.32    {q12, q13}, [%[ab]] \n\t"
+      "end_%=:                        \n\t"
+      : [a] "+&r"(a), [b] "+&r"(b), [kc1] "+&r"(kc1), [kc2] "+&r"(kc2),
+        [c_ld] "+&r"(c_load), [c_st] "+&r"(c_store), [ab] "+&r"(ab_out)
+      : [full_tile] "r"(full_tile), [alpha] "r"(alpha), [beta] "r"(beta),
+        [bytes_ldc] "r"(bytes_ldc), [flag_alpha] "r"(flag_alpha),
+        [flag_beta] "r"(flag_beta)
+      : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q10", "q11", "q12",
+        "q13");
+
+  // Partial tile: merge the spilled accumulators into C one element at a time.
+  if (!full_tile) {
+    for (int i = 0; i < mc; ++i) {
+      for (int j = 0; j < nc; ++j) {
+        if (beta == 0.0) {
+          if (alpha != 1.0) {
+            C(i, j) = alpha * ab[i * MR + j];
+          } else {
+            C(i, j) = ab[i * MR + j];
+          }
+        } else {
+          if (beta != 1.0) {
+            C(i, j) *= beta;
+          }
+          if (alpha != 1.0) {
+            C(i, j) += alpha * ab[i * MR + j];
+          } else {
+            C(i, j) += ab[i * MR + j];
+          }
+        }
+      }
+    }
+  }
+}
+#else
 void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
                int ldb, float beta, float *C, int ldc, int mc, int nc) {
   float c[16] = {0};
   float reg_a0, reg_a1, reg_a2, reg_a3, reg_b0, reg_b1, reg_b2, reg_b3;
 
-  // // init C
-  // float32x4_t cv0 = vdup_n_f32(0.0);
-  // float32x4_t cv1 = vdup_n_f32(0.0);
-  // float32x4_t cv2 = vdup_n_f32(0.0);
-  // float32x4_t cv3 = vdup_n_f32(0.0);
-
   for (int p = 0; p < k; p += 1) {
     reg_b0 = *b++;
     reg_b1 = *b++;
@@ -232,6 +416,7 @@ void AddDot4x4(int k, float alpha, const float *a, int lda, const float *b,
     }
   }
 }
+#endif
 
 // 32位 float 矩阵乘法
 void sgemm(int m, int n, int k, float alpha, const float *A, int lda,
diff --git a/src/operators/math/pool3x3.h b/src/operators/math/pool3x3.h
index 3852b901871eb4cdcff0497a1ad2854abf93b7b6..164958288de5cf3bb37dcb2d37c7fe08b7bd7a1a 100644
--- a/src/operators/math/pool3x3.h
+++ b/src/operators/math/pool3x3.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef POOL_OP
+
 #pragma once
 
 #if __ARM_NEON
@@ -25,3 +27,5 @@ static void Pool3x3Max() {
 static void Pool3x3Avg() {
   // todo impl with neon
 }
+
+#endif
diff --git a/src/operators/math/pool_2x2.h b/src/operators/math/pool_2x2.h
index 0ed7f4e6abd4f7c78a9f14652fcf662a99d1e549..46e9e36470ceeee39563dc410e63a09aaec973bb 100644
--- a/src/operators/math/pool_2x2.h
+++ b/src/operators/math/pool_2x2.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef POOL_OP
+
 #pragma once
 
 #if __ARM_NEON
@@ -25,3 +27,5 @@ static void Pool2x2Max() {
 static void Pool2x2Avg() {
   // todo impl with neon
 }
+
+#endif
diff --git a/src/operators/math/pooling.cpp b/src/operators/math/pooling.cpp
index 07afdb7d14a7260e547e072cc67bd1613e812944..11bce0978f789f4f02b44fbb24fdd8bd1219257e 100644
--- a/src/operators/math/pooling.cpp
+++ b/src/operators/math/pooling.cpp
@@ -12,8 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef POOL_OP
+
 #include "pooling.h"
-#include <common/types.h>
+#include "common/types.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -57,7 +59,7 @@ class PoolFunctor<CPU, PoolProcess, T> {
     T *output_data = output->mutable_data<T>();
 
     for (int i = 0; i < batch_size; i++) {
-#pragma omp parallel for
+      #pragma omp parallel for
       for (int c = 0; c < output_channels; ++c) {
         for (int ph = 0; ph < output_height; ++ph) {
           int hstart = ph * stride_height - padding_height;
@@ -91,3 +93,5 @@ template class PoolFunctor<CPU, math::MaxPool<float>, float>;
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/math/pooling.h b/src/operators/math/pooling.h
index e511fc0518cb755d481b347df449d0e242a58e14..fc6aabb5f13fdedd9dfe9877748aa4d58b3afe36 100644
--- a/src/operators/math/pooling.h
+++ b/src/operators/math/pooling.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef POOL_OP
+
 #pragma once
 
 #include "common/log.h"
@@ -64,3 +66,5 @@ class PoolFunctor {
 }
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/math/softmax.cpp b/src/operators/math/softmax.cpp
index 224382eb2b78b1653da0cbbd9327cabb4fd9b3d1..a1eb4f13d82376d86da258101b15e6ae5e8bdc97 100644
--- a/src/operators/math/softmax.cpp
+++ b/src/operators/math/softmax.cpp
@@ -11,6 +11,9 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+
+#ifdef SOFTMAX_OP
+
 #include "operators/math/softmax.h"
 #include "common/types.h"
 #if __ARM_NEON
@@ -153,3 +156,4 @@ template class SoftmaxFuntor<CPU, float>;
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
diff --git a/src/operators/math/softmax.h b/src/operators/math/softmax.h
index 232497da531a44c14772916fa26328c4b3a1f130..e2ca8f30b067e9262a0e87f4ba5807df07949e73 100644
--- a/src/operators/math/softmax.h
+++ b/src/operators/math/softmax.h
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef SOFTMAX_OP
 #pragma once
 #include "framework/tensor.h"
 namespace paddle_mobile {
@@ -26,3 +27,4 @@ class SoftmaxFuntor {
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
diff --git a/src/operators/mul_op.cpp b/src/operators/mul_op.cpp
index 80c20122f4b04a3de13a95bc8ed26d48f7464f44..2bd2e0694470518a0220ee020e689e358d70d702 100644
--- a/src/operators/mul_op.cpp
+++ b/src/operators/mul_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef MUL_OP
+
 #include "mul_op.h"
 
 namespace paddle_mobile {
@@ -55,3 +57,5 @@ template class MulOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(mul);
 REGISTER_OPERATOR(mul, ops::MulOp);
+
+#endif
diff --git a/src/operators/mul_op.h b/src/operators/mul_op.h
index ded618551fca682daea0bacc3635776eeb81301c..5ecf6571ae2725975271d5b0e7212380caa47578 100644
--- a/src/operators/mul_op.h
+++ b/src/operators/mul_op.h
@@ -11,6 +11,9 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+
+#ifdef MUL_OP
+
 #pragma once
 
 #include <string>
@@ -25,7 +28,7 @@ template <typename DeviceType, typename T>
 class MulOp : public framework::OperatorWithKernel<DeviceType> {
  public:
   MulOp(const std::string &type, const VariableNameMap &inputs,
-        const VariableNameMap &outputs, const framework::AttributeMap attrs,
+        const VariableNameMap &outputs, const framework::AttributeMap &attrs,
         std::shared_ptr<framework::Scope> scope)
       : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                   scope),
@@ -45,3 +48,5 @@ class MulOp : public framework::OperatorWithKernel<DeviceType> {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/multiclass_nms_op.cpp b/src/operators/multiclass_nms_op.cpp
index bc796010b231929b3f0c017b68f33b861a84262d..1e4c3f8c34020eeeec2e59cb499b7e00c95edb38 100644
--- a/src/operators/multiclass_nms_op.cpp
+++ b/src/operators/multiclass_nms_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef MULTICLASSNMS_OP
+
 #include "operators/multiclass_nms_op.h"
 namespace paddle_mobile {
 namespace operators {
@@ -39,3 +41,5 @@ template class MultiClassNMSOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(multiclass_nms);
 REGISTER_OPERATOR(multiclass_nms, ops::MultiClassNMSOp);
+
+#endif
diff --git a/src/operators/multiclass_nms_op.h b/src/operators/multiclass_nms_op.h
index c424856b8cdc09b365a7ece28df39a911b6d3af8..37f3742524f64b5bfa97f78a55f86b4264489dd5 100644
--- a/src/operators/multiclass_nms_op.h
+++ b/src/operators/multiclass_nms_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef MULTICLASSNMS_OP
+
 #pragma once
 
 #include <string>
@@ -30,7 +32,7 @@ class MultiClassNMSOp : public framework::OperatorWithKernel<DeviceType> {
  public:
   MultiClassNMSOp(const std::string &type, const VariableNameMap &inputs,
                   const VariableNameMap &outputs,
-                  const framework::AttributeMap attrs,
+                  const framework::AttributeMap &attrs,
                   std::shared_ptr<framework::Scope> scope)
       : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                   scope),
@@ -50,3 +52,5 @@ class MultiClassNMSOp : public framework::OperatorWithKernel<DeviceType> {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/op_param.cpp b/src/operators/op_param.cpp
index ac6ae4cdef77af623097bf6a6d1e73f55339a71a..3045ce4d087bad48927fd3054ef7c2941587b5e2 100644
--- a/src/operators/op_param.cpp
+++ b/src/operators/op_param.cpp
@@ -13,9 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "op_param.h"
-
 namespace paddle_mobile {
 namespace operators {
+
+#ifdef CONV_OP
 Print &operator<<(Print &printer, const ConvParam &conv_param) {
   printer << "parameter of conv: "
           << "\n";
@@ -36,5 +37,7 @@ Print &operator<<(Print &printer, const ConvParam &conv_param) {
   printer << "  output dims: " << conv_param.Output()->dims();
   return printer;
 }
+#endif
+
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 0ce187c084975c53e433b9428ad14bf11212a5a1..75f6f5ee2215bd5785c791ab8b1b99adf6fa98ae 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -34,7 +34,7 @@ using framework::Tensor;
 using std::string;
 using std::vector;
 
-class OpParam : PaddleMobileObject {
+class OpParam {
  protected:
   template <typename T>
   static T *InputFrom(const VariableNameMap &inputs, const Scope &scope) {
@@ -167,8 +167,6 @@ class OpParam : PaddleMobileObject {
                         const Scope &scope) {
     auto var_vec = var_map.at(key);
     if (!var_vec.empty()) {
-      //      std::cout << " get var value -- " << var_vec[0] <<
-      //      std::endl;
       auto var = scope.FindVar(var_vec[0]);
       return var->GetMutable<T>();
     } else {
@@ -191,6 +189,7 @@ class OpParam : PaddleMobileObject {
   }
 };
 
+#ifdef CONV_OP
 class ConvParam : OpParam {
  public:
   ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -230,7 +229,9 @@ class ConvParam : OpParam {
 };
 
 Print &operator<<(Print &printer, const ConvParam &conv_param);
+#endif
 
+#ifdef ELEMENTWISEADD_OP
 class ElementwiseAddParam : OpParam {
  public:
   ElementwiseAddParam(const VariableNameMap &inputs,
@@ -258,6 +259,9 @@ class ElementwiseAddParam : OpParam {
   int axis_;
 };
 
+#endif
+
+#ifdef MUL_OP
 class MulParam : OpParam {
  public:
   MulParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -287,7 +291,9 @@ class MulParam : OpParam {
   int x_num_col_dims_;
   int y_num_col_dims_;
 };
+#endif
 
+#ifdef CONCAT_OP
 class ConcatParam : public OpParam {
  public:
   ConcatParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -309,7 +315,9 @@ class ConcatParam : public OpParam {
   Tensor *out_;
   int axis_;
 };
+#endif
 
+#ifdef LRN_OP
 class LrnParam : public OpParam {
  public:
   LrnParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -351,6 +359,9 @@ class LrnParam : public OpParam {
   float k_;
   string data_format_;
 };
+#endif
+
+#ifdef BATCHNORM_OP
 class BatchNormParam : OpParam {
  public:
   BatchNormParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -399,6 +410,9 @@ class BatchNormParam : OpParam {
   bool is_test_;
   string data_format_;
 };
+#endif
+
+#ifdef POOL_OP
 class PoolParam : public OpParam {
  public:
   PoolParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -442,6 +456,9 @@ class PoolParam : public OpParam {
   bool gloabal_pooling_ = false;
 };
 
+#endif
+
+#ifdef PRIORBOX_OP
 class PriorBoxParam : public OpParam {
  public:
   PriorBoxParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -503,7 +520,9 @@ class PriorBoxParam : public OpParam {
   float step_h_;
   float offset_;
 };
+#endif
 
+#ifdef BOXCODER_OP
 class BoxCoderParam : public OpParam {
  public:
   BoxCoderParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -533,7 +552,9 @@ class BoxCoderParam : public OpParam {
   Tensor *output_box_;
   std::string code_type_;
 };
+#endif
 
+#ifdef SOFTMAX_OP
 class SoftmaxParam : public OpParam {
  public:
   SoftmaxParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -549,7 +570,9 @@ class SoftmaxParam : public OpParam {
   Tensor *input_x_;
   Tensor *out_;
 };
+#endif
 
+#ifdef SIGMOID_OP
 class SigmoidParam : public OpParam {
  public:
   SigmoidParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -565,6 +588,9 @@ class SigmoidParam : public OpParam {
   Tensor *input_x_;
   Tensor *out_;
 };
+#endif
+
+#ifdef MULTICLASSNMS_OP
 class MultiClassNMSParam : public OpParam {
  public:
   MultiClassNMSParam(const VariableNameMap &inputs,
@@ -610,6 +636,7 @@ class MultiClassNMSParam : public OpParam {
   float nms_eta_;
   float score_threshold_;
 };
+#endif
 
 class FeedParam : public OpParam {
  public:
@@ -646,6 +673,7 @@ class FetchParam : public OpParam {
   Tensor *out_;
 };
 
+#ifdef TRANSPOSE_OP
 class TransposeParam : public OpParam {
  public:
   TransposeParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -666,7 +694,9 @@ class TransposeParam : public OpParam {
   Tensor *out_;
   vector<int> axis_;
 };
+#endif
 
+#ifdef RESHAPE_OP
 class ReshapeParam : public OpParam {
  public:
   ReshapeParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -695,7 +725,9 @@ class ReshapeParam : public OpParam {
   vector<int> shape_;
   bool inplace_;
 };
+#endif
 
+#ifdef RELU_OP
 /*
  * @b op 层实例化好这个 param 传递给 kernel 层使用
  * */
@@ -715,7 +747,9 @@ class ReluParam : public OpParam {
   Tensor *input_x_;
   Tensor *out_;
 };
+#endif
 
+#ifdef FUSION_FC_OP
 class FushionFcParam : public OpParam {
  public:
   FushionFcParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
@@ -751,6 +785,7 @@ class FushionFcParam : public OpParam {
   int y_num_col_dims_;
   int axis_;
 };
+#endif
 
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/pool_op.cpp b/src/operators/pool_op.cpp
index 3096199dc3e3157f9fa0048ad35f796e24113f28..45a709d20794e9fdfad15f0a3ec499d3f32d5bf9 100644
--- a/src/operators/pool_op.cpp
+++ b/src/operators/pool_op.cpp
@@ -12,7 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef POOL_OP
+
 #include "pool_op.h"
+#include "framework/op_proto_maker.h"
+#include "framework/op_registry.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -57,3 +61,5 @@ template class PoolOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(pool2d);
 REGISTER_OPERATOR(pool2d, ops::PoolOp);
+
+#endif
diff --git a/src/operators/pool_op.h b/src/operators/pool_op.h
index ff44771c56151acf699b017ddf834a2d32e07761..8dc99ae686390041b3c99c2df71d91ae9801a1f2 100644
--- a/src/operators/pool_op.h
+++ b/src/operators/pool_op.h
@@ -12,13 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef POOL_OP
+
 #pragma once
 
-#include <framework/operator.h>
-#include <operators/kernel/pool_kernel.h>
-#include <operators/op_param.h>
 #include <string>
 
+#include "framework/operator.h"
+#include "operators/kernel/pool_kernel.h"
+#include "operators/op_param.h"
+
 namespace paddle_mobile {
 namespace operators {
 using framework::AttributeMap;
@@ -47,3 +50,5 @@ class PoolOp : public OperatorWithKernel<DeviceType> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/prior_box_op.cpp b/src/operators/prior_box_op.cpp
index 3928c3db53414dbb3ef9a6ae4ebe5527dc5eeeca..22f9326b00f41a96de2f6ce3d79f8cbee98fd9f4 100644
--- a/src/operators/prior_box_op.cpp
+++ b/src/operators/prior_box_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef PRIORBOX_OP
+
 #include "operators/prior_box_op.h"
 #include <vector>
 namespace paddle_mobile {
@@ -49,3 +51,5 @@ template class PriorBoxOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(prior_box);
 REGISTER_OPERATOR(prior_box, ops::PriorBoxOp);
+
+#endif
diff --git a/src/operators/prior_box_op.h b/src/operators/prior_box_op.h
index 84481e602a6cb4143a50760e66b0d430b8a1c719..e3de58b372cc101956d83ff39b02e172c990b254 100644
--- a/src/operators/prior_box_op.h
+++ b/src/operators/prior_box_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef PRIORBOX_OP
+
 #pragma once
 
 #include <string>
@@ -30,7 +32,7 @@ class PriorBoxOp : public framework::OperatorWithKernel<DeviceType> {
  public:
   PriorBoxOp(const std::string &type, const VariableNameMap &inputs,
              const VariableNameMap &outputs,
-             const framework::AttributeMap attrs,
+             const framework::AttributeMap &attrs,
              std::shared_ptr<framework::Scope> scope)
       : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                   scope),
@@ -50,3 +52,5 @@ class PriorBoxOp : public framework::OperatorWithKernel<DeviceType> {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/relu_op.cpp b/src/operators/relu_op.cpp
index 21bcc605282ffc590025e87b609cccc855a631d1..3beac260935ce2daf8a5b9f1e6b9be178034ac8d 100644
--- a/src/operators/relu_op.cpp
+++ b/src/operators/relu_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef RELU_OP
+
 #include "operators/relu_op.h"
 namespace paddle_mobile {
 namespace operators {
@@ -33,3 +35,5 @@ template class ReluOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(relu);
 REGISTER_OPERATOR(relu, ops::ReluOp);
+
+#endif
diff --git a/src/operators/relu_op.h b/src/operators/relu_op.h
index 7be8cd249cb22255dff237da6c8653e6237bbc3f..f032546c82d740c179385434b5d72082e4bd5a9d 100644
--- a/src/operators/relu_op.h
+++ b/src/operators/relu_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef RELU_OP
+
 #pragma once
 
 #include <string>
@@ -32,7 +34,7 @@ class ReluOp : public framework::OperatorWithKernel<DeviceType> {
    * @b op 的实例化方法, 需要调用父类的实例化方法, 以及实例化自己的参数结构体
    * */
   ReluOp(const std::string &type, const VariableNameMap &inputs,
-         const VariableNameMap &outputs, const framework::AttributeMap attrs,
+         const VariableNameMap &outputs, const framework::AttributeMap &attrs,
          std::shared_ptr<framework::Scope> scope)
       : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                   scope),
@@ -59,3 +61,5 @@ class ReluOp : public framework::OperatorWithKernel<DeviceType> {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/reshape_op.cpp b/src/operators/reshape_op.cpp
index 6562b7a5eb491a7e69e9bd9481251b8aaf9f3f4b..44d3de2203cc01f6a6acd6810f4e676f6efb6bbd 100644
--- a/src/operators/reshape_op.cpp
+++ b/src/operators/reshape_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef RESHAPE_OP
+
 #include "operators/reshape_op.h"
 #include <vector>
 namespace paddle_mobile {
@@ -32,3 +34,5 @@ template class ReshapeOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(reshape);
 REGISTER_OPERATOR(reshape, ops::ReshapeOp);
+
+#endif
diff --git a/src/operators/reshape_op.h b/src/operators/reshape_op.h
index b244e62a930a0e6a98d56fe06a4e4a7e37f7d5e1..a14c84b6be95a1d86ac645563dc21c21a51ca6d4 100644
--- a/src/operators/reshape_op.h
+++ b/src/operators/reshape_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef RESHAPE_OP
+
 #pragma once
 
 #include <string>
@@ -29,7 +31,8 @@ template <typename DeviceType, typename T>
 class ReshapeOp : public framework::OperatorWithKernel<DeviceType> {
  public:
   ReshapeOp(const std::string &type, const VariableNameMap &inputs,
-            const VariableNameMap &outputs, const framework::AttributeMap attrs,
+            const VariableNameMap &outputs,
+            const framework::AttributeMap &attrs,
             std::shared_ptr<framework::Scope> scope)
       : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                   scope),
@@ -49,3 +52,5 @@ class ReshapeOp : public framework::OperatorWithKernel<DeviceType> {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/sigmoid_op.cpp b/src/operators/sigmoid_op.cpp
index 6bff80a35aa019a7b05f6e9b58c49e13fb8f1bc8..8be9309d1047a1d892c0c0151375a8baa01cbca3 100644
--- a/src/operators/sigmoid_op.cpp
+++ b/src/operators/sigmoid_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef SIGMOID_OP
+
 #include "operators/sigmoid_op.h"
 
 namespace paddle_mobile {
@@ -27,3 +29,5 @@ template class SigmoidOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(sigmoid);
 REGISTER_OPERATOR(sigmoid, ops::SigmoidOp);
+
+#endif
diff --git a/src/operators/sigmoid_op.h b/src/operators/sigmoid_op.h
index f631ba51759ea31f91ddcdf7c90a0dc874e86b20..7cdeb41af1b20ddf05ac80d7de0962c4bfe8dff4 100644
--- a/src/operators/sigmoid_op.h
+++ b/src/operators/sigmoid_op.h
@@ -12,12 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef SIGMOID_OP
+
 #pragma once
 
-#include <framework/operator.h>
-#include <operators/op_param.h>
 #include <string>
+
+#include "framework/operator.h"
 #include "operators/kernel/sigmoid_kernel.h"
+#include "operators/op_param.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -47,3 +50,5 @@ class SigmoidOp : public framework::OperatorWithKernel<DeviceType> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/softmax_op.cpp b/src/operators/softmax_op.cpp
index c353d0b882cb8f0682f9e4710ff05c32ca68e685..5973647bfd1624fc4bb71b8112c5d7f8bf9665cd 100644
--- a/src/operators/softmax_op.cpp
+++ b/src/operators/softmax_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef SOFTMAX_OP
+
 #include "operators/softmax_op.h"
 
 namespace paddle_mobile {
@@ -27,3 +29,5 @@ template class SoftmaxOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(softmax);
 REGISTER_OPERATOR(softmax, ops::SoftmaxOp);
+
+#endif
diff --git a/src/operators/softmax_op.h b/src/operators/softmax_op.h
index 07fd9b945cb29cecd6f4d629b6be58035f971ce4..5cac4d8a3394b07e978ba41e18fd7fbb7f4756d7 100644
--- a/src/operators/softmax_op.h
+++ b/src/operators/softmax_op.h
@@ -12,12 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef SOFTMAX_OP
+
 #pragma once
 
-#include <framework/operator.h>
-#include <operators/op_param.h>
 #include <string>
+
+#include "framework/operator.h"
 #include "operators/kernel/softmax_kernel.h"
+#include "operators/op_param.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -47,3 +50,5 @@ class SoftmaxOp : public framework::OperatorWithKernel<DeviceType> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/transpose_op.cpp b/src/operators/transpose_op.cpp
index e21338bf1b59981e914ca4a8e1781e02254bc00c..3abebc77ba6733bf1bfa73846ec1a077a494de2f 100644
--- a/src/operators/transpose_op.cpp
+++ b/src/operators/transpose_op.cpp
@@ -12,9 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "operators/transpose_op.h"
-#include <common/enforce.h>
+#ifdef TRANSPOSE_OP
+
 #include <vector>
+
+#include "common/enforce.h"
+#include "operators/transpose_op.h"
 namespace paddle_mobile {
 namespace operators {
 
@@ -51,3 +54,5 @@ template class TransposeOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(transpose);
 REGISTER_OPERATOR(transpose, ops::TransposeOp);
+
+#endif
diff --git a/src/operators/transpose_op.h b/src/operators/transpose_op.h
index 0f67339533261f98374c6257494278306f3a7208..f65a725756c858b8e2e304906ed8236b00046fc9 100644
--- a/src/operators/transpose_op.h
+++ b/src/operators/transpose_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef TRANSPOSE_OP
+
 #pragma once
 
 #include <string>
@@ -30,7 +32,7 @@ class TransposeOp : public framework::OperatorWithKernel<DeviceType> {
  public:
   TransposeOp(const std::string &type, const VariableNameMap &inputs,
               const VariableNameMap &outputs,
-              const framework::AttributeMap attrs,
+              const framework::AttributeMap &attrs,
               std::shared_ptr<framework::Scope> scope)
       : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                   scope),
@@ -50,3 +52,5 @@ class TransposeOp : public framework::OperatorWithKernel<DeviceType> {
 
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/platform/data_type.h b/src/platform/data_type.h
deleted file mode 100644
index 44e0158a7cd7f912689f8514c9c8cfddae5654a1..0000000000000000000000000000000000000000
--- a/src/platform/data_type.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <string>
-#include <typeindex>
-
-#include "framework/program/tensor_desc.h"
-
-namespace paddle_mobile {
-namespace framework {
-
-inline VarType_Type ToDataType(std::type_index type) {
-  /*if (typeid(platform::float16).hash_code() == type.hash_code()) {
-    return proto::VarType::FP16;
-  } else */
-  if (typeid(const float).hash_code() == type.hash_code()) {
-    // CPPLint complains Using C-style cast.  Use
-    // static_cast<float>() instead
-    // One fix to this is to replace float with const float because
-    // typeid(T) == typeid(const T)
-    // http://en.cppreference.com/w/cpp/language/typeid
-    return VARTYPE_TYPE_FP32;
-  } else if (typeid(const double).hash_code() == type.hash_code()) {
-    return VARTYPE_TYPE_FP64;
-  } else if (typeid(const int).hash_code() == type.hash_code()) {
-    return VARTYPE_TYPE_INT32;
-  } else if (typeid(const int64_t).hash_code() == type.hash_code()) {
-    return VARTYPE_TYPE_INT64;
-  } else if (typeid(const bool).hash_code() == type.hash_code()) {
-    return VARTYPE_TYPE_BOOL;
-  } else {
-    //    PADDLE_THROW("Not supported");
-    //    std::cout << "Not supported";
-  }
-}
-
-inline std::type_index ToTypeIndex(VarType_Type type) {
-  switch (type) {
-    //    case proto::VarType::FP16:
-    //      return typeid(platform::float16);
-    case VARTYPE_TYPE_FP32:
-      return typeid(float);
-    case VARTYPE_TYPE_FP64:
-      return typeid(double);
-    case VARTYPE_TYPE_INT32:
-      return typeid(int);
-    case VARTYPE_TYPE_INT64:
-      return typeid(int64_t);
-    case VARTYPE_TYPE_BOOL:
-      return typeid(bool);
-    default:
-      //      PADDLE_THROW("Not support type %d", type);
-      printf("Not support type %d", type);
-  }
-}
-
-template <typename Visitor>
-inline void VisitDataType(VarType_Type type, Visitor visitor) {
-  switch (type) {
-    //    case proto::VarType::FP16:
-    //      visitor.template operator()<platform::float16>();
-    //      break;
-    case VARTYPE_TYPE_FP32:
-      visitor.template operator()<float>();
-      break;
-    case VARTYPE_TYPE_FP64:
-      visitor.template operator()<double>();
-      break;
-    case VARTYPE_TYPE_INT32:
-      visitor.template operator()<int>();
-      break;
-    case VARTYPE_TYPE_INT64:
-      visitor.template operator()<int64_t>();
-      break;
-    case VARTYPE_TYPE_BOOL:
-      visitor.template operator()<bool>();
-      break;
-    default:
-      //      PADDLE_THROW("Not supported");
-      printf("Not supported");
-  }
-}
-
-inline std::string DataTypeToString(const VarType_Type type) {
-  switch (type) {
-    case VARTYPE_TYPE_FP16:
-      return "float16";
-    case VARTYPE_TYPE_FP32:
-      return "float32";
-    case VARTYPE_TYPE_FP64:
-      return "float64";
-    case VARTYPE_TYPE_INT16:
-      return "int16";
-    case VARTYPE_TYPE_INT32:
-      return "int32";
-    case VARTYPE_TYPE_INT64:
-      return "int64";
-    case VARTYPE_TYPE_BOOL:
-      return "bool";
-    default:
-      //      PADDLE_THROW("Not support type %d", type);
-      printf("Not support type %d", type);
-  }
-}
-
-inline std::ostream &operator<<(std::ostream &out, const VarType_Type &type) {
-  out << DataTypeToString(type);
-  return out;
-}
-
-}  // namespace framework
-}  // namespace paddle_mobile
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c71306281e3354cd1856ecaa7278266b031b665c..124e1c2d60d81fe0d6f19ffa1936f3d1d8e7eb16 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,116 +1,140 @@
-
 set(dir ${CMAKE_CURRENT_SOURCE_DIR})
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${dir}/build")
 
-# gen test
-ADD_EXECUTABLE(test-conv-op  operators/test_cov_op.cpp test_helper.h  test_include.h executor_for_test.h)
-target_link_libraries(test-conv-op paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-mul-op  operators/test_mul_op.cpp test_helper.h  test_include.h)
-target_link_libraries(test-mul-op paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h  test_include.h)
-target_link_libraries(test-elementwiseadd-op paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h  test_include.h)
-target_link_libraries(test-concat-op paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-lrn-op  operators/test_lrn_op.cpp test_helper.h  test_include.h)
-target_link_libraries(test-lrn-op paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-batchnorm-op  operators/test_batchnorm_op.cpp test_helper.h  test_include.h)
-target_link_libraries(test-batchnorm-op paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-priorbox-op  operators/test_prior_box_op.cpp test_helper.h  test_include.h)
-target_link_libraries(test-priorbox-op paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-boxcoder-op  operators/test_box_coder_op.cpp test_helper.h  test_include.h)
-target_link_libraries(test-boxcoder-op paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-transpose-op  operators/test_transpose_op.cpp test_helper.h  test_include.h)
-target_link_libraries(test-transpose-op paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-multiclassnms-op  operators/test_multiclass_nms_op.cpp test_helper.h  test_include.h)
-target_link_libraries(test-multiclassnms-op paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-reshape-op  operators/test_reshape_op.cpp test_helper.h  test_include.h)
-target_link_libraries(test-reshape-op paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-relu-op  operators/test_relu_op.cpp test_helper.h  test_include.h)
-target_link_libraries(test-relu-op paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-fc-op  operators/test_fushion_fc_op.cpp test_helper.h  test_include.h)
-target_link_libraries(test-fc-op paddle-mobile)
-
-# gen test log
-ADD_EXECUTABLE(test-log common/test_log.cpp)
-target_link_libraries(test-log paddle-mobile)
-
-# gen test log
-ADD_EXECUTABLE(test-load framework/test_load.cpp)
-target_link_libraries(test-load paddle-mobile)
-
-# gen test log
-# gen test
-ADD_EXECUTABLE(test-optimize framework/test_optimize.cpp)
-target_link_libraries(test-optimize paddle-mobile)
-
-
-#gen test
-ADD_EXECUTABLE(test-pool operators/test_pool_op.cpp test_helper.h test_include.h executor_for_test.h)
-target_link_libraries(test-pool paddle-mobile)
-
-#gen test
-ADD_EXECUTABLE(test-softmax operators/test_softmax_op.cpp test_helper.h test_include.h executor_for_test.h)
-target_link_libraries(test-softmax paddle-mobile)
+if (googlenet)
+    # gen test
+    ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-googlenet paddle-mobile)
+elseif (mobilenet)
+    # gen test
+    ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-mobilenet paddle-mobile)
+elseif (yolo)
+    # gen test
+    ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-yolo paddle-mobile)
+elseif (squeezenet)
+    # gen test
+    ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-squeezenet paddle-mobile)
+elseif(resnet)
+    # gen test
+    ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-resnet paddle-mobile)
+else ()
+
+    # gen test
+    ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-resnet paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-squeezenet paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-yolo paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-googlenet paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-conv-op  operators/test_cov_op.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-conv-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-mul-op  operators/test_mul_op.cpp test_helper.h  test_include.h)
+    target_link_libraries(test-mul-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h  test_include.h)
+    target_link_libraries(test-elementwiseadd-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h  test_include.h)
+    target_link_libraries(test-concat-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-lrn-op  operators/test_lrn_op.cpp test_helper.h  test_include.h)
+    target_link_libraries(test-lrn-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-batchnorm-op  operators/test_batchnorm_op.cpp test_helper.h  test_include.h)
+    target_link_libraries(test-batchnorm-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-priorbox-op  operators/test_prior_box_op.cpp test_helper.h  test_include.h)
+    target_link_libraries(test-priorbox-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-boxcoder-op  operators/test_box_coder_op.cpp test_helper.h  test_include.h)
+    target_link_libraries(test-boxcoder-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-transpose-op  operators/test_transpose_op.cpp test_helper.h  test_include.h)
+    target_link_libraries(test-transpose-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-multiclassnms-op  operators/test_multiclass_nms_op.cpp test_helper.h  test_include.h)
+    target_link_libraries(test-multiclassnms-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-reshape-op  operators/test_reshape_op.cpp test_helper.h  test_include.h)
+    target_link_libraries(test-reshape-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-relu-op  operators/test_relu_op.cpp test_helper.h  test_include.h)
+    target_link_libraries(test-relu-op paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-fc-op  operators/test_fushion_fc_op.cpp test_helper.h  test_include.h)
+    target_link_libraries(test-fc-op paddle-mobile)
+
+    # gen test log
+    ADD_EXECUTABLE(test-log common/test_log.cpp)
+    target_link_libraries(test-log paddle-mobile)
+
+    # gen test load
+    ADD_EXECUTABLE(test-load framework/test_load.cpp)
+    target_link_libraries(test-load paddle-mobile)
+
+    # gen test
+    # (exercises the program-optimize fusion pass)
+    ADD_EXECUTABLE(test-optimize framework/test_optimize.cpp)
+    target_link_libraries(test-optimize paddle-mobile)
+
+
+    #gen test
+    ADD_EXECUTABLE(test-pool operators/test_pool_op.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-pool paddle-mobile)
+
+    #gen test
+    ADD_EXECUTABLE(test-softmax operators/test_softmax_op.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-softmax paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-gemm common/test_gemm.cpp)
+    target_link_libraries(test-gemm paddle-mobile)
 
-# gen test
-ADD_EXECUTABLE(test-gemm common/test_gemm.cpp)
-target_link_libraries(test-gemm paddle-mobile)
+    # gen test
+    ADD_EXECUTABLE(test-enforce common/test_enforce.cpp)
+    target_link_libraries(test-enforce paddle-mobile)
 
-# gen test
-ADD_EXECUTABLE(test-enforce common/test_enforce.cpp)
-target_link_libraries(test-enforce paddle-mobile)
+    # gen test - test if openmp works
+    ADD_EXECUTABLE(test-openmp common/test_openmp.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-openmp paddle-mobile)
 
-# gen test
-ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h  test_include.h executor_for_test.h)
-target_link_libraries(test-yolo paddle-mobile)
+    # gen test
+    ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-mobilenetssd paddle-mobile)
 
-# gen test
-ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h  test_include.h executor_for_test.h)
-target_link_libraries(test-googlenet paddle-mobile)
+    # gen test
+    ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp  test_include.h)
+    target_link_libraries(test-sigmoid paddle-mobile)
 
-# gen test
-ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h  test_include.h executor_for_test.h)
-target_link_libraries(test-mobilenet paddle-mobile)
+    # gen test
+    ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-depthwise-conv-op paddle-mobile)
 
-# gen test
-ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h  test_include.h executor_for_test.h)
-target_link_libraries(test-resnet paddle-mobile)
-# gen test
-ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h  test_include.h executor_for_test.h)
-target_link_libraries(test-mobilenetssd paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h  test_include.h executor_for_test.h)
-target_link_libraries(test-squeezenet paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp  test_include.h)
-target_link_libraries(test-sigmoid paddle-mobile)
-
-# gen test
-ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h)
-target_link_libraries(test-depthwise-conv-op paddle-mobile)
+endif()
diff --git a/test/common/test_gemm.cpp.cpp b/test/common/test_gemm.cpp
similarity index 100%
rename from test/common/test_gemm.cpp.cpp
rename to test/common/test_gemm.cpp
diff --git a/src/framework/program/tensor_desc.cpp b/test/common/test_lib_size.cpp
similarity index 86%
rename from src/framework/program/tensor_desc.cpp
rename to test/common/test_lib_size.cpp
index 1b4bd93f6f19426407868052e5366ebeeaedda69..805668f359f0e0959ea7122f25cdaa0ad2d7ec77 100644
--- a/src/framework/program/tensor_desc.cpp
+++ b/test/common/test_lib_size.cpp
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 //
-// Created by liuRuiLong on 2018/5/26.
+// Created by liuRuiLong on 2018/6/6.
 //
 
-#include "tensor_desc.h"
+#include "test_lib_size.h"
+
+static test_lib_size t;
diff --git a/test/common/test_lib_size.h b/test/common/test_lib_size.h
new file mode 100644
index 0000000000000000000000000000000000000000..ab5cd359b73f1c5c5df4176ab444e48c85dbd95b
--- /dev/null
+++ b/test/common/test_lib_size.h
@@ -0,0 +1,93 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+//
+// Created by liuRuiLong on 2018/6/6.
+//
+
+#ifndef PADDLE_MOBILE_TEST_LIB_SIZE_H
+#define PADDLE_MOBILE_TEST_LIB_SIZE_H
+
+#include <vector>
+//#include <list>
+//#include <tuple>
+//#include <typeinfo>
+//#include <mutex>
+//#include <initializer_list>
+//#include <map>
+//#include <string>
+//#include <unordered_map>
+//#include <unordered_set>
+//#include <algorithm>
+
+//#include <iostream>
+//#include <sstream>
+#include <memory>
+//#include <stdio.h>
+//#include <cstring>
+
+// Instantiates std::vector and std::shared_ptr; presumably a binary-size probe.
+// Marked inline: defined in a header, so several TUs may include it safely.
+inline void foo() {
+  //  char *str = "1234";
+  //  char dst[10];
+  //  strcpy(dst, str);
+
+  //  std::cout << "12345" << std::endl;
+  std::vector<int> vec = {1, 2, 3, 4, 5};
+
+  //  std::find(vec.begin(), vec.end(), 1);
+
+  //  std::list<int> l;
+  //  std::mutex mutex_;
+
+  //  std::map<int, float> m;
+  //  std::unordered_map<int, float> u_m;
+  //  std::unordered_set<int> u_s;
+  //  std::string ss = "12345";
+  //  printf("%f", ss.c_str());
+
+  //  std::initializer_list<int> init_list = {1, 2};
+  //  std::tuple<int, int> t = {1, 2};
+
+  //  std::tuple_element<I, std::tuple<ARGS...>>::type
+  //  std::tuple<>
+
+  //  int i;
+  //  int j;
+  //  if (typeid(i) == typeid(j)){
+  //    int z = 10;
+  //  }
+
+  std::shared_ptr<int> s1 = std::make_shared<int>();
+
+  //  std::stringstream ss;
+  //  ss << "12345";
+}
+
+class test_lib_size {
+ public:
+  test_lib_size() {}
+  //  std::shared_ptr<int> Test(){
+  //    std::vector<int> vec = {1, 2, 3};
+  //    std::shared_ptr<int> si = std::make_shared<int>();
+  //    return si;
+  //  }
+
+  //  void test(){
+  //    int i = 9;
+  //  }
+};
+
+#endif  // PADDLE_MOBILE_TEST_LIB_SIZE_H
diff --git a/src/platform/hostdevice.h b/test/common/test_openmp.cpp
similarity index 63%
rename from src/platform/hostdevice.h
rename to test/common/test_openmp.cpp
index 6139fb94b998b2a9b261064d1b0428e0c65cf69e..790c434101e20478853b7079533403d65dc829ba 100644
--- a/src/platform/hostdevice.h
+++ b/test/common/test_openmp.cpp
@@ -12,14 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#pragma once
+//#include <omp.h>
+#include <iostream>
 
-#ifdef __CUDACC__
-#define HOSTDEVICE __host__ __device__
-#define DEVICE __device__
-#define HOST __host__
-#else
-#define HOSTDEVICE
-#define DEVICE
-#define HOST
+int main(void) {
+#ifdef PADDLE_MOBILE_USE_OPENMP
+  #pragma omp parallel num_threads(2)
+  {
+    //        int thread_id = omp_get_thread_num();
+    //        int nthreads = omp_get_num_threads();
+    //        std::cout << "Hello, OMP " << thread_id << "/" << nthreads <<
+    //        "\n";
+  }
 #endif
+  return 0;
+}
diff --git a/test/executor_for_test.h b/test/executor_for_test.h
index ce3c84e986eb7ef5e9602209cedb3dbabbf06e85..f8c333779be4a56966ea6ca810b36a5f52c9383d 100644
--- a/test/executor_for_test.h
+++ b/test/executor_for_test.h
@@ -19,7 +19,7 @@ limitations under the License. */
 
 #include "common/log.h"
 #include "framework/op_registry.h"
-#include "io.h"
+#include "io/io.h"
 #include "operators/conv_op.h"
 #include "operators/elementwise_add_op.h"
 #include "operators/pool_op.h"
diff --git a/test/framework/test_load.cpp b/test/framework/test_load.cpp
index 95357547e1b93d3060481b55eaf46c919496785d..2300f05c99a122b352d888a45ca3c6ef082469ba 100644
--- a/test/framework/test_load.cpp
+++ b/test/framework/test_load.cpp
@@ -13,13 +13,16 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "../test_helper.h"
-#include "io.h"
+#include "io/io.h"
 
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
   //  ../../../test/models/googlenet
   //  ../../../test/models/mobilenet
-  auto program = loader.Load(g_googlenet);
-  program.optimizeProgram->Description("program desc: ");
+  auto program = loader.Load(g_resnet, true);
+  loader.Load(g_googlenet_combine + "/model", g_googlenet_combine + "/params",
+              true);
+
+  program.originProgram->Description("program desc: ");
   return 0;
 }
diff --git a/test/framework/test_optimize.cpp b/test/framework/test_optimize.cpp
index f0392cfec02c8ea764cd3d6dc9f50b2415c39e2c..2cb920da616110b9415c494140e48e3479845851 100644
--- a/test/framework/test_optimize.cpp
+++ b/test/framework/test_optimize.cpp
@@ -15,17 +15,17 @@ limitations under the License. */
 #include "../test_helper.h"
 #include "framework/program/program-optimize/node.h"
 #include "framework/program/program-optimize/program_optimize.h"
-#include "io.h"
+#include "io/io.h"
 
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
   //    "../../../test/models/googlenet"
-  auto program = loader.Load(g_googlenet);
+  auto program = loader.Load(g_mobilenet_ssd, true);
   paddle_mobile::framework::ProgramOptimize optimize;
   //  program.originProgram->Description("origin");
   auto optimize_program = optimize.FushionOptimize(program.originProgram);
   if (optimize_program != nullptr) {
-    optimize_program->Description("optimize");
+    //    optimize_program->Description("optimize");
   } else {
     LOG(paddle_mobile::kLOG_ERROR) << "optimize_program is null";
   }
diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp
index 302cd3e726eeb99c50c3adf7e3b9117a05cf0560..ab4fd2fe0d1eaaa58fabc38fbf512a0b860c36f0 100644
--- a/test/net/test_googlenet.cpp
+++ b/test/net/test_googlenet.cpp
@@ -18,9 +18,11 @@ limitations under the License. */
 
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
-  bool optimize = true;
+  bool optimize = false;
   auto time1 = time();
-  auto program = loader.Load(g_googlenet, optimize);
+  //  auto program = loader.Load(g_googlenet, optimize);
+  auto program = loader.Load(g_googlenet_combine + "/model",
+                             g_googlenet_combine + "/params", optimize);
   auto time2 = time();
   DLOG << "load cost :" << time_diff(time1, time2) << "ms\n";
   paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, optimize);
diff --git a/test/net/test_mobilenet+ssd.cpp b/test/net/test_mobilenet+ssd.cpp
index e9d92e7a51b9f7abe2c451df4073428bd2bd6d5f..097d03ad710468a881050ff729e8352f029d664f 100644
--- a/test/net/test_mobilenet+ssd.cpp
+++ b/test/net/test_mobilenet+ssd.cpp
@@ -19,10 +19,10 @@ limitations under the License. */
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
   auto time1 = time();
-  auto program = loader.Load(g_mobilenet_ssd, false);
+  auto program = loader.Load(g_mobilenet_ssd, true);
   auto time2 = time();
-  DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+  DLOG << "load cost :" << time_diff(time1, time2) << "ms";
-  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, false);
+  paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, true);
 
   std::vector<int64_t> dims{1, 3, 300, 300};
   Tensor input_tensor;
diff --git a/test/operators/test_concat_op.cpp b/test/operators/test_concat_op.cpp
index 7a106b03c44c57fa7ef0f9282434717efd602b5c..edaa4ce1ddba251886c90262895333b0a56c3a07 100644
--- a/test/operators/test_concat_op.cpp
+++ b/test/operators/test_concat_op.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "../executor_for_test.h"
 #include "../test_include.h"
 #include "operators/concat_op.h"
 
diff --git a/test/operators/test_cov_op.cpp b/test/operators/test_cov_op.cpp
index ba6a9b4800f8b2acb3a5c3b0992128bd4ea0e619..8057430309cac4da88e80518bf8ab0f5afd79ce5 100644
--- a/test/operators/test_cov_op.cpp
+++ b/test/operators/test_cov_op.cpp
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "../executor_for_test.h"
 #include "../test_include.h"
+#include "operators/conv_op.h"
 
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
diff --git a/test/operators/test_depthwise_conv_op.cpp b/test/operators/test_depthwise_conv_op.cpp
index 648b4c5db9970804a2ca140eef13e2560e36f935..bd2aad19eda896bad3da8a47f5b70b1a923dc1a7 100644
--- a/test/operators/test_depthwise_conv_op.cpp
+++ b/test/operators/test_depthwise_conv_op.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "../executor_for_test.h"
 #include "../test_include.h"
 #include "operators/depthwise_conv_op.h"
 
diff --git a/test/operators/test_elementwise_add_op.cpp b/test/operators/test_elementwise_add_op.cpp
index c4997f2eb37730e1af38fbe8aac927e7ee2b6ee0..0a5e9f7e92701e748df51078b21eb46eec90599d 100644
--- a/test/operators/test_elementwise_add_op.cpp
+++ b/test/operators/test_elementwise_add_op.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "../executor_for_test.h"
 #include "../test_include.h"
 
 int main() {
diff --git a/test/operators/test_lrn_op.cpp b/test/operators/test_lrn_op.cpp
index cf5fd4bdf2d45abcf63eb865f1cf333eeb14eafc..d4d9f8da802fc0f5f885a3b2e81cba695776c29e 100644
--- a/test/operators/test_lrn_op.cpp
+++ b/test/operators/test_lrn_op.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "../executor_for_test.h"
 #include "../test_include.h"
 #include "operators/lrn_op.h"
 
diff --git a/test/operators/test_mul_op.cpp b/test/operators/test_mul_op.cpp
index 5412e6905b7c12782555c7271c5da17713561469..8ebf0926890497c0ed622b69f163a9f6f5c8612b 100644
--- a/test/operators/test_mul_op.cpp
+++ b/test/operators/test_mul_op.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "../executor_for_test.h"
 #include "../test_include.h"
 #include "operators/mul_op.h"
 
diff --git a/test/operators/test_pool_op.cpp b/test/operators/test_pool_op.cpp
index 62dfc20dc12006f86b16997cb6de96123e10ee9c..2daecd7b4c1a50c612bc784c801208d2e6f31482 100644
--- a/test/operators/test_pool_op.cpp
+++ b/test/operators/test_pool_op.cpp
@@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "../executor_for_test.h"
-#include "../test_helper.h"
-#include "io.h"
+#include "../test_include.h"
+#include "operators/pool_op.h"
 
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
diff --git a/test/operators/test_relu_op.cpp b/test/operators/test_relu_op.cpp
index 50f3b6a20b6244fcb39975c80cc6a6e14dc88d1c..fad0d0c30a126cc2730e4aa8b87364eee9fc8209 100644
--- a/test/operators/test_relu_op.cpp
+++ b/test/operators/test_relu_op.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "../executor_for_test.h"
 #include "../test_include.h"
 #include "operators/relu_op.h"
 
diff --git a/test/operators/test_reshape_op.cpp b/test/operators/test_reshape_op.cpp
index 5448aac87c23ea90f5b8beec24aee9cc6f437330..3541151d8a1a286527e715f402df381d2efc094c 100644
--- a/test/operators/test_reshape_op.cpp
+++ b/test/operators/test_reshape_op.cpp
@@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "../executor_for_test.h"
-#include "../test_helper.h"
-#include "io.h"
+#include "../test_include.h"
+#include "operators/reshape_op.h"
 
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
diff --git a/test/operators/test_sigmoid_op.cpp b/test/operators/test_sigmoid_op.cpp
index 289eac149fa2d3e05f65624f8a9e5f93e85c6fff..4ed3efaf28aa986f0b679729c46cb386150583e3 100644
--- a/test/operators/test_sigmoid_op.cpp
+++ b/test/operators/test_sigmoid_op.cpp
@@ -14,7 +14,7 @@ limitations under the License. */
 
 #include "../../src/operators/kernel/sigmoid_kernel.h"
 #include "../test_helper.h"
-#include "io.h"
+#include "io/io.h"
 
 int main() {
   paddle_mobile::framework::Tensor input;
diff --git a/test/operators/test_softmax_op.cpp b/test/operators/test_softmax_op.cpp
index 58de5300cca0bf367652066851bc4e7e9f75389c..a0184729a8bc5e6b0ba952923eecd5242cfe36d4 100644
--- a/test/operators/test_softmax_op.cpp
+++ b/test/operators/test_softmax_op.cpp
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "../executor_for_test.h"
-#include "../test_helper.h"
-#include "io.h"
+#include "../test_include.h"
+
+#include "operators/softmax_op.h"
 
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
diff --git a/test/operators/test_transpose_op.cpp b/test/operators/test_transpose_op.cpp
index 4c88df2d83dcfbc44915ced815b50f90ddb33b38..f83ee23c25d8f2588e0fe40d5fabc6114129b995 100644
--- a/test/operators/test_transpose_op.cpp
+++ b/test/operators/test_transpose_op.cpp
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "../executor_for_test.h"
 #include "../test_helper.h"
-#include "io.h"
-
+#include "../test_include.h"
+#include "operators/transpose_op.h"
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
   auto program = loader.Load(std::string(g_mobilenet_ssd));
diff --git a/test/test_helper.h b/test/test_helper.h
index fc4ed6c91dc9b03c1f4dadfd8a4bc94efe3a724e..fe720ded8270f2bc02a4f1e72625954962184069 100644
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -22,12 +22,13 @@ limitations under the License. */
 #include "framework/ddim.h"
 #include "framework/tensor.h"
 
-static const std::string g_googlenet = "../models/googlenet";
-static const std::string g_mobilenet = "../models/mobilenet";
 static const std::string g_mobilenet_ssd = "../models/mobilenet+ssd";
 static const std::string g_squeezenet = "../models/squeezenet";
-static const std::string g_resnet =
-    "../models/image_classification_resnet.inference.model";
+static const std::string g_googlenet = "../models/googlenet";
+static const std::string g_mobilenet = "../models/mobilenet";
+static const std::string g_resnet_50 = "../models/resnet_50";
+static const std::string g_resnet = "../models/resnet";
+static const std::string g_googlenet_combine = "../models/googlenet_combine";
 static const std::string g_yolo = "../models/yolo";
 static const std::string g_test_image_1x3x224x224 =
     "../images/test_image_1x3x224x224_float";
diff --git a/test/test_include.h b/test/test_include.h
index 25efbb9f4c00921495a5ab054acdde329c4ef58a..2d89dc8c9ed1de1ad49ebca07724b6649e2a12a7 100644
--- a/test/test_include.h
+++ b/test/test_include.h
@@ -21,6 +21,7 @@ limitations under the License. */
 #include "./test_helper.h"
 #include "common/enforce.h"
 #include "common/log.h"
+#include "executor_for_test.h"
 #include "framework/lod_tensor.h"
 #include "framework/operator.h"
 #include "framework/program/block_desc.h"
@@ -29,4 +30,4 @@ limitations under the License. */
 #include "framework/scope.h"
 #include "framework/tensor.h"
 #include "framework/variable.h"
-#include "io.h"
+#include "io/io.h"
diff --git a/tools/build.sh b/tools/build.sh
new file mode 100755
index 0000000000000000000000000000000000000000..aa59bd3d2834fc83db7bed24e7ee4ac7ea132294
--- /dev/null
+++ b/tools/build.sh
@@ -0,0 +1,176 @@
+#!/usr/bin/env bash
+
+# Configure and build the x86 Release target on macOS; cmake is installed through Homebrew if missing.
+build_for_mac() {
+    if [ ! `which brew` ]; then
+        echo "building failed! homebrew not found, please install homebrew."
+        return 1
+    fi
+    if [ ! `which cmake` ]; then
+        echo "installing cmake."
+        # Test the command itself: `[ ! $? ]` checks a non-empty string and can never be true.
+        if ! brew install cmake; then
+            echo "cmake install failed."
+            return 1
+        fi
+    fi
+    PLATFORM="x86"
+    MODE="Release"
+    CXX_FLAGS="-std=c++11 -O3 -s"
+    BUILD_DIR=../build/release/"${PLATFORM}"
+    mkdir -p ${BUILD_DIR}/build ${BUILD_DIR}/test
+    # Copy into test/ so that a re-run refreshes test/models instead of nesting test/models/models.
+    cp -r ../test/models ${BUILD_DIR}/test/
+
+    cmake .. \
+        -B"${BUILD_DIR}" \
+    	-DCMAKE_BUILD_TYPE="${MODE}" \
+    	-DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
+    	-DIS_MAC=true || return 1
+
+    cd ${BUILD_DIR} || return 1
+    make -j 8
+}
+
+# Cross-compile the library for Android with the NDK toolchain file named in TOOLCHAIN_FILE.
+#   $1        optional cmake variable name, forwarded as -D<name>=true
+#   PLATFORM  "arm-v7a" (default) or "arm-v8a", read from the environment
+# Exits with an error status when ANDROID_NDK is unset or PLATFORM is not recognised.
+build_for_android() {
+    if [ -z "${ANDROID_NDK}" ]; then
+        echo "ANDROID_NDK not found!"
+        exit -1
+    fi
+
+    if [ -z "$PLATFORM" ]; then
+        PLATFORM="arm-v7a"  # Users could choose "arm-v8a" or other platforms from the command line.
+    fi
+
+    if [ "${PLATFORM}" = "arm-v7a" ]; then
+        ABI="armeabi-v7a with NEON"
+        ARM_PLATFORM="V7"
+        CXX_FLAGS="-O3 -std=c++11 -s -march=armv7-a -mfpu=neon -mfloat-abi=softfp -pie -fPIE -w -Wno-error=format-security"
+    elif [ "${PLATFORM}" = "arm-v8a" ]; then
+        ABI="arm64-v8a"
+        ARM_PLATFORM="V8"
+        CXX_FLAGS="-O3 -std=c++11 -s -march=armv8-a  -pie -fPIE -w -Wno-error=format-security -llog"
+    else
+        echo "unknown platform!"
+        exit -1
+    fi
+
+    # Wipe the old output only after validation, so a bad invocation keeps the previous build.
+    rm -rf "../build"
+    MODE="Release"
+    ANDROID_PLATFORM_VERSION="android-15"
+    TOOLCHAIN_FILE="./tools/android-cmake/android.toolchain.cmake"
+    ANDROID_ARM_MODE="arm"
+    BUILD_DIR="../build/release/${PLATFORM}"
+
+    # The two original cmake invocations differed only in the optional -D<net>=true argument.
+    NET_ARGS=()
+    if [ $# -eq 1 ]; then
+        NET=$1
+        NET_ARGS=(-D"${NET}=true")
+    fi
+
+    cmake .. \
+        -B"${BUILD_DIR}" \
+        -DANDROID_ABI="${ABI}" \
+        -DCMAKE_BUILD_TYPE="${MODE}" \
+        -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
+        -DANDROID_PLATFORM="${ANDROID_PLATFORM_VERSION}" \
+        -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
+        -DANDROID_STL=c++_static \
+        -DANDROID=true \
+        "${NET_ARGS[@]}" \
+        -D"${ARM_PLATFORM}"=true || exit 1
+
+    # Stop here if the build tree is missing, rather than running make in the wrong directory.
+    cd "${BUILD_DIR}" || exit 1
+    make -j 8
+}
+
+# Configure and build the iOS Release target with the toolchain file named in TOOLCHAIN_FILE.
+#   $1  optional cmake variable name, forwarded as -D<name>=true
+build_for_ios() {
+    rm -rf "../build"
+    PLATFORM="ios"
+    MODE="Release"
+    BUILD_DIR=../build/release/"${PLATFORM}"
+    TOOLCHAIN_FILE="./tools/ios-cmake/ios.toolchain.cmake"
+    # NOTE(review): CMAKE_OSX_SYSROOT is expanded by the shell here, so it has to come from the
+    # caller's environment; when it is unset "-isysroot" is left without a path -- confirm intent.
+    C_FLAGS="-fobjc-abi-version=2 -fobjc-arc -isysroot ${CMAKE_OSX_SYSROOT}"
+    CXX_FLAGS="-fobjc-abi-version=2 -fobjc-arc -std=gnu++11 -stdlib=libc++ -isysroot ${CMAKE_OSX_SYSROOT}"
+    mkdir -p "${BUILD_DIR}"
+
+    # Only the optional -D<net>=true argument differed between the two original cmake calls.
+    NET_ARGS=()
+    if [ $# -eq 1 ]; then
+        NET=$1
+        NET_ARGS=(-D"${NET}"=true)
+    fi
+
+    cmake .. \
+        -B"${BUILD_DIR}" \
+        -DCMAKE_BUILD_TYPE="${MODE}" \
+        -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
+        -DIOS_PLATFORM=OS \
+        -DCMAKE_C_FLAGS="${C_FLAGS}" \
+        -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
+        "${NET_ARGS[@]}" \
+        -DIS_IOS="true" || return 1
+    cd "${BUILD_DIR}" || return 1
+    make -j 8
+}
+
+build_error() {
+    echo "unknown argument"; return 1  # non-zero status so an unsupported target fails the script
+}
+
+# Entry point: ./build.sh <mac|linux|ios|android> [net]
+#   <target> selects the build_for_* function to run.
+#   [net]    is forwarded to the build function only when it is one of the
+#            known network names and exactly two arguments were given;
+#            anything else is ignored, as before.
+if [ $# -lt 1 ]; then
+    echo "error: target missing!"
+    echo "available targets: mac|linux|ios|android"
+    echo "sample usage: ./build.sh mac"
+    exit 1
+fi
+
+TARGET="$1"
+
+# Collect the optional network argument once instead of repeating the
+# whole target dispatch for every argument-count case.
+BUILD_ARGS=()
+if [ $# -eq 2 ]; then
+    case "$2" in
+        googlenet|mobilenet|yolo|squeezenet|resnet)
+            BUILD_ARGS=("$2")
+            ;;
+    esac
+fi
+
+case "${TARGET}" in
+    mac)
+        build_for_mac "${BUILD_ARGS[@]}"
+        ;;
+    linux)
+        # build_for_linux is not defined anywhere in this script; report that
+        # clearly instead of failing with "command not found".
+        echo "error: the linux target is not implemented yet"
+        exit 1
+        ;;
+    android)
+        build_for_android "${BUILD_ARGS[@]}"
+        ;;
+    ios)
+        build_for_ios "${BUILD_ARGS[@]}"
+        ;;
+    *)
+        build_error
+        ;;
+esac
diff --git a/tools/pre-commit.hooks/clang-format.hook b/tools/pre-commit.hooks/clang-format.hook
index 406850d1a2450b49463563c0034c6c969895bfe4..4fa4253bad78fe287fb92863a684a5d7def71061 100644
--- a/tools/pre-commit.hooks/clang-format.hook
+++ b/tools/pre-commit.hooks/clang-format.hook
@@ -12,4 +12,8 @@ if ! [[ $version == *"$VERSION"* ]]; then
     exit -1
 fi
 
-clang-format $@
+# https://medicineyeh.wordpress.com/2017/07/13/clang-format-with-pragma/
+shift  # NOTE(review): presumably discards the leading option handed to this hook -- confirm
+perl -i -pe 's|#pragma\s+omp|// <TRICKY-CLANG-FORMAT-PRAGMA-FIX> #pragma omp|' "$@"  # hide omp pragmas from clang-format
+clang-format -i "$@"  # quoted so file names containing spaces stay single arguments
+perl -i -pe 's|// <TRICKY-CLANG-FORMAT-PRAGMA-FIX> ||' "$@"  # restore the pragmas
diff --git a/scripts/push2android.sh b/tools/scripts/push2android.sh
similarity index 52%
rename from scripts/push2android.sh
rename to tools/scripts/push2android.sh
index 44b0ee32e99ccddf5cc6060882dc37158c149693..33737f2ed37a233dc17892591f429a24c1832a4b 100644
--- a/scripts/push2android.sh
+++ b/tools/scripts/push2android.sh
@@ -1,13 +1,16 @@
 #!/usr/bin/env sh
 
 push_fn () {
-MODELS_PATH="../test/models/*"
-EXE_FILE="../test/build/*"
+MODELS_PATH="../../test/models/*"  # host paths are relative to the current directory
+IMAGE_PATH="../../test/images/*"  # test images; the * expands where the variable is used unquoted
+EXE_FILE="../../test/build/*"
 EXE_DIR="data/local/tmp/bin"
 MODELS_DIR="data/local/tmp/models"
-LIB_PATH="../build/release/arm-v7a/build/*"
+IMAGES_DIR="data/local/tmp/images"  # destination given to adb push for the images
+LIB_PATH="../../build/release/arm-v7a/build/*"
 adb push ${EXE_FILE} ${EXE_DIR}
 adb push ${LIB_PATH} ${EXE_DIR}
+adb push ${IMAGE_PATH} ${IMAGES_DIR}
 adb push ${MODELS_PATH} ${MODELS_DIR}
 echo "test files sync completed"
 }