add fc in types

a3d6c54d · eclipsess · 7d03a7c4 · 85a3efe1 · a3d6c54d · a3d6c54d
157 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
 cmake_minimum_required(VERSION 3.0)
 project(paddle-mobile)
-add_definitions(-DPADDLE_MOBILE_DEBUG)
-add_definitions(-DENABLE_EXCEPTION)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+option(DEBUGING "enable debug mode" OFF)
-set(CMAKE_BUILD_TYPE RelWithDebInfo)
+option(USE_OPENMP "openmp support" OFF)
+option(USE_EXCEPTION "use std exception" OFF)
+# Select build type, logging and symbol visibility from the DEBUGING option.
+if (DEBUGING)
+    set(CMAKE_BUILD_TYPE Debug)
+    message(STATUS "debuging")
+    add_definitions(-DPADDLE_MOBILE_DEBUG)
+    if (ANDROID)
+        # liblog is a link-time dependency of the debug logging macros.
+        # Register it as a library instead of appending -llog to the compile
+        # flags, where it lands ahead of the object files on the link line.
+        link_libraries(log)
+    endif ()
+else ()
+    set(CMAKE_BUILD_TYPE Release)
+    message(STATUS "releasing")
+    # Release builds hide every symbol that is not explicitly exported.
+    add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden)
+endif ()
+if (USE_EXCEPTION)
+    add_definitions(-DENABLE_EXCEPTION)
+    add_definitions(-fexceptions)
+else()
+    add_definitions(-fno-exceptions)
+endif ()
+if(IS_MAC)
+    add_definitions(-DX86)
+elseif(IS_IOS)
+    add_definitions(-DIOS)
+elseif(V7)
+    add_definitions(-DARMV7)
+elseif(V8)
+    add_definitions(-DARMV8)
+else ()
+    add_definitions(-DX86)
+endif()
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
 set(CMAKE_VERBOSE_MAKEFILE ON)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY build)
@@ -14,26 +52,86 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
 file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c)
 file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
-# include headers
+if (NOT ANDROID)
+    # list(REMOVE_ITEM) compares literal entries and never expands wildcards,
+    # so expand the JNI sources with a glob before removing them.
+    file(GLOB_RECURSE PADDLE_MOBILE_JNI_CC "${CMAKE_CURRENT_SOURCE_DIR}/src/jni/*.cpp")
+    if (PADDLE_MOBILE_JNI_CC)
+        list(REMOVE_ITEM PADDLE_MOBILE_CC ${PADDLE_MOBILE_JNI_CC})
+    endif ()
+    # Headers are collected in PADDLE_MOBILE_H, not in PADDLE_MOBILE_CC.
+    file(GLOB_RECURSE PADDLE_MOBILE_JNI_H "${CMAKE_CURRENT_SOURCE_DIR}/src/jni/*.h")
+    list(APPEND PADDLE_MOBILE_JNI_H "${CMAKE_CURRENT_SOURCE_DIR}/src/operators/math/math_func_neon.h")
+    list(REMOVE_ITEM PADDLE_MOBILE_H ${PADDLE_MOBILE_JNI_H})
+endif ()
 include_directories(src/)
-#include(ExternalProject)
+if(USE_OPENMP)
-#ExternalProject_Add(openblas_proj
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
-#        GIT_REPOSITORY "https://github.com/xianyi/OpenBLAS.git"
+    add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
-#        GIT_TAG "v0.2.20"
+endif()
-#        SOURCE_DIR "openblas/"
-#        BUILD_IN_SOURCE 1
+if (googlenet)
-#        CONFIGURE_COMMAND ""
+    add_definitions(-DCONCAT_OP)
-#        BUILD_COMMAND "make" "ONLY_CBLAS=1"
+    add_definitions(-DCONV_OP)
-#        INSTALL_COMMAND "make" "PREFIX=${CMAKE_BINARY_DIR}/" "install"
+    add_definitions(-DLRN_OP)
-#        )
+    add_definitions(-DMUL_OP)
-#set_target_properties(openblas_proj PROPERTIES EXCLUDE_FROM_ALL 1)
+    add_definitions(-DELEMENTWISEADD_OP)
+    add_definitions(-DFUSION_FC_OP)
+    add_definitions(-DPOOL_OP)
+    add_definitions(-DRELU_OP)
+elseif (mobilenet)
+    # Operators compiled in for the mobilenet model.
+    add_definitions(-DCONV_OP)
+    add_definitions(-DELEMENTWISEADD_OP)
+    add_definitions(-DRELU_OP)
+    add_definitions(-DSOFTMAX_OP)
+    add_definitions(-DDEPTHWISECONV_OP)
+    add_definitions(-DBATCHNORM_OP)
+    add_definitions(-DPOOL_OP)
+    add_definitions(-DRESHAPE_OP)
+elseif (yolo)
+    add_definitions(-DBATCHNORM_OP)
+    add_definitions(-DCONV_OP)
+    add_definitions(-DRELU_OP)
+    add_definitions(-DELEMENTWISEADD_OP)
+elseif (squeezenet)
+    add_definitions(-DCONCAT_OP)
+    add_definitions(-DCONV_OP)
+    add_definitions(-DRELU_OP)
+    add_definitions(-DELEMENTWISEADD_OP)
+    add_definitions(-DPOOL_OP)
+    add_definitions(-DRESHAPE_OP)
+    add_definitions(-DSOFTMAX_OP)
+elseif(resnet)
+    add_definitions(-DCONV_OP)
+    add_definitions(-DBATCHNORM_OP)
+    add_definitions(-DELEMENTWISEADD_OP)
+    add_definitions(-DSOFTMAX_OP)
+    add_definitions(-DMUL_OP)
+    add_definitions(-DPOOL_OP)
+    add_definitions(-DRELU_OP)
+else ()
+    add_definitions(-DBATCHNORM_OP)
+    add_definitions(-DBOXCODER_OP)
+    add_definitions(-DCONCAT_OP)
+    add_definitions(-DCONV_OP)
+    add_definitions(-DDEPTHWISECONV_OP)
+    add_definitions(-DELEMENTWISEADD_OP)
+    add_definitions(-DFUSIONCONVADD_OP)
+    add_definitions(-DCONVADDRELU_OP)
+    add_definitions(-DFUSION_FC_OP)
+    add_definitions(-DLRN_OP)
+    add_definitions(-DMUL_OP)
+    add_definitions(-DMULTICLASSNMS_OP)
+    add_definitions(-DPOOL_OP)
+    add_definitions(-DPRIORBOX_OP)
+    add_definitions(-DRELU_OP)
+    add_definitions(-DRESHAPE_OP)
+    add_definitions(-DSIGMOID_OP)
+    add_definitions(-DSOFTMAX_OP)
+    add_definitions(-DTRANSPOSE_OP)
+endif()
-#add_dependencies(paddle-mobile openblas_proj)
+add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
-# gen static
+if(DEBUGING)
-ADD_LIBRARY(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
+    add_subdirectory(test)
+endif()
-#add_dependencies(paddle-mobile openblas_proj)
-add_subdirectory(test)
--- a/src/common/enforce.h
+++ b/src/common/enforce.h
@@ -17,8 +17,6 @@ limitations under the License. */
 #ifdef ENABLE_EXCEPTION
 #include <stdio.h>
 #include <exception>
-#include <sstream>
-#include <stdexcept>
 #include <string>
 #endif
@@ -32,12 +30,11 @@ struct PaddleMobileException : public std::exception {
  PaddleMobileException(const char *header, const char *detail,
                        const char *file, const int line) {
-    std::stringstream ss;
+    char buffer[1500];
-    ss << exception_prefix << "| " << header << "\n";
+    snprintf(buffer, sizeof(buffer),
-    ss << "| [in file] : " << file << " \n";
+             "%s| %s \n| [in file] : %s\n| [on line] : %d\n| [detail]  : %s\n",
-    ss << "| [on line] : " << line << " \n";
+             exception_prefix.c_str(), header, file, line, detail);
-    ss << "| [detail]  : " << detail;
+    message = std::string(buffer);
-    message = ss.str();
  }
  const char *what() const noexcept { return message.c_str(); }
 };

--- a/src/common/log.h
+++ b/src/common/log.h
@@ -20,11 +20,38 @@ limitations under the License. */
 #include <sstream>
 #include <string>
 #endif
+#ifdef ANDROID
+#include <android/log.h>
+#endif
 namespace paddle_mobile {
 #ifdef PADDLE_MOBILE_DEBUG
+#ifdef ANDROID
+extern const char *ANDROID_LOG_TAG;
+// Each macro forwards its printf-style arguments to both logcat and stdout.
+// The two calls are wrapped in do { ... } while (0) so a macro expands to a
+// single statement and stays correct inside an unbraced if/else.
+#define ANDROIDLOGI(...)                                                 \
+  do {                                                                   \
+    __android_log_print(ANDROID_LOG_INFO, ANDROID_LOG_TAG, __VA_ARGS__); \
+    printf(__VA_ARGS__);                                                 \
+  } while (0)
+#define ANDROIDLOGW(...)                                                    \
+  do {                                                                      \
+    __android_log_print(ANDROID_LOG_WARNING, ANDROID_LOG_TAG, __VA_ARGS__); \
+    printf(__VA_ARGS__);                                                    \
+  } while (0)
+#define ANDROIDLOGD(...)                                                  \
+  do {                                                                    \
+    __android_log_print(ANDROID_LOG_DEBUG, ANDROID_LOG_TAG, __VA_ARGS__); \
+    printf(__VA_ARGS__);                                                  \
+  } while (0)
+#define ANDROIDLOGE(...)                                                  \
+  do {                                                                    \
+    __android_log_print(ANDROID_LOG_ERROR, ANDROID_LOG_TAG, __VA_ARGS__); \
+    printf(__VA_ARGS__);                                                  \
+  } while (0)
+#else
+#define ANDROIDLOGI(...)
+#define ANDROIDLOGW(...)
+#define ANDROIDLOGD(...)
+#define ANDROIDLOGE(...)
+#endif
 enum LogLevel {
  kNO_LOG,
  kLOG_ERROR,
@@ -122,6 +149,11 @@ struct ToLog {
 #else
+#define ANDROIDLOGI(...)
+#define ANDROIDLOGW(...)
+#define ANDROIDLOGD(...)
+#define ANDROIDLOGE(...)
 enum LogLevel {
  kNO_LOG,
  kLOG_ERROR,

--- a/src/common/log.cpp
+++ b/src/common/log.cpp
@@ -12,6 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "log.h"
+#pragma once
-namespace paddle_mobile {}
+#define EXPORT __attribute__((visibility("default")))
--- a/src/platform/macros.h
+++ b/src/platform/macros.h
@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#pragma once
+#ifdef PADDLE_MOBILE_USE_OPENMP
+/**
-// Disable the copy and assignment operator for a class.
+ * android-ndk-r17 has a problem when linking with openmp.
-#ifndef DISABLE_COPY_AND_ASSIGN
+ * if paddle-mobile enables -fopenmp, but didn't use those omp_* functions,
-#define DISABLE_COPY_AND_ASSIGN(classname)          \
+ * after linking another binary with libpaddle-mobile.so, the omp_get_thread_num
- private:                                           \
+ * will not work. see test/common/test_openmp.cc the detailed reason is still
-  classname(const classname &) = delete;            \
+ * unclear, but this trick will work. a better solution is hacking the linker,
-  classname(classname &&) = delete;                 \
+ * try some flags to make it link omp_* functions, but I didn't find out how to
-  classname &operator=(const classname &) = delete; \
+ * make it work.
-  classname &operator=(classname &&) = delete
+ */
+#include <omp.h>
+static int _ = omp_get_num_procs();
 #endif
--- a/src/common/protobuf-c.c
+++ b/src/common/protobuf-c.c
--- a/src/common/protobuf-c.h
+++ b/src/common/protobuf-c.h
@@ -798,76 +798,6 @@ uint32_t protobuf_c_version_number(void);
 */
 #define PROTOBUF_C_MIN_COMPILER_VERSION 1000000
-/**
- * Look up a `ProtobufCEnumValue` from a `ProtobufCEnumDescriptor` by name.
- *
- * \param desc
- *      The `ProtobufCEnumDescriptor` object.
- * \param name
- *      The `name` field from the corresponding `ProtobufCEnumValue` object to
- *      match.
- * \return
- *      A `ProtobufCEnumValue` object.
- * \retval NULL
- *      If not found or if the optimize_for = CODE_SIZE option was set.
- */
-PROTOBUF_C__API
-const ProtobufCEnumValue *protobuf_c_enum_descriptor_get_value_by_name(
-    const ProtobufCEnumDescriptor *desc, const char *name);
-/**
- * Look up a `ProtobufCEnumValue` from a `ProtobufCEnumDescriptor` by numeric
- * value.
- *
- * \param desc
- *      The `ProtobufCEnumDescriptor` object.
- * \param value
- *      The `value` field from the corresponding `ProtobufCEnumValue` object to
- *      match.
- *
- * \return
- *      A `ProtobufCEnumValue` object.
- * \retval NULL
- *      If not found.
- */
-PROTOBUF_C__API
-const ProtobufCEnumValue *protobuf_c_enum_descriptor_get_value(
-    const ProtobufCEnumDescriptor *desc, int value);
-/**
- * Look up a `ProtobufCFieldDescriptor` from a `ProtobufCMessageDescriptor` by
- * the name of the field.
- *
- * \param desc
- *      The `ProtobufCMessageDescriptor` object.
- * \param name
- *      The name of the field.
- * \return
- *      A `ProtobufCFieldDescriptor` object.
- * \retval NULL
- *      If not found or if the optimize_for = CODE_SIZE option was set.
- */
-PROTOBUF_C__API
-const ProtobufCFieldDescriptor *protobuf_c_message_descriptor_get_field_by_name(
-    const ProtobufCMessageDescriptor *desc, const char *name);
-/**
- * Look up a `ProtobufCFieldDescriptor` from a `ProtobufCMessageDescriptor` by
- * the tag value of the field.
- *
- * \param desc
- *      The `ProtobufCMessageDescriptor` object.
- * \param value
- *      The tag value of the field.
- * \return
- *      A `ProtobufCFieldDescriptor` object.
- * \retval NULL
- *      If not found.
- */
-PROTOBUF_C__API
-const ProtobufCFieldDescriptor *protobuf_c_message_descriptor_get_field(
-    const ProtobufCMessageDescriptor *desc, unsigned value);
 /**
 * Determine the number of bytes required to store the serialised message.
 *
@@ -947,33 +877,6 @@ PROTOBUF_C__API
 void protobuf_c_message_init(const ProtobufCMessageDescriptor *descriptor,
                             void *message);
-/**
- * Free a service.
- *
- * \param service
- *      The service object to free.
- */
-PROTOBUF_C__API
-void protobuf_c_service_destroy(ProtobufCService *service);
-/**
- * Look up a `ProtobufCMethodDescriptor` by name.
- *
- * \param desc
- *      Service descriptor.
- * \param name
- *      Name of the method.
- *
- * \return
- *      A `ProtobufCMethodDescriptor` object.
- * \retval NULL
- *      If not found or if the optimize_for = CODE_SIZE option was set.
- */
-PROTOBUF_C__API
-const ProtobufCMethodDescriptor *
-protobuf_c_service_descriptor_get_method_by_name(
-    const ProtobufCServiceDescriptor *desc, const char *name);
 /**
 * Initialise a `ProtobufCBufferSimple` object.
 */
@@ -1011,18 +914,6 @@ PROTOBUF_C__API
 void protobuf_c_buffer_simple_append(ProtobufCBuffer *buffer, size_t len,
                                     const unsigned char *data);
-PROTOBUF_C__API
-void protobuf_c_service_generated_init(
-    ProtobufCService *service, const ProtobufCServiceDescriptor *descriptor,
-    ProtobufCServiceDestroy destroy);
-PROTOBUF_C__API
-void protobuf_c_service_invoke_internal(ProtobufCService *service,
-                                        unsigned method_index,
-                                        const ProtobufCMessage *input,
-                                        ProtobufCClosure closure,
-                                        void *closure_data);
 /**@}*/
 PROTOBUF_C__END_DECLS

--- a/src/common/type_define.h
+++ b/src/common/type_define.h
@@ -16,7 +16,6 @@ limitations under the License. */
 #include <map>
 #include <string>
-#include <unordered_set>
 #include <vector>
 #include "framework/attribute.h"
 #include "framework/scope.h"
@@ -40,13 +39,6 @@ using OpCreator = std::function<framework::OperatorBase<Dtype> *(
    const framework::AttributeMap & /*attrs*/,
    std::shared_ptr<framework::Scope> /*scope*/)>;
-using GradOpMakerFN =
-    std::function<std::vector<std::unique_ptr<framework::OpDesc>>(
-        const framework::OpDesc &,
-        const std::unordered_set<std::string> & /*no_grad_set*/,
-        std::unordered_map<std::string, std::string> * /*grad_to_var*/,
-        const std::vector<framework::BlockDesc *> &grad_block)>;
 using InferVarTypeFN = std::function<void(const framework::OpDesc & /*op_desc*/,
                                          framework::BlockDesc * /*block*/)>;

--- a/src/common/types.h
+++ b/src/common/types.h
@@ -16,7 +16,6 @@ limitations under the License. */
 #include <string>
 #include <unordered_map>
-#include <utility>
 namespace paddle_mobile {
 enum class Precision : int { FP32 = 0 };
@@ -80,6 +79,7 @@ static const std::string G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
 static const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU =
    "fusion_conv_add_relu";
 static const std::string G_OP_TYPE_FC = "fc";
+static const std::string G_OP_TYPE_CONV_ADD = "conv_add";
 static const std::string G_OP_TYPE_LRN = "lrn";
 static const std::string G_OP_TYPE_MUL = "mul";
 static const std::string G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms";
@@ -116,5 +116,6 @@ static std::unordered_map<
        {G_OP_TYPE_PRIOR_BOX, {{"Image", "Input"}, {"Boxes", "Variances"}}},
        {G_OP_TYPE_MULTICLASS_NMS, {{"BBoxes", "Scores"}, {"Out"}}},
        {G_OP_TYPE_FC, {{"X", "Y", "Z"}, {"Out"}}},
-        {G_OP_TYPE_RESHAPE, {{"X"}, {"Out"}}}};
+        {G_OP_TYPE_RESHAPE, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_DEPTHWISE_CONV, {{"Input"}, {"Output"}}}};
 }  // namespace paddle_mobile
--- a/src/common/variant.cpp
+++ b/src/common/variant.cpp
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
--- a/src/common/variant.h
+++ b/src/common/variant.h
@@ -12,8 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include <iostream>
+#include "common/enforce.h"
 #include "common/log.h"
 #pragma once
@@ -57,15 +56,11 @@ class RawData {
  char data[size];
  RawData() {}
  RawData(const RawData &raw_data) { strcpy(data, raw_data.data); }
-  //      void operator=(const RawData &raw_data){
-  //        strcpy(data, raw_data.data);
-  //      }
 };
 template <typename... Ts>
 struct Variant {
  Variant(const Variant &variant) {
-    //        std::cout << " 赋值构造函数 " << std::endl;
    type_id = variant.type_id;
    data = variant.data;
  }
@@ -87,8 +82,7 @@ struct Variant {
    if (type_id == typeid(T).hash_code()) {
      return *const_cast<T *>(reinterpret_cast<const T *>(&data));
    } else {
-      //      std::cout << " bad cast in variant " << std::endl;
+      PADDLE_MOBILE_THROW_EXCEPTION(" bad cast in variant ");
-      throw std::bad_cast();
    }
  }

--- a/src/framework/attribute.cpp
+++ b/src/framework/attribute.cpp
@@ -17,14 +17,8 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace framework {
-/*
- * Variant<int, float, std::string, std::vector<int>, std::vector<float>,
-          std::vector<std::string>, bool, std::vector<bool>, BlockDesc *,
-          int64_t>
- * */
 struct PrintVistor : Vistor<Print &> {
-  PrintVistor(Print &printer) : printer_(printer) {}
+  explicit PrintVistor(Print &printer) : printer_(printer) {}
  template <typename T>
  Print &operator()(const T &value) {
    printer_ << value;

--- a/src/framework/attribute.h
+++ b/src/framework/attribute.h
@@ -14,7 +14,10 @@ limitations under the License. */
 #pragma once
+#include <string>
 #include <unordered_map>
+#include <vector>
 #include "common/enforce.h"
 #include "common/log.h"
 #include "common/variant.h"
@@ -22,28 +25,15 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace framework {
+using std::string;
+using std::vector;
 class BlockDesc;
 class Attribute {
 public:
-  /*
-   *  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INT = 0,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOAT = 1,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRING = 2,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INTS = 3,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOATS = 4,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRINGS = 5,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEAN = 6,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS = 7,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK = 8,
-  PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG = 9
-    PROTOBUF_C__FORCE_ENUM_TO_BE_INT_SIZE(PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE)
-   *
-   * */
  static Attribute GetAttrValue(
      PaddleMobile__Framework__Proto__OpDesc__Attr *attr_desc) {
-    //    std::cout << "begin get attr value" << std::endl;
    Attribute attr;
    switch (attr_desc->type) {
      case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEAN: {
@@ -63,35 +53,35 @@ class Attribute {
        break;
      }
      case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BOOLEANS: {
-        std::vector<bool> val(attr_desc->n_bools);
+        vector<bool> val(attr_desc->n_bools);
        for (int i = 0; i < attr_desc->n_bools; ++i) {
          val[i] = attr_desc->bools[i];
        }
-        attr.Set<std::vector<bool>>(val);
+        attr.Set<vector<bool>>(val);
        break;
      }
      case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__INTS: {
-        std::vector<int> val(attr_desc->n_ints);
+        vector<int> val(attr_desc->n_ints);
        for (int i = 0; i < attr_desc->n_ints; ++i) {
          val[i] = attr_desc->ints[i];
        }
-        attr.Set<std::vector<int>>(val);
+        attr.Set<vector<int>>(val);
        break;
      }
      case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__FLOATS: {
-        std::vector<float> val(attr_desc->n_floats);
+        vector<float> val(attr_desc->n_floats);
        for (int i = 0; i < attr_desc->n_floats; ++i) {
          val[i] = attr_desc->floats[i];
        }
-        attr.Set<std::vector<float>>(val);
+        attr.Set<vector<float>>(val);
        break;
      }
      case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__STRINGS: {
-        std::vector<std::string> val(attr_desc->n_strings);
+        vector<string> val(attr_desc->n_strings);
        for (int i = 0; i < attr_desc->n_strings; ++i) {
          val[i] = attr_desc->strings[i];
        }
-        attr.Set<std::vector<std::string>>(val);
+        attr.Set<vector<string>>(val);
        break;
      }
      case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__LONG: {
@@ -122,47 +112,41 @@ class Attribute {
      return vistor(attr.variant_.Get<int>());
    } else if (attr.variant_.TypeId() == typeid(float).hash_code()) {
      return vistor(attr.variant_.Get<float>());
-    } else if (attr.variant_.TypeId() == typeid(std::string).hash_code()) {
+    } else if (attr.variant_.TypeId() == typeid(string).hash_code()) {
-      return vistor(attr.variant_.Get<std::string>());
+      return vistor(attr.variant_.Get<string>());
-    } else if (attr.variant_.TypeId() == typeid(std::vector<int>).hash_code()) {
+    } else if (attr.variant_.TypeId() == typeid(vector<int>).hash_code()) {
-      return vistor(attr.variant_.Get<std::vector<int>>());
+      return vistor(attr.variant_.Get<vector<int>>());
-    } else if (attr.variant_.TypeId() ==
+    } else if (attr.variant_.TypeId() == typeid(vector<float>).hash_code()) {
-               typeid(std::vector<float>).hash_code()) {
+      return vistor(attr.variant_.Get<vector<float>>());
-      return vistor(attr.variant_.Get<std::vector<float>>());
+    } else if (attr.variant_.TypeId() == typeid(vector<string>).hash_code()) {
-    } else if (attr.variant_.TypeId() ==
+      return vistor(attr.variant_.Get<vector<string>>());
-               typeid(std::vector<std::string>).hash_code()) {
-      return vistor(attr.variant_.Get<std::vector<std::string>>());
    } else if (attr.variant_.TypeId() == typeid(bool).hash_code()) {
      return vistor(attr.variant_.Get<bool>());
-    } else if (attr.variant_.TypeId() ==
+    } else if (attr.variant_.TypeId() == typeid(vector<bool>).hash_code()) {
-               typeid(std::vector<bool>).hash_code()) {
+      return vistor(attr.variant_.Get<vector<bool>>());
-      return vistor(attr.variant_.Get<std::vector<bool>>());
    } else if (attr.variant_.TypeId() == typeid(int64_t).hash_code()) {
      return vistor(attr.variant_.Get<int64_t>());
    } else {
-      throw std::bad_exception();
+      PADDLE_MOBILE_THROW_EXCEPTION("type not support");
    }
  }
 private:
-  Variant<int, float, std::string, std::vector<int>, std::vector<float>,
+  Variant<int, float, string, vector<int>, vector<float>, vector<string>, bool,
-          std::vector<std::string>, bool, std::vector<bool>, BlockDesc *,
+          vector<bool>, BlockDesc *, int64_t>
-          int64_t>
      variant_;
 };
-using AttributeMap = std::unordered_map<std::string, Attribute>;
+using AttributeMap = std::unordered_map<string, Attribute>;
 class AttrReader {
 public:
  explicit AttrReader(const AttributeMap &attrs) : attrs_(attrs) {}
  template <typename T>
-  inline T Get(const std::string &name) const {
+  inline T Get(const string &name) const {
-    //          PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should
+    // The message is printf-style, so %s needs a C string, not a std::string.
+    PADDLE_MOBILE_ENFORCE(attrs_.count(name) != 0,
-    //          be in
+                          "%s should  be in AttributeMap", name.c_str());
-    //          AttributeMap",
-    //                         name);
    return ((Attribute)attrs_.at(name)).Get<T>();
  }

--- a/src/framework/data_layout.h
+++ b/src/framework/data_layout.h
@@ -15,7 +15,6 @@ limitations under the License. */
 #pragma once
 #include <cctype>
-#include <iostream>
 #include <string>
+// Needed for PADDLE_MOBILE_THROW_EXCEPTION, used by StringToDataLayout.
+#include "common/enforce.h"
 namespace paddle_mobile {
@@ -40,7 +39,7 @@ inline DataLayout StringToDataLayout(const std::string &str) {
  } else if (s == "ANYLAYOUT") {
    return DataLayout::kAnyLayout;
  } else {
-    //    std::cout << "Unknown storage order string: %s", s;
+    PADDLE_MOBILE_THROW_EXCEPTION("Unknown storage order string: %s", s.c_str());
  }
 }
@@ -54,14 +53,8 @@ inline std::string DataLayoutToString(const DataLayout &data_layout) {
      return "ANY_LAYOUT";
    default:
      break;
-      //      std::cout << "unknown DataLayou %d", data_layout;
  }
 }
-inline std::ostream &operator<<(std::ostream &out, const DataLayout &l) {
-  out << DataLayoutToString(l);
-  return out;
-}
 }  // namespace framework
 }  // namespace paddle_mobile
--- a/src/framework/data_transform.cpp
+++ b/src/framework/data_transform.cpp
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#pragma once
-#include "framework/data_transform.h"
-namespace paddle_mobile {
-namespace framework {
-static void PassTensorData(Tensor *from, Tensor *to) {
-  to->ShareDataWith(*from);
-  *from = Tensor();
-}
-void DataTransform(const OpKernelType &expected_kernel_type,
-                   const OpKernelType &kernel_type_for_var,
-                   const Tensor &input_tensor, Tensor *output_tensor) {
-  bool transformed = false;
-  Tensor in;
-  in.ShareDataWith(input_tensor);
-  Tensor out;
-  //  // do layout transform
-  //  if (NeedTransformLayout(expected_kernel_type.data_layout_,
-  //                          kernel_type_for_var.data_layout_)) {
-  //    TransDataLayout(kernel_type_for_var, expected_kernel_type, in,
-  //    &out);
-  //    transformed = true;
-  //    PassTensorData(&out, &in);
-  //  }
-  //
-  //  // do data type transform
-  //  if (expected_kernel_type.data_type_ !=
-  //  kernel_type_for_var.data_type_) {
-  //    TransDataType(kernel_type_for_var, expected_kernel_type, in,
-  //    &out);
-  //    transformed = true;
-  //    PassTensorData(&out, &in);
-  //  }
-  //
-  //  // do device transform
-  //  if (!platform::is_same_place(kernel_type_for_var.place_,
-  //                               expected_kernel_type.place_)) {
-  //    TransDataDevice(in, expected_kernel_type.place_, &out);
-  //    transformed = true;
-  //    PassTensorData(&out, &in);
-  //  }
-  //
-  //  PADDLE_ENFORCE(transformed, "No transform is applied, please
-  //  check!");
-  // get output data
-  output_tensor->ShareDataWith(in);
-}
-void CopyVariableWithTensor(const Variable &in_var, const Tensor &tensor,
-                            Variable *out_var) {
-  //  if (in_var.IsType<LoDTensor>()) {
-  //    auto& in_lod_tensor = in_var.Get<LoDTensor>();
-  //    auto* tran_lod_tensor = out_var.GetMutable<LoDTensor>();
-  //    tran_lod_tensor->set_lod(in_lod_tensor.lod());
-  //    tran_lod_tensor->set_layout(in_lod_tensor.layout());
-  //    tran_lod_tensor->ShareDataWith(tensor);
-  //  } else if (in_var.IsType<SelectedRows>()) {
-  //    auto& in_selected_rows = in_var.Get<SelectedRows>();
-  //    auto* trans_selected_rows =
-  //    out_var.GetMutable<SelectedRows>();
-  //    trans_selected_rows->set_height(in_selected_rows.height());
-  //    trans_selected_rows->set_rows(in_selected_rows.rows());
-  //    trans_selected_rows->mutable_value()->ShareDataWith(tensor);
-  //  } else {
-  //    PADDLE_THROW("unknown var type");
-  //  }
-}
-}  // namespace framework
-}  // namespace paddle_mobile
--- a/src/framework/data_type.h
+++ b/src/framework/data_type.h
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#pragma once
-namespace paddle_mobile {
-namespace framework {
-//    inline proto::VarType::Type ToDataType(std::type_index type) {
-//        using namespace paddle_mobile::framework::proto;
-//        if (typeid(float).hash_code() == type.hash_code()) {
-//            return proto::VarType::FP32;
-//        } else if (typeid(double).hash_code() == type.hash_code()) {
-//            return proto::VarType::FP64;
-//        } else if (typeid(int).hash_code() == type.hash_code()) {
-//            return proto::VarType::INT32;
-//        } else if (typeid(int64_t).hash_code() == type.hash_code()) {
-//            return proto::VarType::INT64;
-//        } else if (typeid(bool).hash_code() == type.hash_code()) {
-//            return proto::VarType::BOOL;
-//        } else {
-////            PADDLE_THROW("Not supported");
-//        }
-//    }
-}  // namespace framework
-}  // namespace paddle_mobile
--- a/src/framework/ddim.cpp
+++ b/src/framework/ddim.cpp
@@ -63,9 +63,6 @@ void make_ddim(DDim &ddim, const int64_t *dims, int n) {
      ddim = make_dim<9>(dims);
      break;
    default:
-      //      std::cout << "Dynamic dimensions must have between [1,
-      //      9]
-      //      dimensions.";
      break;
  }
 }
@@ -133,9 +130,6 @@ int64_t DDim::operator[](int idx) const {
 int DDim::size() const { return arity(*this); }
 bool DDim::operator==(DDim d) const {
-  //  if (var.which() != d.getVar().which()) {
-  //    return false;
-  //  } else {
  std::vector<int64_t> v1 = vectorize(*this);
  std::vector<int64_t> v2 = vectorize(d);
@@ -157,7 +151,7 @@ DDim DDim::operator+(DDim d) const {
  std::vector<int64_t> v3;
-  assert(v1.size() == v2.size());
+  PADDLE_MOBILE_ENFORCE(v1.size() == v2.size(), "v1.size() != v2.size()");
  for (unsigned int i = 0; i < v1.size(); i++) {
    v3.push_back(v1[i] + v2[i]);
@@ -172,7 +166,7 @@ DDim DDim::operator*(DDim d) const {
  std::vector<int64_t> v3;
-  assert(v1.size() == v2.size());
+  // The message describes the failure, matching DDim::operator+.
+  PADDLE_MOBILE_ENFORCE(v1.size() == v2.size(), "v1.size() != v2.size()");
  for (unsigned int i = 0; i < v1.size(); i++) {
    v3.push_back(v1[i] * v2[i]);
@@ -183,7 +177,7 @@ DDim DDim::operator*(DDim d) const {
 int64_t get(const DDim &ddim, int idx) { return ddim[idx]; }
-void set(DDim &ddim, int idx, int value) { ddim[idx] = value; }
+void set(DDim *ddim, int idx, int value) { (*ddim)[idx] = value; }
 /// @cond HIDDEN
 struct VectorizeVisitor : Vistor<void> {
@@ -235,13 +229,10 @@ struct SliceVectorizeVisitor : Vistor<void> {
  SliceVectorizeVisitor(std::vector<int64_t> &v, int b, int e)
      : vector(v), begin(b), end(e) {
-    //    PADDLE_ENFORCE(begin < end,
+    PADDLE_MOBILE_ENFORCE(
-    //                   "Begin index must be less than end index in
+        begin < end, "Begin index must be less than end index in ddim slice.");
-    //                   ddim
+    PADDLE_MOBILE_ENFORCE(begin >= 0,
-    //                   slice.");
+                          "Begin index can't be less than zero in ddim slice.");
-    //    PADDLE_ENFORCE(begin >= 0,
-    //                   "Begin index can't be less than zero in
-    //                   ddim slice.");
  }
  template <int S>
@@ -267,9 +258,7 @@ DDim slice_ddim(const DDim &ddim, int begin, int end) {
  std::vector<int64_t> vec;
  vec.reserve(end - begin);
  SliceVectorizeVisitor visitor(vec, begin, end);
-  //  boost::apply_visitor(visitor, dim);
  DDim::ApplyVistor(visitor, ddim);
-  //  visitor(ddim.var.Get<Dim<4>>());
  return make_ddim(vec);
 }
@@ -287,31 +276,19 @@ struct ArityVisitor : Vistor<int> {
 int arity(const DDim &d) {
  ArityVisitor arityVisitor = ArityVisitor();
  return DDim::ApplyVistor(arityVisitor, d);
-  //  return arityVisitor(d.var.Get<Dim<4>>());
-  //  return boost::apply_visitor(ArityVisitor(), d); }
 }
-/// \cond HIDDEN
-/// \endcond
-struct OSVistor : Vistor<std::ostream &> {
+#ifdef PADDLE_MOBILE_DEBUG
-  OSVistor(std::ostream &os) : os_(os) {}
+Print &operator<<(Print &printer, const DDim &ddim) {
+  for (int j = 0; j < ddim.size(); ++j) {
-  template <int D>
+    printer << ddim[j] << " ";
-  std::ostream &operator()(Dim<D> dim) const {
-    return os_ << dim;
  }
- private:
+  return printer;
-  std::ostream &os_;
-};
-std::ostream &operator<<(std::ostream &os, const DDim &ddim) {
-  auto vistor = OSVistor(os);
-  DDim::ApplyVistor(vistor, ddim);
-  return os;
 }
+#endif
 DDim::DDim(std::initializer_list<int64_t> init_list) {
  *this = make_ddim(init_list);
 }

--- a/src/framework/ddim.h
+++ b/src/framework/ddim.h
@@ -14,10 +14,9 @@ limitations under the License. */
 #pragma once
-#include <assert.h>
 #include <initializer_list>
-#include <stdexcept>
 #include <vector>
+#include "common/enforce.h"
 #include "common/variant.h"
 #include "dim.h"
@@ -58,9 +57,7 @@ struct DDim {
    } else if (d.var.TypeId() == typeid(Dim<9>).hash_code()) {
      return vistor(d.var.Get<Dim<9>>());
    } else {
-      printf(" dim not support  \n");
+      DLOG << " dim not support";
+      // Every supported rank returned in a branch above. Fail explicitly here
+      // instead of silently flowing off the end of a function whose other
+      // branches return the visitor's result.
+      // NOTE(review): whether this stops execution when ENABLE_EXCEPTION is
+      // not defined depends on common/enforce.h - confirm.
+      PADDLE_MOBILE_ENFORCE(false, " dim not support");
-      throw std::bad_exception();
-      //        return typename Vistor::type_t();
    }
  }
@@ -83,17 +80,6 @@ struct DDim {
  int64_t operator[](int idx) const;
-  //  template <typename Visitor>
-  //  typename Visitor::result_type apply_visitor(Visitor& visitor) {
-  //    return var.apply_visitor(visitor);
-  //  }
-  //
-  //  template <typename Visitor>
-  //  typename Visitor::result_type apply_visitor(Visitor& visitor)
-  //  const {
-  //    return var.apply_visitor(visitor);
-  //  }
  DDimVar getVar() { return var; }
  bool operator==(DDim d) const;
@@ -126,7 +112,7 @@ DDim make_ddim(std::initializer_list<int64_t> dims);
 int64_t get(const DDim &dim, int idx);
-void set(DDim &dim, int idx, int val);
+void set(DDim *dim, int idx, int val);
 std::vector<int64_t> vectorize(const DDim &ddim);
@@ -151,8 +137,6 @@ DDim slice_ddim(const DDim &dim, int begin, int end);
 int arity(const DDim &ddim);
-std::ostream &operator<<(std::ostream &, const DDim &);
 // Reshape a tensor to a matrix. The matrix's first dimension(column
 // length)
 // will be the product of tensor's first `num_col_dims` dimensions.
@@ -163,5 +147,9 @@ DDim flatten_to_1d(const DDim &src);
 DDim stride(const DDim &ddim);
 DDim stride_numel(const DDim &ddim);
+#ifdef PADDLE_MOBILE_DEBUG
+Print &operator<<(Print &printer, const DDim &ddim);
+#endif
 }  // namespace framework
 }  // namespace paddle_mobile
--- a/src/framework/dim.h
+++ b/src/framework/dim.h
@@ -14,13 +14,7 @@ limitations under the License. */
 #pragma once
-#include <iostream>
+#include "common/enforce.h"
-#include <sstream>
-#include <stdexcept>
-#include <type_traits>
-#include "platform/hostdevice.h"
 namespace paddle_mobile {
 namespace framework {
@@ -30,42 +24,35 @@ struct Dim {
  static constexpr int dimensions = i;
  template <typename... Args>
-  HOSTDEVICE Dim(int64_t _head, Args... _tail) : head(_head), tail(_tail...) {
+  Dim(int64_t _head, Args... _tail) : head(_head), tail(_tail...) {
    static_assert(sizeof...(_tail) == i - 1,
                  "Dim initialized with the wrong number of parameters");
  }
-  HOSTDEVICE
  Dim(int64_t _head, const Dim<i - 1> &_tail) : head(_head), tail(_tail) {}
-  HOSTDEVICE
  Dim() : head(0), tail() {}
  /** Construct a Dim from a linear index and size.  Uses Fortran
   * order
   * indexing. */
-  HOSTDEVICE
  Dim(int64_t idx, const Dim<i> &size)
      : head(idx % size.head), tail(idx / size.head, size.tail) {}
  /** Construct a Dim with each dimension set to the given index */
-  HOSTDEVICE
  Dim(int64_t idx) : head(idx), tail(idx) {}
-  HOSTDEVICE
  bool operator==(const Dim<i> &o) const {
    return (head == o.head) && (tail == o.tail);
  }
-  HOSTDEVICE
  bool operator!=(const Dim<i> &o) const { return !(*this == o); }
-  HOSTDEVICE
  int64_t &operator[](int idx);
-  HOSTDEVICE
  int64_t operator[](int idx) const;
-  HOST std::string to_string() const;
+  std::string to_string() const;
  int64_t head;
  Dim<i - 1> tail;
@@ -76,32 +63,22 @@ template <>
 struct Dim<0> {
  static constexpr int dimensions = 0;
-  HOSTDEVICE
  Dim(int64_t _head) {}
-  HOSTDEVICE
  Dim() {}
-  HOSTDEVICE
  Dim(int idx, const Dim<0> &size) {
-#ifndef __CUDA_ARCH__
    if (idx > 0) {
-      throw std::invalid_argument("Index out of range.");
+      PADDLE_MOBILE_THROW_EXCEPTION("Index out of range.")
    }
-#else
-    PADDLE_ASSERT(idx == 0);
-#endif
  }
-  HOSTDEVICE
  bool operator==(const Dim<0> &o) const { return true; }
-  HOSTDEVICE
  bool operator!=(const Dim<0> &o) const { return false; }
-  HOSTDEVICE
  int64_t &operator[](int idx);
-  HOSTDEVICE
  int64_t operator[](int idx) const;
 };
@@ -112,12 +89,12 @@ template <int i>
 struct DimGetter {
  // Return a copy if Dim is const
  template <typename D>
-  HOSTDEVICE static int64_t impl(const D &d) {
+  static int64_t impl(const D &d) {
    return DimGetter<i - 1>::impl(d.tail);
  }
  // Return a reference if Dim is mutable
  template <typename D>
-  HOSTDEVICE static int64_t &impl(D &d) {
+  static int64_t &impl(D &d) {
    return DimGetter<i - 1>::impl(d.tail);
  }
 };
@@ -127,25 +104,22 @@ template <>
 struct DimGetter<0> {
  // Return a copy if Dim is const
  template <typename D>
-  HOSTDEVICE static int64_t impl(const D &d) {
+  static int64_t impl(const D &d) {
    return d.head;
  }
  // Return a reference if Dim is mutable
  template <typename D>
-  HOSTDEVICE static int64_t &impl(D &d) {
+  static int64_t &impl(D &d) {
    return d.head;
  }
 };
 template <int D>
-HOSTDEVICE int64_t &indexer(Dim<D> &dim, int idx) {
+int64_t &indexer(Dim<D> &dim, int idx) {
-#ifndef __CUDA_ARCH__
  if (idx < 0) {
-    throw std::invalid_argument("Tried to access a negative dimension");
+    PADDLE_MOBILE_THROW_EXCEPTION("Tried to access a negative dimension")
  }
-#else
-  PADDLE_ASSERT(idx >= 0);
-#endif
  if (idx == 0) {
    return dim.head;
  }
@@ -153,31 +127,15 @@ HOSTDEVICE int64_t &indexer(Dim<D> &dim, int idx) {
 }
 template <>
-HOSTDEVICE int64_t &indexer<0>(Dim<0> &dim, int idx) {
+int64_t &indexer<0>(Dim<0> &dim, int idx) {
-#ifndef __CUDA_ARCH__
+  PADDLE_MOBILE_THROW_EXCEPTION("Invalid index")
+  // Reached only when the macro above does not throw (the build passes
+  // -fno-exceptions unless USE_EXCEPTION is ON). Hand back a dummy slot so
+  // this reference-returning function never flows off its end.
+  static int64_t head = 0;
+  return head;
-  throw std::invalid_argument("Invalid index");
-#else
-  PADDLE_ASSERT(false);
-#if CUDA_VERSION < 8000
-  // On CUDA versions previous to 8.0, only __shared__ variables
-  // could be declared as static in the device code.
-  int64_t head = 0;
-#else
-  static int64_t head = 0;
-#endif
-  return head;
-#endif
 }
 template <int D>
-HOSTDEVICE int64_t indexer(const Dim<D> &dim, int idx) {
+int64_t indexer(const Dim<D> &dim, int idx) {
-#ifndef __CUDA_ARCH__
  if (idx < 0) {
-    throw std::invalid_argument("Tried to access a negative dimension");
+    PADDLE_MOBILE_THROW_EXCEPTION("Tried to access a negative dimension")
  }
-#else
-  PADDLE_ASSERT(idx >= 0);
-#endif
  if (idx == 0) {
    return dim.head;
  }
@@ -185,102 +143,84 @@ HOSTDEVICE int64_t indexer(const Dim<D> &dim, int idx) {
 }
 template <>
-HOSTDEVICE int64_t indexer<0>(const Dim<0> &dim, int idx) {
+int64_t indexer<0>(const Dim<0> &dim, int idx) {
-#ifndef __CUDA_ARCH__
+  PADDLE_MOBILE_THROW_EXCEPTION("Invalid index")
+  // Reached only when the macro above does not throw (the build passes
+  // -fno-exceptions unless USE_EXCEPTION is ON). Return a defined value so
+  // this function never flows off its end.
+  return 0;
-  throw std::invalid_argument("Invalid index");
-#else
-  PADDLE_ASSERT(false);
-#if CUDA_VERSION < 8000
-  // On CUDA versions previous to 8.0, only __shared__ variables
-  // could be declared as static in the device code.
-  int64_t head = 0;
-#else
-  static int64_t head = 0;
-#endif
-  return head;
-#endif
 }
 }  // namespace
 // Static access to constant Dim
 template <int i, int l>
-HOSTDEVICE int64_t get(const Dim<l> &d) {
+int64_t get(const Dim<l> &d) {
  return DimGetter<i>::impl(d);
 }
 // Static access to mutable Dim
 template <int i, int l>
-HOSTDEVICE int64_t &get(Dim<l> &d) {
+int64_t &get(Dim<l> &d) {
  return DimGetter<i>::impl(d);
 }
 // Dynamic access to constant Dim
 template <int l>
-HOSTDEVICE int64_t Dim<l>::operator[](int i) const {
+int64_t Dim<l>::operator[](int i) const {
  //  std::cout << "l: " << l << std::endl;
  return indexer(*this, i);
 }
 // Dynamic access to mutable Dim
 template <int l>
-HOSTDEVICE int64_t &Dim<l>::operator[](int i) {
+int64_t &Dim<l>::operator[](int i) {
  return indexer(*this, i);
 }
 // Dynamic access to constant Dim
-inline HOSTDEVICE int64_t Dim<0>::operator[](int i) const {
+inline int64_t Dim<0>::operator[](int i) const { return indexer(*this, i); }
-  return indexer(*this, i);
-}
 // Dynamic access to mutable Dim
-inline HOSTDEVICE int64_t &Dim<0>::operator[](int i) {
+inline int64_t &Dim<0>::operator[](int i) { return indexer(*this, i); }
-  return indexer(*this, i);
-}
 // Dynamic access to constant Dim
 // without std::enable_if will try to instantiate this on get<0>(d)
 template <int l>
-HOSTDEVICE typename std::enable_if<(l > 0), int64_t>::type get(const Dim<l> &d,
+typename std::enable_if<(l > 0), int64_t>::type get(const Dim<l> &d, int i) {
-                                                               int i) {
  return d[i];
 }
 // Dynamic access to mutable Dim
 template <int l>
-HOSTDEVICE typename std::enable_if<(l > 0), int64_t &>::type get(Dim<l> &d,
+typename std::enable_if<(l > 0), int64_t &>::type get(Dim<l> &d, int i) {
-                                                                 int i) {
  return d[i];
 }
 // Dot product of two dims
 template <int i>
-HOSTDEVICE int64_t linearize(const Dim<i> &a, const Dim<i> &b) {
+int64_t linearize(const Dim<i> &a, const Dim<i> &b) {
  return a.head * b.head + linearize(a.tail, b.tail);
 }
 // Base case dot product of two Dims
 // Notice it is inline because it is no longer a template
 template <>
-HOSTDEVICE inline int64_t linearize(const Dim<0> &a, const Dim<0> &b) {
+inline int64_t linearize(const Dim<0> &a, const Dim<0> &b) {
  return 0;
 }
 // Product of a Dim
 template <int i>
-HOSTDEVICE int64_t product(const Dim<i> &a, int prod = 1) {
+int64_t product(const Dim<i> &a, int prod = 1) {
  return prod * a.head * product(a.tail);
 }
 // Base case product of a Dim
 // Notice it is inline because it is no longer a template
 template <>
-HOSTDEVICE inline int64_t product(const Dim<0> &a, int prod) {
+inline int64_t product(const Dim<0> &a, int prod) {
  return prod;
 }
 // Is 0 <= idx_i < size_i for all i?
 template <int i>
-HOSTDEVICE bool contained(const Dim<i> &idx, const Dim<i> &size) {
+bool contained(const Dim<i> &idx, const Dim<i> &size) {
  return ((0 <= idx.head) && (idx.head < size.head) &&
          contained(idx.tail, size.tail));
 }
@@ -288,7 +228,7 @@ HOSTDEVICE bool contained(const Dim<i> &idx, const Dim<i> &size) {
 // Base case of is 0 <= idx_i < size_i ?
 // Notice it is inline because it is no longer a template
 template <>
-HOSTDEVICE inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
+inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
  return true;
 }
@@ -296,7 +236,7 @@ HOSTDEVICE inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
 * \brief Compute exclusive prefix-multiply of a Dim.
 */
 template <int i>
-HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) {
+Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) {
  return Dim<i>(mul, ex_prefix_mul(src.tail, mul * src.head));
 }
@@ -304,7 +244,7 @@ HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) {
 // Base case of ex_prefix_mul
 // Notice it is inline because it is no longer a template
 template <>
-HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) {
+inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) {
  return Dim<0>();
 }
 ///\endcond
@@ -313,18 +253,18 @@ HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) {
 * Add two dimensions together
 */
 template <int i>
-HOSTDEVICE Dim<i> dim_plus(const Dim<i> &a, const Dim<i> &b) {
+Dim<i> dim_plus(const Dim<i> &a, const Dim<i> &b) {
  return Dim<i>(a.head + b.head, dim_plus(a.tail, b.tail));
 }
 // Base case
 template <>
-HOSTDEVICE inline Dim<0> dim_plus(const Dim<0> &a, const Dim<0> &b) {
+inline Dim<0> dim_plus(const Dim<0> &a, const Dim<0> &b) {
  return Dim<0>();
 }
 template <int i>
-HOSTDEVICE Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
+Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
  return dim_plus(lhs, rhs);
 }
@@ -332,18 +272,18 @@ HOSTDEVICE Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
 * Multiply two dimensions together
 */
 template <int i>
-HOSTDEVICE Dim<i> dim_mult(const Dim<i> &a, const Dim<i> &b) {
+Dim<i> dim_mult(const Dim<i> &a, const Dim<i> &b) {
  return Dim<i>(a.head * b.head, dim_mult(a.tail, b.tail));
 }
 // Base case
 template <>
-HOSTDEVICE inline Dim<0> dim_mult(const Dim<0> &a, const Dim<0> &b) {
+inline Dim<0> dim_mult(const Dim<0> &a, const Dim<0> &b) {
  return Dim<0>();
 }
 template <int i>
-HOSTDEVICE Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
+Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
  return dim_mult(lhs, rhs);
 }
@@ -358,7 +298,7 @@ HOSTDEVICE Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
 */
 template <int i>
-HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
+Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
  int norm_stride = size.head == 1 ? 0 : stride.head;
  return Dim<i>(norm_stride, normalize_strides(size.tail, stride.tail));
 }
@@ -366,8 +306,7 @@ HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
 ///\cond HIDDEN
 template <>
-HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size,
+inline Dim<0> normalize_strides(const Dim<0> &size, const Dim<0> &stride) {
-                                           const Dim<0> &stride) {
  return Dim<0>();
 }
@@ -382,54 +321,9 @@ HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size,
 */
 template <typename... Args>
-HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) {
+Dim<sizeof...(Args)> make_dim(Args... idxes) {
  return Dim<sizeof...(Args)>(idxes...);
 }
-// Allows us to output a Dim
-// XXX For some reason, overloading fails to resolve this correctly
-template <int i>
-typename std::enable_if<(i > 1), std::ostream &>::type operator<<(
-    std::ostream &os, const Dim<i> &d) {
-  os << d.head << ", " << d.tail;
-  return os;
-}
-// Base case that allows us to output a Dim
-// XXX I wish this could be an overload instead of a template
-template <int i>
-typename std::enable_if<(i == 1), std::ostream &>::type operator<<(
-    std::ostream &os, const Dim<i> &d) {
-  os << d.head;
-  return os;
-}
-inline std::ostream &operator<<(std::ostream &os, const Dim<0> &d) {
-  return os;
-}
-template <int i>
-HOST std::string Dim<i>::to_string() const {
-  std::stringstream stream;
-  stream << *this;
-  return stream.str();
-}
-template <int D>
-HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) {
-  Dim<D> result;
-  for (int i = 0; i < D - 1; ++i) {
-    result[i] = linear_index % extents[i];
-    linear_index /= extents[i];
-  }
-  result[D - 1] = linear_index;
-  return result;
-}
 }  // namespace framework
 }  // namespace paddle_mobile
--- a/src/framework/lod_tensor.cpp
+++ b/src/framework/lod_tensor.cpp
@@ -13,72 +13,55 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "lod_tensor.h"
-#include <stdint.h>
-#include <string.h>
-#include <algorithm>
-#include <iterator>
 namespace paddle_mobile {
 namespace framework {
-std::ostream &operator<<(std::ostream &os, const LoD &lod) {
+// std::ostream &operator<<(std::ostream &os, const LoD &lod) {
-  os << "{";
+//  os << "{";
-  for (auto &v : lod) {
+//  for (auto &v : lod) {
-    os << "{";
+//    os << "{";
-    bool is_first = true;
+//    bool is_first = true;
-    for (auto &i : v) {
+//    for (auto &i : v) {
-      if (is_first) {
+//      if (is_first) {
-        os << i;
+//        os << i;
-        is_first = false;
+//        is_first = false;
-      } else {
+//      } else {
-        os << ", " << i;
+//        os << ", " << i;
-      }
+//      }
-    }
+//    }
-    os << "}";
+//    os << "}";
-  }
+//  }
-  os << "}";
+//  os << "}";
+//
-  return os;
+//  return os;
-}
+//}
+//
-std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
+// std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
-  //  PADDLE_ENFORCE(t.type().hash_code() ==
+//  PADDLE_MOBILE_ENFORCE(t.type().hash_code() == typeid(float).hash_code(),
-  //  typeid(float).hash_code());
+//                        "t.type() is not float");
+//  os << "dim: " << t.dims() << "\n";
-  //  if (!platform::is_cpu_place(t.place())) {
+//  os << "lod: " << t.lod() << "\n";
-  //    LoDTensor tt;
+//  // only print first ten elements
-  //    framework::TensorCopy(t, platform::CPUPlace(), &tt);
+//  int64_t size = t.numel() < 10 ? t.numel() : 10;
-  //    platform::DeviceContextPool &pool =
+//  for (int64_t i = 0; i < size; ++i) {
-  //    platform::DeviceContextPool::Instance(); auto &dev_ctx =
+//    os << t.data<float>()[i] << " ";
-  //    *pool.Get(t.place()); dev_ctx.Wait();
+//  }
-  //
+//
-  //    os << tt;
+//  return os;
-  //    return os;
+//}
-  //  }
+// std::string LoDToString(const LoD &lod) {
-  os << "dim: " << t.dims() << "\n";
+//  std::ostringstream stream;
-  os << "lod: " << t.lod() << "\n";
+//  stream << lod;
+//  return stream.str();
-  // only print first ten elements
+//}
-  int64_t size = t.numel() < 10 ? t.numel() : 10;
-  for (int64_t i = 0; i < size; ++i) {
-    os << t.data<float>()[i] << " ";
-  }
-  return os;
-}
-std::string LoDToString(const LoD &lod) {
-  std::ostringstream stream;
-  stream << lod;
-  return stream.str();
-}
 LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin,
                 size_t elem_end) {
-  //  PADDLE_ENFORCE_LT(level, in.size());
+  // `level` must index an existing LoD level before in[level] is read below;
+  // the message names the failing condition, like the check that follows.
+  PADDLE_MOBILE_ENFORCE(level < in.size(), "level >= in.size()");
-  //  PADDLE_ENFORCE_LT(elem_end, in[level].size());
+  PADDLE_MOBILE_ENFORCE(elem_end < in[level].size(),
+                        "elem_end >= in[level].size()");
  LoD res;
  res.resize(in.size() - level);
  // copy the first level
@@ -152,7 +135,7 @@ bool CheckLoD(const LoD &in, int tensor_height) {
          if (a < b) return true;
          return false;
        })) {
-      std::cout << "ascending error";
+      PADDLE_MOBILE_THROW_EXCEPTION("ascending error")
      return false;
    }
  }
@@ -211,8 +194,9 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
  LoD sub_lod;
  for (size_t level_idx = start_level; level_idx < lod.size(); ++level_idx) {
-    //    PADDLE_ENFORCE_LE(start_idx, end_idx);
+    PADDLE_MOBILE_ENFORCE(start_idx <= end_idx, "start_idx > end_idx");
-    //    PADDLE_ENFORCE_LT(end_idx, lod[level_idx].size());
+    PADDLE_MOBILE_ENFORCE(end_idx < lod[level_idx].size(),
+                          "end_idx >= lod[level_idx].size()");
    std::vector<size_t> level_lens;
    for (size_t i = start_idx; i < end_idx; ++i) {
      level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]);
@@ -226,10 +210,9 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
 }
 void AppendLoD(LoD *lod, const LoD &lod_length) {
-  //  PADDLE_ENFORCE(
+  PADDLE_MOBILE_ENFORCE(
-  //      lod->empty() || lod->size() == lod_length.size(),
+      lod->empty() || lod->size() == lod_length.size(),
-  //      "The lod_length should has the same size with the appended
+      "The lod_length should has the same size with the appended lod.");
-  //      lod.");
  if (lod->empty()) {
    for (size_t i = 0; i < lod_length.size(); ++i) {
      lod->emplace_back(1, 0);  // size = 1, value = 0;

--- a/src/framework/lod_tensor.h
+++ b/src/framework/lod_tensor.h
@@ -16,7 +16,6 @@ limitations under the License. */
 #include <memory>
 #include <string>
-#include <utility>
 #include <vector>
 #include "tensor.h"
 #include "tensor_util.h"

--- a/src/framework/op_info.h
+++ b/src/framework/op_info.h
@@ -25,9 +25,8 @@ template <typename Dtype>
 struct OpInfo {
  OpCreator<Dtype> creator_;
  const OpCreator<Dtype> &Creator() const {
-    //    PADDLE_ENFORCE_NOT_NULL(creator_,
+    PADDLE_MOBILE_ENFORCE(creator_ != nullptr,
-    //                            "Operator Creator has not been
+                          "Operator Creator has not been registered");
-    //                            registered");
    return creator_;
  }
 };
@@ -48,17 +47,15 @@ class OpInfoMap {
  }
  void Insert(const std::string &type, const OpInfo<Dtype> &info) {
-    //    PADDLE_ENFORCE(!Has(type), "Operator %s has been
+    PADDLE_MOBILE_ENFORCE(!Has(type), "Operator %s has been registered",
-    //    registered", type);
+                          type.c_str());
    map_.insert({type, info});
  }
  const OpInfo<Dtype> &Get(const std::string &type) const {
    auto op_info_ptr = GetNullable(type);
-    //    PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not
+    PADDLE_MOBILE_ENFORCE(op_info_ptr != nullptr,
-    //    been
+                          "Operator %s has not been registered", type.c_str());
-    //    registered",
-    //                            type);
    return *op_info_ptr;
  }

--- a/src/framework/operator.h
+++ b/src/framework/operator.h
@@ -16,7 +16,6 @@ limitations under the License. */
 #include <map>
 #include <string>
-#include <utility>
 #include <vector>
 #include "common/enforce.h"
@@ -27,7 +26,6 @@ limitations under the License. */
 #include "framework/op_info.h"
 #include "framework/op_kernel_type.h"
 #include "framework/op_registry.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/block_desc.h"
 #include "framework/program/program-optimize/node.h"
 #include "framework/scope.h"
@@ -52,7 +50,7 @@ static T *GetVarValue(const string &key, const VariableNameMap &var_map,
 }
 template <typename Dtype>
-class OperatorBase : PaddleMobileObject {
+class OperatorBase {
 public:
  /*
   *  @b op 基类的实例化方法, op 获取到了 输入、参数以及提前分配好的输出 tensor
@@ -121,7 +119,7 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
 * @b 所有kernel的父类
 * */
 template <typename Dtype, typename P>
-class OpKernelBase : PaddleMobileObject {
+class OpKernelBase {
 public:
  /*
   * @b 所有kernel 需实现 Compute 方法
@@ -139,14 +137,16 @@ class OpKernelBase : PaddleMobileObject {
      std::shared_ptr<::paddle_mobile::framework::Scope> scope)                \
      : parent_cls<Dtype, T>(type, inputs, outputs, attrs, scope) {}
-class FusionOpMatcher : PaddleMobileObject {
+class FusionOpMatcher {
 public:
  FusionOpMatcher() {}
  virtual std::string Type() = 0;
-  virtual void FolderNodes(Node *node) {
+  virtual void FolderNodes(
-    node->Folder(node_.Depth(), Type(), {});
+      Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(), {}, removed_nodes);
  }
  virtual Node &BeginNode() { return node_; }

--- a/src/framework/paddle_mobile_object.cpp
+++ b/src/framework/paddle_mobile_object.cpp
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle_mobile_object.h"
--- a/src/framework/program/block_desc.cpp
+++ b/src/framework/program/block_desc.cpp
@@ -17,13 +17,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace framework {
-std::vector<std::shared_ptr<VarDesc>> BlockDesc::Vars() const {
+std::vector<std::shared_ptr<VarDesc>> BlockDesc::Vars() const { return vars_; }
-  std::vector<std::shared_ptr<VarDesc>> res;
-  for (const auto &p : vars_) {
-    res.push_back(p.second);
-  }
-  return res;
-}
 std::vector<std::shared_ptr<OpDesc>> BlockDesc::Ops() const { return ops_; }
@@ -31,10 +25,14 @@ BlockDesc::BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc)
    : index_(desc->idx), parent_index_(desc->idx) {
  for (int i = 0; i < desc->n_vars; ++i) {
    PaddleMobile__Framework__Proto__VarDesc *var_desc = desc->vars[i];
-    vars_[std::string(var_desc->name)] =
+    vars_.emplace_back(std::shared_ptr<VarDesc>(new VarDesc(var_desc)));
-        std::shared_ptr<VarDesc>(new VarDesc(var_desc));
  }
+  // Keep vars_ ordered by variable name. The comparator takes const
+  // references so sorting does not copy a shared_ptr on every comparison.
+  std::sort(vars_.begin(), vars_.end(),
+            [](const std::shared_ptr<VarDesc> &left,
+               const std::shared_ptr<VarDesc> &right) {
+              return left->Name() < right->Name();
+            });
  for (int j = 0; j < desc->n_ops; ++j) {
    PaddleMobile__Framework__Proto__OpDesc *op_desc = desc->ops[j];
    ops_.emplace_back(new framework::OpDesc(op_desc));

--- a/src/framework/program/block_desc.h
+++ b/src/framework/program/block_desc.h
@@ -15,14 +15,13 @@ limitations under the License. */
 #pragma once
 #include "framework/framework.pb-c.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/op_desc.h"
 #include "framework/program/var_desc.h"
 namespace paddle_mobile {
 namespace framework {
-class BlockDesc : PaddleMobileObject {
+class BlockDesc {
 public:
  friend class Node;
  friend class ProgramOptimize;
@@ -35,10 +34,9 @@ class BlockDesc : PaddleMobileObject {
      ops_.push_back(copy_op_desc);
    }
-    for (auto &var_desc : block_desc.vars_) {
-      std::shared_ptr<VarDesc> copy_var_desc =
-          std::make_shared<VarDesc>(*var_desc.second);
-      vars_[var_desc.first] = copy_var_desc;
+    // Copy-construct a fresh VarDesc for each entry, preserving the source
+    // order; range-for avoids the signed/unsigned index comparison.
+    for (const auto &var_desc : block_desc.vars_) {
+      vars_.emplace_back(std::make_shared<VarDesc>(*var_desc));
    }
  }
@@ -64,7 +62,7 @@ class BlockDesc : PaddleMobileObject {
  bool multi_thread_;
  int parent_index_;
  std::vector<std::shared_ptr<OpDesc>> ops_;
-  std::unordered_map<std::string, std::shared_ptr<VarDesc>> vars_;
+  std::vector<std::shared_ptr<VarDesc>> vars_;
 };
 }  // namespace framework

--- a/src/framework/program/op_desc.h
+++ b/src/framework/program/op_desc.h
@@ -20,12 +20,11 @@ limitations under the License. */
 #include "common/log.h"
 #include "common/type_define.h"
 #include "framework/framework.pb-c.h"
-#include "framework/paddle_mobile_object.h"
 namespace paddle_mobile {
 namespace framework {
-class OpDesc : PaddleMobileObject {
+class OpDesc {
 public:
  friend class ProgramOptimize;
  friend class FusionOpMatcher;

--- a/src/framework/program/program-optimize/fusion_op_register.cpp
+++ b/src/framework/program/program-optimize/fusion_op_register.cpp
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "fusion_op_register.h"
--- a/src/framework/program/program-optimize/node.cpp
+++ b/src/framework/program/program-optimize/node.cpp
@@ -12,10 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include <sstream>
-#include "framework/operator.h"
 #include "framework/program/program-optimize/node.h"
+#include "framework/operator.h"
 namespace paddle_mobile {
@@ -45,54 +43,13 @@ bool Node::operator==(const Node &in) {
  return true;
 }
-bool Node::CanSplit(std::unordered_set<std::string> complex_compute_set) {
+std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs(int size) {
-  bool split = false;
-  CanSplit(&split, false, 0, &complex_compute_set, this);
-  return split;
-}
-void Node::CanSplit(bool *split, bool spliting, int complex_count,
-                    std::unordered_set<std::string> *complex_compute_set,
-                    Node *pre_node) {
-  if (spliting) {
-    if (complex_compute_set->find(this->type_) != complex_compute_set->end()) {
-      complex_count++;
-    }
-  }
-  if (inputs_.size() > 1 && pre_node != inputs_.back()) {
-    return;
-  }
-  if (inputs_.size() > 1 && pre_node == inputs_.back()) {
-    if (complex_count > 1) {
-      *split = true;
-      return;
-    }
-  }
-  // multi output, to check
-  if (outputs_.size() > 1) {
-    spliting = true;
-    complex_compute_set = 0;
-  } else {
-    if (spliting == true && inputs_.size() > 0) {
-      spliting = false;
-    } else {
-    }
-  }
-  for (auto &output : outputs_) {
-    output->CanSplit(split, spliting, complex_count, complex_compute_set, this);
-  }
-}
-std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs(uint size) {
  std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
  OpDescs(size - 1, &op_descs);
  return op_descs;
 }
-void Node::OpDescs(uint index,
+void Node::OpDescs(int index,
                   std::vector<std::shared_ptr<framework::OpDesc>> *op_desc) {
  if (index == 0) {
    return;
@@ -103,107 +60,6 @@ void Node::OpDescs(uint index,
  }
 }
-void Node::OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
-                   Node *node, bool adding_thread, int thread_num) {
-  if (outputs_.size() > 1) {
-    adding_thread = false;
-  }
-  bool can_add_split = false;
-  // 如果当前节点有多个输出 并且 只有当前节点对应的 op_desc_ 输出数为 1 时支持
-  if (outputs_.size() > 1 &&
-      op_input_output_key[op_desc_->type_].second.size() == 1) {
-    can_add_split = true;
-    // 遍历当前节点的 output 节点
-    for (const auto &output : outputs_) {
-      // 不支持 output 有多个 output 的情况
-      if (output->outputs_.size() > 0) {
-        can_add_split = false;
-        break;
-      }
-      //与节点关联的 OpDesc
-      std::shared_ptr<framework::OpDesc> &op_desc = output->op_desc_;
-      //获取这个 op 的 inputs key 和 outputs key
-      auto inputs_and_outputs = op_input_output_key[op_desc->type_];
-      //判断现在 是否存在这个 op
-      //判断这个 output 和 input key 的 size 等于 1
-      if (op_input_output_key.find(op_desc->type_) !=
-              op_input_output_key.end() &&
-          inputs_and_outputs.first.size() == 1 &&
-          inputs_and_outputs.second.size() == 1) {
-        auto inputs_of_output = op_desc->Input(inputs_and_outputs.first[0]);
-        auto outputs_of_output = op_desc->Output(inputs_and_outputs.second[0]);
-        // 判断一下, 如果输入和输出没有同名, 是支持的
-        for (int i = 0; i < inputs_of_output.size(); ++i) {
-          std::string input_of_output = inputs_of_output[i];
-          for (int j = 0; j < outputs_of_output.size(); ++j) {
-            std::string output_of_output = outputs_of_output[j];
-            if (input_of_output == output_of_output) {
-              DLOG << "output的 output 包含 input" << input_of_output;
-              can_add_split = false;
-              break;
-            }
-          }
-        }
-      } else {  // 如果模型中包含没有的 op, 则不支持添加 split
-        DLOG << "找不到 这个 op 类型: " << output->op_desc_->type_;
-        can_add_split = false;
-      }
-    }
-  }
-  if (inputs_.size() > 1 && node != inputs_.back()) {
-    return;
-  } else if (inputs_.size() > 1 && node == inputs_.back()) {
-    adding_thread = false;
-    op_desc->push_back(this->op_desc_);
-  } else {
-    op_desc->push_back(this->op_desc_);
-  }
-  if (adding_thread) {
-    Attribute attr;
-    attr.Set<int>(thread_num);
-    this->op_desc_->attrs_["thread"] = attr;
-  }
-  if (can_add_split) {
-    adding_thread = true;
-    std::shared_ptr<OpDesc> split_op_desc = std::make_shared<OpDesc>();
-    split_op_desc->type_ = G_OP_TYPE_SPLIT;
-    auto outputs = this->op_desc_->Output(
-        op_input_output_key[this->op_desc_->Type()].second[0]);
-    split_op_desc->inputs_ = {
-        {op_input_output_key[G_OP_TYPE_SPLIT].first[0], outputs}};
-    auto &split_outputs =
-        split_op_desc->outputs_[op_input_output_key[G_OP_TYPE_SPLIT].second[0]];
-    for (const auto &output : outputs_) {
-      split_outputs.push_back(outputs[0]);
-    }
-    DLOG << "add split";
-    op_desc->push_back(split_op_desc);
-  }
-  for (int i = 0; i < outputs_.size(); ++i) {
-    auto &output = outputs_[i];
-    if (can_add_split) {
-      output->OpDescs(op_desc, this, adding_thread, i);
-    } else {
-      output->OpDescs(op_desc, this, adding_thread, thread_num);
-    }
-  }
-}
-std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs() {
-  std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
-  OpDescs(&op_descs, this, false, 0);
-  return op_descs;
-}
 std::shared_ptr<Node> Node::To(int size) {
  std::shared_ptr<Node> node = std::make_shared<Node>();
  this->To(size - 1, node);
@@ -224,24 +80,25 @@ void Node::To(int index, std::shared_ptr<Node> node) {
  }
 }
-uint Node::Depth(uint begin) {
+int Node::Depth(int begin) {
-  uint depth = 0;
+  int depth = 0;
  begin++;
  for (int i = 0; i < outputs_.size(); ++i) {
-    uint output_depth = outputs_[i]->Depth(begin);
+    int output_depth = outputs_[i]->Depth(begin);
    depth = output_depth > depth ? output_depth : depth;
  }
  return begin > depth ? begin : depth;
 }
 Node &Node::Folder(
-    uint size, std::string type,
+    int size, std::string type,
-    std::map<std::string, std::pair<std::string, std::string>> change) {
+    std::map<std::string, std::pair<std::string, std::string>> change,
+    std::vector<std::shared_ptr<Node>> *removed_nodes) {
  std::shared_ptr<framework::OpDesc> op_desc =
      std::make_shared<framework::OpDesc>();
  op_desc->inputs_ = this->op_desc_->inputs_;
  std::vector<std::shared_ptr<Node>> outputs;
-  this->Folder(op_desc, &outputs, size - 1, &change, this);
+  this->Folder(op_desc, &outputs, size - 1, &change, this, removed_nodes);
  this->outputs_ = outputs;
  this->type_ = type;
  this->op_desc_ = op_desc;
@@ -251,9 +108,9 @@ Node &Node::Folder(
 void Node::Folder(
    std::shared_ptr<framework::OpDesc> op_desc,
-    std::vector<std::shared_ptr<Node>> *outputs, uint index,
+    std::vector<std::shared_ptr<Node>> *outputs, int index,
    std::map<std::string, std::pair<std::string, std::string>> *change,
-    Node *begin_node) {
+    Node *begin_node, std::vector<std::shared_ptr<Node>> *removed_nodes) {
  if (change->find(this->type_) != change->end()) {
    auto change_pair = (*change)[this->type_];
    op_desc->GetInputs()[change_pair.second] =
@@ -266,7 +123,9 @@ void Node::Folder(
  if (index > 0) {
    --index;
    for (auto output : outputs_) {
-      output->Folder(op_desc, outputs, index, change, begin_node);
+      removed_nodes->push_back(output);
+      output->Folder(op_desc, outputs, index, change, begin_node,
+                     removed_nodes);
    }
  } else {
    for (auto &op_output : this->op_desc_->outputs_) {
@@ -285,7 +144,7 @@ void Node::Folder(
    }
  }
 }
+#ifdef PADDLE_MOBILE_DEBUG
 std::string Node::ToString(std::string blank, const Node *node) const {
  std::stringstream ss;
  ss << type_ << "-> \n";
@@ -316,6 +175,7 @@ Print &operator<<(Print &printer, const Node &node) {
  printer << node.ToString();
  return printer;
 }
+#endif
 }  // namespace framework
 }  // namespace paddle_mobile
--- a/src/framework/program/program-optimize/node.h
+++ b/src/framework/program/program-optimize/node.h
@@ -14,20 +14,17 @@ limitations under the License. */
 #pragma once
+#include <cinttypes>
 #include <map>
 #include <string>
-#include <unordered_set>
-#include <utility>
 #include <vector>
 #include "common/log.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/op_desc.h"
 namespace paddle_mobile {
 namespace framework {
-class Node : PaddleMobileObject {
+class Node {
  friend class ProgramOptimize;
 public:
@@ -37,35 +34,34 @@ class Node : PaddleMobileObject {
      : op_desc_(op_desc), type_(op_desc->Type()) {}
  Node &operator>(std::shared_ptr<Node> node);
  bool operator==(const Node &in);
-  bool CanSplit(std::unordered_set<std::string> complex_compute_set);
+#ifdef PADDLE_MOBILE_DEBUG
  std::string ToString() const;
+  void Description();
+#endif
  std::shared_ptr<Node> To(int size);
-  uint Depth(uint begin = 0);
+  int Depth(int begin = 0);
  Node &Folder(
-      uint size, std::string type,
+      int size, std::string type,
-      std::map<std::string, std::pair<std::string, std::string>> change_map);
+      std::map<std::string, std::pair<std::string, std::string>> change_map,
-  std::vector<std::shared_ptr<framework::OpDesc>> OpDescs(uint size);
+      std::vector<std::shared_ptr<Node>> *removed_nodes);
-  std::vector<std::shared_ptr<framework::OpDesc>> OpDescs();
+  std::vector<std::shared_ptr<framework::OpDesc>> OpDescs(int size);
  std::shared_ptr<framework::OpDesc> OpDescOfNode() { return op_desc_; }
  std::string Type() { return type_; }
-  void Description();
 private:
-  void CanSplit(bool *split, bool spliting, int complex_count,
+  void OpDescs(int size,
-                std::unordered_set<std::string> *complex_compute_set,
-                Node *pre_node);
-  void OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
-               Node *node, bool adding_thread, int thread_num);
-  void OpDescs(uint size,
               std::vector<std::shared_ptr<framework::OpDesc>> *op_desc);
  void To(int index, std::shared_ptr<Node>);
  void Folder(
      std::shared_ptr<framework::OpDesc> op_desc,
-      std::vector<std::shared_ptr<Node>> *outputs, uint index,
+      std::vector<std::shared_ptr<Node>> *outputs, int index,
      std::map<std::string, std::pair<std::string, std::string>> *change,
-      Node *begin_node);
+      Node *begin_node, std::vector<std::shared_ptr<Node>> *removed_nodes);
  std::shared_ptr<framework::OpDesc> op_desc_;
+#ifdef PADDLE_MOBILE_DEBUG
  std::string ToString(std::string blank, const Node *node) const;
+#endif
  std::vector<std::shared_ptr<Node>> outputs_;
  std::vector<Node *> inputs_;
  std::string type_;

--- a/src/framework/program/program-optimize/program_optimize.cpp
+++ b/src/framework/program/program-optimize/program_optimize.cpp
@@ -31,6 +31,8 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
    std::unordered_map<std::string, std::vector<std::shared_ptr<Node>>>
        type_map;
+    std::vector<std::shared_ptr<Node>> nodes;
    std::shared_ptr<Node> begin_node;
    auto block = optimize_program->Block(i);
    //        DLOG << " ops size: " << block->Ops().size();
@@ -38,11 +40,13 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
      auto op = block->Ops()[j];
      auto op_type = op->Type();
      if (op_input_output_key.find(op->Type()) == op_input_output_key.end()) {
-        LOG(kLOG_ERROR) << "return null ";
+        LOG(kLOG_ERROR) << "has not support op return null "
+                        << " op type: " << op->Type();
        return nullptr;
      }
      std::shared_ptr<Node> node = std::make_shared<Node>(op);
+      nodes.push_back(node);
      //
      type_map[op->Type()].push_back(node);
@@ -87,21 +91,26 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
          //          DLOG << " match success " << " fusion node: \n" <<
          //          matcher->BeginNode() << "\nsub node: \n" << *sub_node;
          //          DLOG << "match node\n"<< *match_node;
-          matcher->FolderNodes(match_node.get());
-          //          DLOG << " after match node\n"<< *match_node;
-          //          match_node->Description();
-          //          DLOG << "begin node: \n" << *begin_node;
+          std::vector<std::shared_ptr<Node>> removed_nodes;
+          matcher->FolderNodes(match_node.get(), &removed_nodes);
+          for (const auto &removed_node : removed_nodes) {
+            auto removed_ite =
+                std::find(nodes.begin(), nodes.end(), removed_node);
+            // erase(end()) is undefined; skip nodes no longer tracked.
+            if (removed_ite != nodes.end()) nodes.erase(removed_ite);
+          }
        }
      }
    }
-    //    DLOG << "node: \n" << *begin_node;
    std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
-    //    bool can_splite = begin_node->CanSplit({G_OP_TYPE_CONV,
+    for (int m = 0; m < nodes.size(); ++m) {
-    //    G_OP_TYPE_BATCHNORM, G_OP_TYPE_DEPTHWISE_CONV});
+      auto &node = nodes[m];
-    GenerateOps(&op_descs, begin_node.get());
+      op_descs.push_back(node->op_desc_);
+    }
+    //    GenerateOps(&op_descs, begin_node.get());
    block->ops_ = op_descs;
  }
@@ -118,6 +127,14 @@ void ProgramOptimize::GenerateOps(
    Node *current_node) {
  if (current_node->inputs_.size() > 1 &&
      input_node != current_node->inputs_.back()) {
+    DLOG << " current type " << current_node->type_;
+    DLOG << " inputs size of current node > 0 ";
+    for (int i = 0; i < current_node->inputs_.size(); ++i) {
+      DLOG << " input i: " << current_node->inputs_[i]->type_;
+    }
    return;
  } else if (current_node->inputs_.size() > 1 &&
             input_node == current_node->inputs_.back()) {

--- a/src/framework/program/program.cpp
+++ b/src/framework/program/program.cpp
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-namespace paddle_mobile {
-namespace framework {}
-}  // namespace paddle_mobile
--- a/src/framework/program/program.h
+++ b/src/framework/program/program.h
@@ -15,7 +15,6 @@ limitations under the License. */
 #pragma once
 #include "common/types.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/program_desc.h"
 #include "framework/scope.h"
@@ -23,12 +22,14 @@ namespace paddle_mobile {
 namespace framework {
 template <typename Dtype, Precision P = Precision::FP32>
-class Program : PaddleMobileObject {
+class Program {
 public:
  std::shared_ptr<ProgramDesc> originProgram;
  std::shared_ptr<ProgramDesc> optimizeProgram;
  std::shared_ptr<Scope> scope;
  std::string model_path;
+  std::string para_path;
+  bool is_commbine = false;
 private:
 };

--- a/src/framework/program/program_desc.h
+++ b/src/framework/program/program_desc.h
@@ -18,13 +18,12 @@ limitations under the License. */
 #include "common/types.h"
 #include "framework/framework.pb-c.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/block_desc.h"
 namespace paddle_mobile {
 namespace framework {
-class ProgramDesc : PaddleMobileObject {
+class ProgramDesc {
 public:
  friend class Node;
  friend class ProgramOptimize;

--- a/src/framework/program/var_desc.h
+++ b/src/framework/program/var_desc.h
@@ -14,40 +14,14 @@ limitations under the License. */
 #pragma once
+#include <string>
 #include "framework/framework.pb-c.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/tensor_desc.h"
 namespace paddle_mobile {
 namespace framework {
-/*
-PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__BOOL = 0,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT16 = 1,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT32 = 2,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__INT64 = 3,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP16 = 4,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP32 = 5,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FP64 = 6,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_TENSOR = 7,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__SELECTED_ROWS = 8,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FEED_MINIBATCH = 9,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__FETCH_LIST = 10,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__STEP_SCOPES = 11,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_RANK_TABLE = 12,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__LOD_TENSOR_ARRAY = 13,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__PLACE_LIST = 14,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__READER = 15,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__CHANNEL = 16,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__RAW = 17,
-        PADDLE_MOBILE__FRAMEWORK__PROTO__VAR_TYPE__TYPE__TUPLE = 18
-                                                                 */
 class VarDesc {
 public:
  VarDesc(const VarDesc &var_desc) {
@@ -56,14 +30,6 @@ class VarDesc {
    this->persistable_ = var_desc.persistable_;
    this->tensor_desc_ = var_desc.tensor_desc_;
    this->type_ = var_desc.type_;
-    /*
-     *
-     *  std::string name_;
-  bool persistable_;
-  TensorDesc tensor_desc_;
-  VarType_Type type_;
-  VarType_Type data_type_;
-     * */
  }
  VarDesc(PaddleMobile__Framework__Proto__VarDesc *desc) {
    type_ = (VarType_Type)desc->type->type;
@@ -102,39 +68,6 @@ class VarDesc {
  const TensorDesc &Tensor_desc() const { return tensor_desc_; }
-  //  const proto::VarType::ChannelDesc &channel_desc() const {
-  //    switch (desc_.type().type()) {
-  //      case proto::VarType::CHANNEL:
-  //        return desc_.type().channel();
-  //      default:
-  //        break;
-  //    }
-  //  }
-  //  proto::VarType::Type GetDataType() const {
-  //    switch (desc_.type().type()) {
-  //      case proto::VarType::CHANNEL:
-  //        return channel_desc().data_type();
-  //        break;
-  //      default:
-  //        return tensor_desc().data_type();
-  //    }
-  //  }
-  //  template <typename T>
-  //  std::vector<T> RepeatedToVector(
-  //      const google::protobuf::RepeatedField<T> &repeated_field) const {
-  //    std::vector<T> ret;
-  //    ret.reserve(repeated_field.size());
-  //    std::copy(repeated_field.begin(), repeated_field.end(),
-  //              std::back_inserter(ret));
-  //    return ret;
-  //  }
-  //  std::vector<int64_t> GetShape() const {
-  //    return this->RepeatedToVector(tensor_desc().dims());
-  //  }
 private:
  std::string name_;
  bool persistable_;

--- a/src/framework/scope.cpp
+++ b/src/framework/scope.cpp
@@ -22,7 +22,6 @@ namespace paddle_mobile {
 namespace framework {
 Scope &Scope::NewScope() const {
-  std::unique_lock<std::mutex> lock(mutex_);
  kids_.push_back(new Scope(this));
  return *kids_.back();
 }
@@ -72,11 +71,9 @@ std::vector<std::string> Scope::LocalVarNames() const {
 }
 void Scope::DeleteScope(Scope *scope) const {
-  std::unique_lock<std::mutex> lock(mutex_);
  auto it = std::find(kids_.begin(), kids_.end(), scope);
  kids_.erase(it);
  delete scope;
-  // deferent
 }
 void Scope::EraseVars(const std::vector<std::string> &var_names) {
@@ -104,14 +101,6 @@ void Scope::Rename(const std::string &origin_name,
  vars_[new_name] = origin_it->second;
  vars_.erase(origin_it);
 }
-//
-//            std::string Scope::Rename(const std::string& origin_name)
-//            const {
-//                auto var_name = string::Sprintf("%p.%d", this,
-//                vars_.size());
-//                Rename(origin_name, var_name);
-//                return var_name;
-//            }
 Variable *Scope::FindVarLocally(const std::string &name) const {
  auto it = vars_.find(name);

--- a/src/framework/scope.h
+++ b/src/framework/scope.h
@@ -14,17 +14,16 @@ limitations under the License. */
 #pragma once
-#include <list>           //std::list
+#include <list>
-#include <mutex>          //std::mutex
+#include <unordered_map>
-#include <unordered_map>  //std::unordered_map
 #include "variable.h"
 namespace paddle_mobile {
 namespace framework {
 class Scope {
 public:
-  Scope() {}
+  Scope() = default;
-  ~Scope() {}
+  ~Scope() = default;
  Scope &NewScope() const;
@@ -70,8 +69,6 @@ class Scope {
  mutable std::unordered_map<std::string, Variable *> vars_;
  mutable std::list<Scope *> kids_;
  Scope const *parent_{nullptr};
-  mutable std::mutex mutex_;
 };
 }  // namespace framework
 }  // namespace paddle_mobile
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -14,14 +14,15 @@ limitations under the License. */
 #pragma once
-#include <common/enforce.h>
 #include <cstdint>
 #include <cstring>
 #include <memory>
 #include <type_traits>
 #include <typeindex>
 #include <vector>
+#include "common/enforce.h"
+#include "common/enforce.h"
 #include "framework/data_layout.h"
 #include "framework/ddim.h"
 #include "memory/t_malloc.h"

--- a/src/framework/tensor_util.cpp
+++ b/src/framework/tensor_util.cpp
@@ -13,137 +13,19 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "tensor_util.h"
-#include <algorithm>
-#include <limits>
-#include <vector>
 namespace paddle_mobile {
 namespace framework {
 void TensorCopy(const Tensor &src, Tensor *dst) {
-  //  VLOG(3) << "TensorCopy " << src.dims() << " from " <<
-  //  src.place() << " to
-  //  "
-  //          << dst_place;
  src.check_memory_size();
  dst->Resize(src.dims());
  dst->set_layout(src.layout());
  auto src_ptr = src.data<void>();
  auto dst_ptr = dst->mutable_data(src.type());
  auto size = src.numel() * SizeOfType(src.type());
  memory::Copy(dst_ptr, src_ptr, size);
 }
-void TensorCopySync(const Tensor &src, Tensor *dst) {
-  //  VLOG(3) << "TensorCopySync " << src.dims() << " from " <<
-  //  src.place()
-  //          << " to " << dst_place;
-  src.check_memory_size();
-  dst->Resize(src.dims());
-  dst->set_layout(src.layout());
-  auto src_ptr = src.data<void>();
-  auto dst_ptr = dst->mutable_data(src.type());
-  auto size = src.numel() * SizeOfType(src.type());
-  memory::Copy(dst_ptr, src_ptr, size);
-}
-template <typename Predicate>
-struct AnyDTypeVisitor {
-  Predicate predicate_;
-  const Tensor &tensor_;
-  Tensor *out_;
-  AnyDTypeVisitor(Predicate predicate, const Tensor &tensor, Tensor *out)
-      : predicate_(predicate), tensor_(tensor), out_(out) {}
-  template <typename T>
-  void operator()() const {
-    //    auto t = EigenVector<T>::Flatten(tensor_);
-    //    auto o = EigenScalar<bool>::From(*out_);
-    // return any of predicate_(t) is true.
-    //    o.device(*ctx_.eigen_device()) = predicate_(t).any();
-  }
-};
-template <typename Predicate>
-inline void AnyImpl(Predicate predicate, const Tensor &tensor,
-                    framework::Tensor *out) {
-  VisitDataType(ToDataType(tensor.type()),
-                AnyDTypeVisitor<Predicate>(predicate, tensor, out));
-}
-template <typename Predicate>
-struct AnyVisitor {
-  const framework::Tensor &tensor_;
-  Predicate predicate_;
-  AnyVisitor(const framework::Tensor &tensor, Predicate predicate)
-      : tensor_(tensor), predicate_(std::move(predicate)) {}
-  bool operator()(void) const {
-    framework::Tensor out;
-    out.Resize({1});
-    out.mutable_data<bool>();
-    AnyImpl(predicate_, tensor_, &out);
-    return this->GetResult(out);
-  }
-  bool GetResult(const framework::Tensor &out) const {
-    return *out.data<bool>();
-  }
-};
-template <typename Predicate>
-inline bool Any(const framework::Tensor &tensor, Predicate predicate) {
-  AnyVisitor<Predicate> visitor(tensor, predicate);
-  //  return platform::VisitPlace(visitor);
-  return visitor();
-}
-struct ContainsNANPredicate {
-  template <typename T>
-  auto operator()(const T &eigen_vec) const
-      -> decltype(std::declval<T>().isnan()) {
-    // Cast eigen_vector to vector of bool. true if is inf.
-    return eigen_vec.isnan();
-  }
-};
-bool TensorContainsNAN(const framework::Tensor &tensor) {
-  ContainsNANPredicate predicate;
-  return Any(tensor, predicate);
-}
-struct ContainsInfPredicate {
-  template <typename T>
-  auto operator()(const T &eigen_vec) const
-      -> decltype(std::declval<T>().isinf()) {
-    // Cast eigen_vector to vector of bool. true if is inf.
-    return eigen_vec.isinf();
-  }
-};
-bool TensorContainsInf(const framework::Tensor &tensor) {
-  ContainsInfPredicate predicate;
-  return Any(tensor, predicate);
-}
-struct DeserializedDataFunctor {
-  DeserializedDataFunctor(void **buf, Tensor *tensor)
-      : buf_(buf), tensor_(tensor) {}
-  template <typename T>
-  void operator()() {
-    *buf_ = tensor_->mutable_data<T>();
-  }
-  void **buf_;
-  Tensor *tensor_;
-};
 }  // namespace framework
 }  // namespace paddle_mobile
--- a/src/framework/tensor_util.h
+++ b/src/framework/tensor_util.h
@@ -15,51 +15,12 @@ limitations under the License. */
 #pragma once
 #include <vector>
 #include "memory/t_malloc.h"
-#include "platform/data_type.h"
 #include "tensor.h"
 namespace paddle_mobile {
 namespace framework {
 void TensorCopy(const Tensor &src, Tensor *dst);
-void TensorCopySync(const Tensor &src, Tensor *dst);
-template <typename T>
-void TensorFromVector(const std::vector<T> &src, Tensor *dst);
-template <typename T>
-void TesnorToVector(const Tensor &src, std::vector<T> *dst);
-bool TensorContainsNAN(const framework::Tensor &tensor);
-bool TensorContainsInf(const framework::Tensor &tensor);
-void TensorToStream(std::ostream &os, const Tensor &tensor);
-void TensorFromStream(std::istream &is, Tensor *tensor);
-//
-// The implementation of template functions.
-//
-template <typename T>
-void TensorFromVector(const std::vector<T> &src, Tensor *dst) {
-  auto src_ptr = static_cast<const void *>(src.data());
-  dst->Resize({static_cast<int64_t>(src.size())});
-  auto dst_ptr = static_cast<void *>(dst->mutable_data<T>());
-  auto size = src.size() * sizeof(T);
-  memory::Copy(dst_ptr, src_ptr, size);
-}
-template <typename T>
-void TensorToVector(const Tensor &src, std::vector<T> *dst) {
-  auto src_ptr = static_cast<const void *>(src.data<T>());
-  auto size = src.numel() * sizeof(T);
-  dst->resize(src.numel());
-  auto dst_ptr = static_cast<void *>(dst->data());
-  memory::Copy(dst_ptr, src_ptr, size);
-}
 }  // namespace framework
 }  // namespace paddle_mobile
--- a/src/framework/var_type.h
+++ b/src/framework/var_type.h
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#pragma once
-#include "framework.pb.h"
-#include "lod_tensor.h"
-#include "selected_rows.h"
-#include "variable.h"
-namespace paddle_mobile {
-namespace framework {
-inline proto::VarType::Type ToVarType(std::type_index type) {
-  if (type.hash_code() == typeid(LoDTensor).hash_code()) {
-    return proto::VarType_Type_LOD_TENSOR;
-  } else if (type.hash_code() == typeid(SelectedRows).hash_code()) {
-    return proto::VarType_Type_SELECTED_ROWS;
-  } else {
-    //    PADDLE_THROW("ToVarType:Unsupported type %s",
-    //    type.name());
-  }
-}
-}  // namespace framework
-}  // namespace paddle_mobile
--- a/src/framework/variable.h
+++ b/src/framework/variable.h
@@ -14,19 +14,17 @@ limitations under the License. */
 #pragma once
-#include <iostream>
 #include <memory>
 #include <string>
 #include <typeindex>
 #include <typeinfo>
 #include "../common/variant.h"
-#include "paddle_mobile_object.h"
 namespace paddle_mobile {
 namespace framework {
 using std::string;
-class Variable : public PaddleMobileObject {
+class Variable {
 public:
  template <typename T>
  const T *Get() const {

--- a/src/io.cpp
+++ b/src/io.cpp
@@ -13,11 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "io.h"
-#include <fstream>
 #include <vector>
-#include "common/log.h"
 #include "common/enforce.h"
+#include "common/log.h"
 #include "framework/framework.pb-c.h"
 #include "framework/lod_tensor.h"
 #include "framework/operator.h"
@@ -30,16 +29,20 @@ limitations under the License. */
 namespace paddle_mobile {
 using framework::Variable;
-void ReadBinaryFile(const std::string &filename, std::string *contents) {
+char *Get_binary_data(std::string filename) {
-  std::ifstream fin(filename, std::ios::in | std::ios::binary);
+  FILE *file = fopen(filename.c_str(), "rb");
-  PADDLE_MOBILE_ENFORCE(fin.is_open(), "open file: %s failed",
+  PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
                        filename.c_str());
-  fin.seekg(0, std::ios::end);
+  fseek(file, 0, SEEK_END);
-  contents->clear();
+  long size = ftell(file);
-  contents->resize(fin.tellg());
+  PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
-  fin.seekg(0, std::ios::beg);
+  rewind(file);
-  fin.read(&(contents->at(0)), contents->size());
+  char *data = new char[size];
-  fin.close();
+  size_t bytes_read = fread(data, 1, size, file);
+  PADDLE_MOBILE_ENFORCE(bytes_read == size,
+                        "read binary file bytes do not match with fseek");
+  fclose(file);
+  return data;
 }
 static size_t ReadBuffer(const char *file_name, uint8_t **out) {
@@ -66,110 +69,27 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
 }
 template <typename Dtype, Precision P>
-void Loader<Dtype, P>::LoadVar(framework::Variable *variable,
+const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-                               const framework::VarDesc &var_desc,
+    const std::string &dirname, bool optimize) {
-                               const std::string &file_path) {
+  auto program = this->LoadProgram(dirname + "/__model__", optimize);
-  auto tensor = variable->GetMutable<framework::LoDTensor>();
+  program.model_path = dirname;
-  std::ifstream is(file_path);
+  return program;
-  PADDLE_MOBILE_ENFORCE(is.is_open(), "open file: %s failed",
-                        file_path.c_str());
-  std::fpos<mbstate_t> pos;
-  pos = is.tellg();  // save   current   position
-  is.seekg(0, std::ios::end);
-  is.seekg(pos);  // restore   saved   position
-  // 1. version
-  uint32_t version;
-  is.read(reinterpret_cast<char *>(&version), sizeof(version));
-  // 2 Lod information
-  uint64_t lod_level;
-  is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
-  auto &lod = *tensor->mutable_lod();
-  lod.resize(lod_level);
-  for (uint64_t i = 0; i < lod_level; ++i) {
-    uint64_t size;
-    is.read(reinterpret_cast<char *>(&size), sizeof(size));
-    std::vector<size_t> tmp(size / sizeof(size_t));
-    is.read(reinterpret_cast<char *>(tmp.data()),
-            static_cast<std::streamsize>(size));
-    for (auto j : tmp) {
-      LOG(kLOG_DEBUG1) << "    lod - " << j;
-    }
-    lod[i] = tmp;
-  }
-  // 3. tensor version
-  uint32_t tensor_version;
-  is.read(reinterpret_cast<char *>(&tensor_version), sizeof(tensor_version));
-  // 4. tensor desc
-  int32_t size;
-  is.read(reinterpret_cast<char *>(&size), sizeof(size));
-  std::unique_ptr<char[]> buf(new char[size]);
-  is.read(reinterpret_cast<char *>(buf.get()), size);
-  const framework::TensorDesc &desc = var_desc.Tensor_desc();
-  PaddleMobile__Framework__Proto__VarType__TensorDesc *tensor_desc = NULL;
-  //  void *v;
-  //  PaddleMobile__Framework__Proto__VarType__TensorDesc_Closure()(tensor_desc,
-  //  buf.get());
-  //  DLOG << "PaddleMobile__Framework__Proto__VarType__TensorDesc_Closure- " <<
-  //  tensor_desc;
-  //  framework::TensorDesc &tensor_desc = variable->
-  //  PaddleMobile__Framework__Proto__ProgramDesc *c_program;
-  //  uint8_t *proto_buf = NULL;
-  //  size_t read_size = ReadBuffer(file_path.c_str(), &proto_buf);
-  //  c_program = paddle_mobile__framework__proto__program_desc__unpack(NULL,
-  //  read_size, buf);
-  //  paddle_mobile__framework__proto__var_type__tensor_desc__init()
-  int memory_size = 1;
-  for (auto l : desc.Dims()) {
-    memory_size *= l;
-  }
-  tensor->Resize(framework::make_ddim(desc.Dims()));
-  void *memory = tensor;
-  int type_size = 0;
-  switch (desc.DataType()) {
-    case framework::VARTYPE_TYPE_FP16:
-      type_size = 2;
-      break;
-    case framework::VARTYPE_TYPE_FP32:
-      type_size = 4;
-      memory = tensor->mutable_data<float>();
-      break;
-    case framework::VARTYPE_TYPE_FP64:
-      type_size = 8;
-      break;
-    case framework::VARTYPE_TYPE_INT32:
-      type_size = 4;
-      break;
-    case framework::VARTYPE_TYPE_INT64:
-      type_size = 8;
-      break;
-    case framework::VARTYPE_TYPE_BOOL:
-      type_size = 1;
-      break;
-    default:
-      break;
-  }
-  is.read(static_cast<char *>(memory), memory_size * type_size);
-  is.close();
 }
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-    const std::string &dirname, bool optimize) {
+    const std::string &model_path, const std::string &para_path,
-  std::string model_filename = dirname + "/__model__";
+    bool optimize) {
+  auto program = this->LoadProgram(model_path, optimize);
+  program.para_path = para_path;
+  program.is_commbine = true;
+  return program;
+}
+template <typename Dtype, Precision P>
+const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
+    const std::string &model_path, bool optimize) {
+  std::string model_filename = model_path;
  PaddleMobile__Framework__Proto__ProgramDesc *c_program;
  uint8_t *buf = NULL;
  size_t read_size = ReadBuffer(model_filename.c_str(), &buf);
@@ -183,22 +103,16 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
  //
  DLOG << "n_ops: " << (*c_program->blocks)->n_ops;
  //
-  std::shared_ptr<framework::ProgramDesc> originProgramDesc =
+  auto originProgramDesc = std::make_shared<framework::ProgramDesc>(c_program);
-      std::make_shared<framework::ProgramDesc>(c_program);
  framework::Program<Dtype, P> program;
-  program.model_path = dirname;
  program.originProgram = originProgramDesc;
-  std::shared_ptr<framework::Scope> scope =
+  auto scope = std::make_shared<framework::Scope>();
-      std::make_shared<framework::Scope>();
  program.scope = scope;
-  originProgramDesc->Block(0);
  for (const auto &block : originProgramDesc->Blocks()) {
-    for (int i = 0; i < block->Vars().size(); ++i) {
+    for (auto var_desc : block->Vars()) {
-      std::shared_ptr<framework::VarDesc> var_desc = block->Vars()[i];
-      //      DLOG << "var name-- " << var_desc->Name();
      auto var = scope->Var(var_desc->Name());
      if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
@@ -221,6 +135,8 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
    }
  }
+  //  originProgramDesc->Description("program: ");
  if (optimize) {
    framework::ProgramOptimize program_optimize;
    program.optimizeProgram =
@@ -267,36 +183,38 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
      ops_of_block_[*block_desc.get()].push_back(op_base);
    }
  }
-  InitMemory();
+  if (program_.is_commbine) {
+    InitCombineMemory();
+  } else {
+    InitMemory();
+  }
 }
 template <typename Dtype, Precision P>
 void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
-                                    framework::LoDTensor *tensor,
+                                    framework::LoDTensor *tensor, char *&data) {
-                                    const std::string &file_path) {
-  std::ifstream is(file_path);
-  PADDLE_MOBILE_ENFORCE(is.is_open(), "open file: %s failed",
-                        file_path.c_str());
-  std::fpos<mbstate_t> pos;
-  pos = is.tellg();  // save   current   position
-  is.seekg(0, std::ios::end);
-  is.seekg(pos);  // restore   saved   position
  // 1. version
-  uint32_t version;
+  uint32_t version = *(uint32_t *)data;
-  is.read(reinterpret_cast<char *>(&version), sizeof(version));
+  data += sizeof(uint32_t);
  // 2 Lod information
-  uint64_t lod_level;
+  uint64_t lod_level = *(uint64_t *)data;
-  is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
+  data += sizeof(uint64_t);
  auto &lod = *tensor->mutable_lod();
  lod.resize(lod_level);
  for (uint64_t i = 0; i < lod_level; ++i) {
-    uint64_t size;
+    uint64_t size = *(uint64_t *)data;
-    is.read(reinterpret_cast<char *>(&size), sizeof(size));
+    data += sizeof(uint64_t);
+    DLOG << "lod size: " << i << size;
    std::vector<size_t> tmp(size / sizeof(size_t));
-    is.read(reinterpret_cast<char *>(tmp.data()),
-            static_cast<std::streamsize>(size));
+    for (int k = 0; k < tmp.size(); ++k) {
+      tmp[k] = *(size_t *)data;
+      DLOG << "tmp[k]: " << k << *(size_t *)data;
+      data += sizeof(size_t);
+    }
    for (auto j : tmp) {
      LOG(kLOG_DEBUG1) << "    lod - " << j;
    }
@@ -304,17 +222,20 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
  }
  // 3. tensor version
-  uint32_t tensor_version;
+  uint32_t tensor_version = *(uint32_t *)data;
-  is.read(reinterpret_cast<char *>(&tensor_version), sizeof(tensor_version));
+  data += sizeof(uint32_t);
  // 4. tensor desc
-  int32_t size;
+  int32_t size = *(int32_t *)data;
-  is.read(reinterpret_cast<char *>(&size), sizeof(size));
+  data += sizeof(int32_t);
  std::unique_ptr<char[]> buf(new char[size]);
-  is.read(reinterpret_cast<char *>(buf.get()), size);
+  for (int m = 0; m < size; ++m) {
+    buf.get()[m] = data[m];
+  }
+  data += (sizeof(char) * size);
  const framework::TensorDesc &desc = var_desc.Tensor_desc();
  int memory_size = 1;
  for (auto l : desc.Dims()) {
    memory_size *= l;
@@ -348,8 +269,10 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
      break;
  }
-  is.read(static_cast<char *>(memory), memory_size * type_size);
+  for (int n = 0; n < memory_size * type_size; ++n) {
-  is.close();
+    static_cast<char *>(memory)[n] = data[n];
+  }
+  data += (sizeof(char) * memory_size * type_size);
 }
 template <typename Dtype, Precision P>
@@ -362,8 +285,12 @@ void Executor<Dtype, P>::InitMemory() {
        if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
          continue;
        }
-        LoadMemory(*var_desc, tensor,
-                   program_.model_path + "/" + var_desc->Name());
+        char *origin_data =
+            Get_binary_data(program_.model_path + "/" + var_desc->Name());
+        char *data = origin_data;
+        LoadMemory(*var_desc, tensor, data);
+        delete origin_data;
      } else {
        if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
          auto tensor = var->template GetMutable<framework::LoDTensor>();
@@ -375,6 +302,30 @@ void Executor<Dtype, P>::InitMemory() {
  }
 }
+/*
+ * @b initialise every variable of the program from one combined parameter
+ *    blob: the file at program_.para_path is read once and LoadMemory
+ *    consumes it sequentially, advancing the cursor past each tensor
+ * */
+template <typename Dtype, Precision P>
+void Executor<Dtype, P>::InitCombineMemory() {
+  char *origin_data = Get_binary_data(program_.para_path);
+  // Read cursor into the blob; LoadMemory takes it by reference and moves it.
+  char *cursor = origin_data;
+  for (const auto &block : to_predict_program_->Blocks()) {
+    for (const auto &var_desc : block->Vars()) {
+      auto var = program_.scope->Var(var_desc->Name());
+      if (!var_desc->Persistable()) {
+        // Non-persistable LoD tensors only need their storage allocated.
+        if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
+          auto tensor = var->template GetMutable<framework::LoDTensor>();
+          tensor->template mutable_data<Ptype>();
+        }
+        continue;
+      }
+      auto tensor = var->template GetMutable<framework::LoDTensor>();
+      // feed / fetch carry no stored parameters, so nothing is read for them.
+      const bool is_io_var =
+          var_desc->Name() == "feed" || var_desc->Name() == "fetch";
+      if (is_io_var) {
+        continue;
+      }
+      LoadMemory(*var_desc, tensor, cursor);
+    }
+  }
+  // NOTE(review): if Get_binary_data allocates with new[], this should be
+  // delete[] - confirm against its definition.
+  delete origin_data;
+}
 template <typename Dtype, Precision P>
 std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
    const framework::Tensor &t) {

--- a/src/io.h
+++ b/src/io.h
@@ -14,51 +14,73 @@ limitations under the License. */
 #pragma once
-#include <memory.h>
 #include <map>
+#include <memory>
 #include <string>
 #include <vector>
 #include "common/types.h"
 #include "framework/lod_tensor.h"
 #include "framework/operator.h"
-#include "framework/paddle_mobile_object.h"
 #include "framework/program/program.h"
 #include "framework/tensor.h"
 namespace paddle_mobile {
-template <typename Dtype, Precision P = Precision::FP32>
+template <typename Dtype = CPU, Precision P = Precision::FP32>
-class Loader : PaddleMobileObject {
+class Loader {
 public:
+  /*
+   * @b load separate format fluid model
+   * @b 加载分开形式的 fluid 模型
+   * */
  const framework::Program<Dtype, P> Load(const std::string &dirname,
-                                          bool optimize = true);
+                                          bool optimize = false);
+  /*
+   * @b load combine format fluid model
+   * @b load a model whose data is stored together in the combined format
+   * */
+  const framework::Program<Dtype, P> Load(const std::string &model_path,
+                                          const std::string &para_path,
+                                          bool optimize = false);
 private:
-  void LoadVar(framework::Variable *variable,
+  const framework::Program<Dtype, P> LoadProgram(const std::string &model_path,
-               const framework::VarDesc &var_desc,
+                                                 bool optimize = false);
-               const std::string &file_path);
 };
-template <typename Dtype, Precision P = Precision::FP32>
+template <typename Dtype = CPU, Precision P = Precision::FP32>
 class Executor {
 public:
  typedef typename PrecisionTrait<P>::ptype Ptype;
+  /*
+   * @b init executor with program load by Loader class
+   * @b 用 loader load 的 program 实例化 executor
+   * */
  Executor(const framework::Program<Dtype> p, int batch_size = 1,
           bool use_optimize = true);
+  /*
+   * @b to predict
+   * */
  std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t);
+  /*
+   * @b to predict with vector and dim
+   *
+   * @b 使用 输入 和 输入的维度信息 进行预测
+   * */
  std::vector<Ptype> Predict(const std::vector<Ptype> &input,
                             const std::vector<int64_t> &dims);
 protected:
  Executor() = default;
  void InitMemory();
  void LoadMemory(const framework::VarDesc var_desc,
-                  framework::LoDTensor *tensor, const std::string &file_path);
+                  framework::LoDTensor *tensor, char *&data);
+  void InitCombineMemory();
  framework::Program<Dtype> program_;
  int batch_size_ = 1;
  std::shared_ptr<framework::ProgramDesc> to_predict_program_;

--- a/src/jni/paddle_mobile_jni.cpp
+++ b/src/jni/paddle_mobile_jni.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef ANDROID
+#include "paddle_mobile_jni.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+namespace paddle_mobile {
+namespace jni {
+using framework::DDim;
+using framework::Program;
+using framework::Tensor;
+using paddle_mobile::CPU;
+using std::string;
+extern const char *ANDROID_LOG_TAG =
+    "paddle_mobile LOG built on " __DATE__ " " __TIME__;
+static Executor<CPU> *shared_executor_instance = nullptr;
+// toDo mutex lock
+// static std::mutex shared_mutex;
+/**
+ * Return the process-wide executor, creating it from the given program on
+ * the first call. Later calls return the existing instance and ignore their
+ * arguments. No locking is performed.
+ */
+Executor<CPU> *getExecutorInstance(const Program<CPU> p, int batch_size,
+                                   bool use_optimize) {
+  if (shared_executor_instance != nullptr) {
+    return shared_executor_instance;
+  }
+  shared_executor_instance = new Executor<CPU>(p, batch_size, use_optimize);
+  return shared_executor_instance;
+}
+/**
+ * Copy a Java string into a std::string (modified UTF-8 bytes).
+ *
+ * Returns an empty string when jstr is null or when the JVM cannot provide
+ * the characters (GetStringUTFChars returns NULL, e.g. on out-of-memory),
+ * instead of constructing a std::string from a null pointer.
+ */
+string jstring2cppstring(JNIEnv *env, jstring jstr) {
+  if (jstr == nullptr) {
+    return string();
+  }
+  const char *cstr = env->GetStringUTFChars(jstr, nullptr);
+  if (cstr == nullptr) {
+    // The JVM has already raised an exception for the Java caller.
+    return string();
+  }
+  string cppstr(cstr);
+  // The copy above owns its bytes, so the JVM buffer can be released now.
+  env->ReleaseStringUTFChars(jstr, cstr);
+  return cppstr;
+}
+/**
+ * JNI entry point: load the model found at modelPath (with optimisation
+ * enabled) and hand it to the shared executor with batch size 1.
+ * Returns JNI_TRUE when a shared executor exists afterwards.
+ */
+JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
+                                                          jclass thiz,
+                                                          jstring modelPath) {
+  const bool optimize = true;
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(jstring2cppstring(env, modelPath), optimize);
+  shared_executor_instance = getExecutorInstance(program, 1, optimize);
+  if (shared_executor_instance == nullptr) {
+    return JNI_FALSE;
+  }
+  return JNI_TRUE;
+}
+/**
+ * JNI entry point: run the shared executor on one image.
+ *
+ * buf must hold at least 1 * 3 * 224 * 224 floats; they are copied into a
+ * tensor of that fixed shape. Returns a new float array with the network
+ * output, or null when no model is loaded, buf is null or too short, or the
+ * JVM cannot provide or allocate the arrays.
+ */
+JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
+    JNIEnv *env, jclass thiz, jfloatArray buf) {
+  if (nullptr == buf || nullptr == shared_executor_instance) {
+    return nullptr;
+  }
+  framework::DDim ddim = framework::make_ddim({1, 3, 224, 224});
+  const int input_size = static_cast<int>(framework::product(ddim));
+  // Refuse short inputs instead of reading past the end of the Java array.
+  if (env->GetArrayLength(buf) < input_size) {
+    return nullptr;
+  }
+  float *dataPointer = env->GetFloatArrayElements(buf, nullptr);
+  if (nullptr == dataPointer) {
+    return nullptr;
+  }
+  framework::Tensor input;
+  input.Resize(ddim);
+  auto input_ptr = input.mutable_data<float>();
+  for (int i = 0; i < input_size; i++) {
+    input_ptr[i] = dataPointer[i];
+  }
+  // The input was only read, so release it without copying anything back.
+  env->ReleaseFloatArrayElements(buf, dataPointer, JNI_ABORT);
+  auto output = shared_executor_instance->Predict(input);
+  if (!output) {
+    return nullptr;
+  }
+  const int count = output->numel();
+  jfloatArray result = env->NewFloatArray(count);
+  if (nullptr == result) {
+    return nullptr;
+  }
+  env->SetFloatArrayRegion(result, 0, count, output->data<float>());
+  return result;
+}
+// JNI entry point meant to clear the net's data on destroy. The body is
+// currently empty: nothing is released and shared_executor_instance is left
+// untouched.
+JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_clear(JNIEnv *env,
+                                                       jclass thiz) {}
+}  // namespace jni
+}  // namespace paddle_mobile
+#ifdef __cplusplus
+}
+#endif
+#endif
--- a/src/framework/data_transform.h
+++ b/src/framework/data_transform.h
@@ -13,25 +13,39 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
+#ifdef ANDROID
-#include <functional>
+#include <jni.h>
-#include <utility>
+#include "common/log.h"
-#include <vector>
-#include "framework/op_kernel_type.h"
-#include "framework/selected_rows.h"
 #include "framework/tensor.h"
-#include "framework/variable.h"
+#include "io/io.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
 namespace paddle_mobile {
-namespace framework {
+namespace jni {
+/**
-void DataTransform(const OpKernelType &expected_kernel_type,
+ * load model & params of the net for android
-                   const OpKernelType &kernel_type_for_var,
+ */
-                   const Tensor &input_tensor, Tensor *out);
+JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
+                                                          jclass thiz,
-void CopyVariableWithTensor(const Variable &in_var, const Tensor &tensor,
+                                                          jstring modelPath);
-                            Variable *out_var);
+/**
-}  // namespace framework
+ * object detection for android
+ */
+JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
+    JNIEnv *env, jclass thiz, jfloatArray buf);
+/**
+ * clear data of the net when destroy for android
+ *
+ * The symbol name must match the definition in the .cpp file
+ * (Java_com_baidu_paddle_PML_clear) so the JVM can resolve the native method.
+ */
+JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_clear(JNIEnv *env,
+                                                       jclass thiz);
+}  // namespace jni
 }  // namespace paddle_mobile
+#ifdef __cplusplus
+}
+#endif
+#endif
--- a/src/operators/batchnorm_op.cpp
+++ b/src/operators/batchnorm_op.cpp
@@ -12,7 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef BATCHNORM_OP
 #include "batchnorm_op.h"
+#include "framework/op_proto_maker.h"
+#include "framework/op_registry.h"
 namespace paddle_mobile {
 namespace operators {
@@ -29,3 +33,5 @@ template class BatchNormOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(batch_norm);
 REGISTER_OPERATOR(batch_norm, ops::BatchNormOp);
+#endif
--- a/src/operators/batchnorm_op.h
+++ b/src/operators/batchnorm_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef BATCHNORM_OP
 #pragma once
 #include <string>
@@ -27,7 +29,7 @@ class BatchNormOp : public framework::OperatorWithKernel<DeviceType> {
 public:
  BatchNormOp(const string &type, const VariableNameMap &inputs,
              const VariableNameMap &outputs,
-              const framework::AttributeMap attrs,
+              const framework::AttributeMap &attrs,
              std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                  scope),
@@ -47,3 +49,5 @@ class BatchNormOp : public framework::OperatorWithKernel<DeviceType> {
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/box_coder_op.cpp
+++ b/src/operators/box_coder_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef BOXCODER_OP
 #include "operators/box_coder_op.h"
 #include <vector>
 namespace paddle_mobile {
@@ -52,3 +54,5 @@ template class BoxCoderOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(box_coder);
 REGISTER_OPERATOR(box_coder, ops::BoxCoderOp);
+#endif
--- a/src/operators/box_coder_op.h
+++ b/src/operators/box_coder_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef BOXCODER_OP
 #pragma once
 #include <string>
@@ -30,7 +32,7 @@ class BoxCoderOp : public framework::OperatorWithKernel<DeviceType> {
 public:
  BoxCoderOp(const std::string &type, const VariableNameMap &inputs,
             const VariableNameMap &outputs,
-             const framework::AttributeMap attrs,
+             const framework::AttributeMap &attrs,
             std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                  scope),
@@ -50,3 +52,5 @@ class BoxCoderOp : public framework::OperatorWithKernel<DeviceType> {
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/concat_op.cpp
+++ b/src/operators/concat_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef CONCAT_OP
 #include "concat_op.h"
 namespace paddle_mobile {
@@ -62,3 +64,5 @@ template class ConcatOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(concat);
 REGISTER_OPERATOR(concat, ops::ConcatOp);
+#endif
--- a/src/operators/concat_op.h
+++ b/src/operators/concat_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef CONCAT_OP
 #pragma once
 #include <string>
@@ -25,7 +27,7 @@ template <typename DeviceType, typename T>
 class ConcatOp : public framework::OperatorWithKernel<DeviceType> {
 public:
  ConcatOp(const string &type, const VariableNameMap &inputs,
-           const VariableNameMap &outputs, const framework::AttributeMap attrs,
+           const VariableNameMap &outputs, const framework::AttributeMap &attrs,
           std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                  scope),
@@ -45,3 +47,5 @@ class ConcatOp : public framework::OperatorWithKernel<DeviceType> {
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/conv_op.cpp
+++ b/src/operators/conv_op.cpp
@@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef CONV_OP
 #include "operators/conv_op.h"
 #include <vector>
-#include "framework/data_type.h"
 #include "framework/op_proto_maker.h"
 #include "framework/op_registry.h"
@@ -23,30 +24,11 @@ namespace operators {
 template <typename Dtype, typename T>
 void ConvOp<Dtype, T>::InferShape() const {
-  //  std::cout << " begin get dims: " << std::endl;
  auto in_dims = param_.Input()->dims();
-  //  std::cout << " end get in dims: " << std::endl;
-  //  std::cout << " in_dims: " << in_dims << std::endl;
-  //  std::cout << " begin get Filter " << std::endl;
  auto filter_dims = param_.Filter()->dims();
-  //  std::cout << " end get Filter " << std::endl;
-  //  std::cout << " begin get Attrs " << std::endl;
  const std::vector<int> &strides = param_.Strides();
-  //  std::cout << " end get Attrs " << strides[0] << std::endl;
  std::vector<int> paddings = param_.Paddings();
  int groups = param_.Groups();
  std::vector<int> dilations = param_.Dilations();
  PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
@@ -73,3 +55,5 @@ template class ConvOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(conv2d);
 REGISTER_OPERATOR(conv2d, ops::ConvOp);
+#endif
--- a/src/operators/conv_op.h
+++ b/src/operators/conv_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef CONV_OP
 #pragma once
 #include <string>
@@ -53,3 +55,5 @@ inline int ConvOutputSize(int input_size, int filter_size, int dilation,
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/depthwise_conv_op.cpp
+++ b/src/operators/depthwise_conv_op.cpp
@@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef DEPTHWISECONV_OP
 #include "operators/depthwise_conv_op.h"
 #include <vector>
-#include "framework/data_type.h"
 #include "framework/op_proto_maker.h"
 #include "framework/op_registry.h"
 #include "operators/conv_op.h"
@@ -55,3 +56,5 @@ template class DepthwiseConvOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(depthwise_conv2d);
 REGISTER_OPERATOR(depthwise_conv2d, ops::DepthwiseConvOp);
+#endif
--- a/src/operators/depthwise_conv_op.h
+++ b/src/operators/depthwise_conv_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef DEPTHWISECONV_OP
 #pragma once
 #include <string>
@@ -47,3 +49,5 @@ class DepthwiseConvOp : public framework::OperatorWithKernel<DeviceType> {
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/elementwise_add_op.cpp
+++ b/src/operators/elementwise_add_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef ELEMENTWISEADD_OP
 #include "elementwise_add_op.h"
 namespace paddle_mobile {
@@ -29,3 +31,5 @@ template class ElementwiseAddOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(elementwise_add);
 REGISTER_OPERATOR(elementwise_add, ops::ElementwiseAddOp);
+#endif
--- a/src/operators/elementwise_add_op.h
+++ b/src/operators/elementwise_add_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef ELEMENTWISEADD_OP
 #pragma once
 #include <string>
@@ -27,7 +29,7 @@ class ElementwiseAddOp : public framework::OperatorWithKernel<DeviceType> {
 public:
  ElementwiseAddOp(const string &type, const VariableNameMap &inputs,
                   const VariableNameMap &outputs,
-                   const framework::AttributeMap attrs,
+                   const framework::AttributeMap &attrs,
                   std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                  scope),
@@ -46,3 +48,5 @@ class ElementwiseAddOp : public framework::OperatorWithKernel<DeviceType> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/framework/program/var_desc.cpp
+++ b/src/framework/program/var_desc.cpp
@@ -12,9 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "var_desc.h"
+#ifdef FUSIONCONVADD_OP
+#include "operators/fusion_conv_add.h"
 namespace paddle_mobile {
+namespace operators {
-namespace framework {}  // namespace framework
+template <typename Dtype, typename T>
+void FushionConvAddOp<Dtype, T>::InferShape() const {}
+template class FushionConvAddOp<CPU, float>;
+}  // namespace operators
 }  // namespace paddle_mobile
+namespace ops = paddle_mobile::operators;
+USE_OP(conv_add);
+REGISTER_OPERATOR(conv_add, ops::FushionConvAddOp);
+#endif
--- a/src/framework/selected_rows.h
+++ b/src/framework/selected_rows.h
@@ -12,66 +12,61 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef FUSIONCONVADD_OP
 #pragma once
+#include <string>
 #include <vector>
-#include "lod_tensor.h"
+#include "framework/operator.h"
-#include "tensor.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
 namespace paddle_mobile {
-namespace framework {
+namespace operators {
+using std::string;
-class SelectedRows {
+using std::vector;
+class FusionConvAddMatcher : public framework::FusionOpMatcher {
 public:
-  SelectedRows(const std::vector<int64_t> &rows, const int64_t &height)
+  FusionConvAddMatcher() {
-      : rows_(rows), height_(height) {
+    node_ = framework::Node(G_OP_TYPE_CONV);
-    value_.reset(new Tensor());
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD);
  }
-  SelectedRows() {
+  void FolderNodes(
-    height_ = 0;
+      framework::Node *node,
-    value_.reset(new Tensor());
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    vector<std::shared_ptr<framework::OpDesc>> origin_descs =
+        node->OpDescs(node_.Depth());
+    node->Folder(node_.Depth(), Type(),
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Y"}}}, removed_nodes);
  }
-  const Tensor &value() const { return *value_; }
+  std::string Type() { return G_OP_TYPE_CONV_ADD; }
+};
-  Tensor *mutable_value() { return value_.get(); }
-  int64_t height() const { return height_; }
-  void set_height(int64_t height) { height_ = height; }
-  const std::vector<int64_t> &rows() const { return rows_; }
-  std::vector<int64_t> *mutable_rows() { return &rows_; }
-  void set_rows(const std::vector<int64_t> &rows) { rows_ = rows; }
+template <typename DeviceType, typename T>
+class FushionConvAddOp : public framework::OperatorWithKernel<DeviceType> {
+ public:
+  FushionConvAddOp(const string &type, const VariableNameMap &inputs,
+                   const VariableNameMap &outputs,
+                   const framework::AttributeMap &attrs,
+                   std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
+                                                  scope) {}
-  /**
+  void RunImpl() const {}
-   * get the index of id in rows
-   */
-  int64_t index(int64_t id) const {
-    auto it = std::find(rows_.begin(), rows_.end(), id);
-    //    PADDLE_ENFORCE(it != rows_.end(), "id should be in rows");
-    return static_cast<int64_t>(std::distance(rows_.begin(), it));
-  }
-  DDim GetCompleteDims() const {
+  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
-    std::vector<int64_t> dims = vectorize(value_->dims());
+  void InferShape() const override;
-    dims[0] = height_;
-    return make_ddim(dims);
-  }
- private:
+ protected:
-  // Notice: rows can be duplicate. We can have {0, 4, 7, 0, 5, 7, 9}
+  //  FushionFcParam param_;
-  // here.
-  // SelectedRows are simply concated when adding together. Until a
-  // SelectedRows add a Tensor, will the duplicate rows be handled.
-  std::vector<int64_t> rows_;
-  std::unique_ptr<Tensor> value_{nullptr};
-  int64_t height_;
 };
-}  // namespace framework
+// static framework::FusionOpRegistrar fc_registrar(new FusionConvAddMatcher());
+}  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/fusion_conv_add_relu_op.cpp
+++ b/src/operators/fusion_conv_add_relu_op.cpp
@@ -12,4 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef CONVADDRELU_OP
 #include "fusion_conv_add_relu_op.h"
+#endif
--- a/src/operators/fusion_conv_add_relu_op.h
+++ b/src/operators/fusion_conv_add_relu_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef CONVADDRELU_OP
 #pragma once
 #include "framework/operator.h"
@@ -28,16 +30,18 @@ class FushionConvAddReluOpMatcher : public framework::FusionOpMatcher {
        std::make_shared<framework::Node>(G_OP_TYPE_RELU);
  }
-  void FolderNodes(framework::Node *node) {
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
    std::vector<std::shared_ptr<framework::OpDesc>> origin_descs =
        node->OpDescs(node_.Depth());
    node->Folder(node_.Depth(), Type(),
-                 {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}}, removed_nodes);
  }
  std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_RELU; }
 };
-class FusionFcOp {
+class ConvAddReluOp {
 public:
 private:
 };
@@ -47,3 +51,5 @@ class FusionFcOp {
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/fusion_fc_op.cpp
+++ b/src/operators/fusion_fc_op.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef FUSION_FC_OP
 #include "operators/fusion_fc_op.h"
 namespace paddle_mobile {
 namespace operators {
@@ -54,3 +56,5 @@ template class FushionFcOp<CPU, float>;
 namespace ops = paddle_mobile::operators;
 USE_OP(fc);
 REGISTER_OPERATOR(fc, ops::FushionFcOp);
+#endif
--- a/src/operators/fusion_fc_op.h
+++ b/src/operators/fusion_fc_op.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef FUSION_FC_OP
 #pragma once
 #include <string>
@@ -32,11 +34,13 @@ class FusionFcMatcher : public framework::FusionOpMatcher {
    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD);
  }
-  void FolderNodes(framework::Node *node) {
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
    vector<std::shared_ptr<framework::OpDesc>> origin_descs =
        node->OpDescs(node_.Depth());
    node->Folder(node_.Depth(), Type(),
-                 {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}});
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {"Y", "Z"}}}, removed_nodes);
  }
  std::string Type() { return G_OP_TYPE_FC; }
@@ -47,7 +51,7 @@ class FushionFcOp : public framework::OperatorWithKernel<DeviceType> {
 public:
  FushionFcOp(const string &type, const VariableNameMap &inputs,
              const VariableNameMap &outputs,
-              const framework::AttributeMap attrs,
+              const framework::AttributeMap &attrs,
              std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
                                                  scope),
@@ -65,7 +69,9 @@ class FushionFcOp : public framework::OperatorWithKernel<DeviceType> {
  FushionFcParam param_;
 };
-static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
+// static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/batchnorm_kernel.cpp
+++ b/src/operators/kernel/arm/batchnorm_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef BATCHNORM_OP
 #pragma once
 #include "operators/kernel/batchnorm_kernel.h"
@@ -47,7 +49,7 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
  Tensor inv_std;
  auto inv_std_ptr = inv_std.mutable_data<float>(make_ddim({C}));
  if (C != variance->numel()) {
-    std::cout << "C must equal to variance.numel()" << std::endl;
+    DLOG << "C must equal to variance.numel()";
  }
  assert(C == variance->numel());
@@ -91,3 +93,5 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
 }
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/box_coder_kernel.cpp
+++ b/src/operators/kernel/arm/box_coder_kernel.cpp
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#pragma once
+#ifdef BOXCODER_OP
 #include "operators/kernel/box_coder_kernel.h"
@@ -135,3 +135,5 @@ void BoxCoderKernel<CPU, float>::Compute(const BoxCoderParam& param) const {
 }
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/concat_kernel.cpp
+++ b/src/operators/kernel/arm/concat_kernel.cpp
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#pragma once
+#ifdef CONCAT_OP
 #include "operators/kernel/concat_kernel.h"
@@ -85,3 +85,5 @@ void ConcatKernel<CPU, float>::Compute(const ConcatParam &param) const {
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/conv_kernel.cpp
+++ b/src/operators/kernel/arm/conv_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef CONV_OP
 #include "operators/kernel/conv_kernel.h"
 namespace paddle_mobile {
@@ -112,3 +114,5 @@ template class ConvKernel<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/depthwise_conv_kernel.cpp
+++ b/src/operators/kernel/arm/depthwise_conv_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef DEPTHWISECONV_OP
 #include "operators/kernel/depthwise_conv_kernel.h"
 #include "operators/kernel/conv_kernel.h"
@@ -124,3 +126,5 @@ template class DepthwiseConvKernel<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/elementwise_add_kernel.cpp
+++ b/src/operators/kernel/arm/elementwise_add_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef ELEMENTWISEADD_OP
 #pragma once
 #include "operators/kernel/elementwise_add_kernel.h"
@@ -40,3 +42,5 @@ template class ElementwiseAddKernel<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/fushion_fc_kernel.cpp
+++ b/src/operators/kernel/arm/fushion_fc_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef FUSION_FC_OP
 #pragma once
 #include "operators/kernel/fushion_fc_kernel.h"
@@ -65,3 +67,5 @@ void FushionFcKernel<CPU, float>::Compute(const FushionFcParam &param) const {
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/lrn_kernel.cpp
+++ b/src/operators/kernel/arm/lrn_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef LRN_OP
 #pragma once
 #include "operators/kernel/lrn_kernel.h"
@@ -42,3 +44,5 @@ template class LrnKernel<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/mul_kernel.cpp
+++ b/src/operators/kernel/arm/mul_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef MUL_OP
 #pragma once
 #include "operators/kernel/mul_kernel.h"
@@ -48,3 +50,5 @@ template class MulKernel<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/multiclass_nms_kernel.cpp
+++ b/src/operators/kernel/arm/multiclass_nms_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef MULTICLASSNMS_OP
 #pragma once
 #include "operators/kernel/multiclass_nms_kernel.h"
@@ -273,3 +275,5 @@ void MultiClassNMSKernel<CPU, float>::Compute(
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/pool_kernel.cpp
+++ b/src/operators/kernel/arm/pool_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef POOL_OP
 #include <operators/kernel/pool_kernel.h>
 #include "common/log.h"
@@ -73,3 +75,5 @@ void PoolKernel<CPU, float>::Compute(const PoolParam &param) const {
 }
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/prior_box_kernel.cpp
+++ b/src/operators/kernel/arm/prior_box_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef PRIORBOX_OP
 #pragma once
 #include "operators/kernel/prior_box_kernel.h"
@@ -143,3 +145,5 @@ void PriorBoxKernel<CPU, float>::Compute(const PriorBoxParam &param) const {
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/relu_kernel.cpp
+++ b/src/operators/kernel/arm/relu_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef RELU_OP
 #pragma once
 #include "operators/kernel/relu_kernel.h"
@@ -45,3 +47,5 @@ void ReluKernel<CPU, float>::Compute(const ReluParam &param) const {
 }
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/reshape_kernel.cpp
+++ b/src/operators/kernel/arm/reshape_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef RESHAPE_OP
 #pragma once
 #include "operators/kernel/reshape_kernel.h"
@@ -49,3 +51,5 @@ void ReshapeKernel<CPU, float>::Compute(const ReshapeParam &param) const {
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/sigmoid_kernel.cpp
+++ b/src/operators/kernel/arm/sigmoid_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef SIGMOID_OP
 #include "../sigmoid_kernel.h"
 #if __ARM_NEON
 #include "../../math/math_func_neon.h"
@@ -25,35 +27,23 @@ using framework::Tensor;
 void sigmoid(const Tensor *X, Tensor *Y) {
 #if __ARM_NEON
-  DLOG << "step1";
  const float *input = X->data<float>();
-  DLOG << "step11";
  float *output = Y->mutable_data<float>();
-  DLOG << "step2";
  const DDim &dDim = X->dims();
-  DLOG << "step3";
  int axis_index = 1;
  if (dDim.size() < 4) {
    axis_index = 0;
  }
-  DLOG << "step4";
  DDim outer_ddim =
      paddle_mobile::framework::slice_ddim(dDim, 0, axis_index + 1);
  DDim inner_ddim =
      paddle_mobile::framework::slice_ddim(dDim, axis_index + 1, dDim.size());
-  DLOG << "step5";
  int out_size = paddle_mobile::framework::product(outer_ddim);
  int inner_size = paddle_mobile::framework::product(inner_ddim);
-  DLOG << "step6";
-#pragma omp parallel for
  DLOG << "outsize=" << out_size;
  DLOG << "innersize=" << inner_size;
+  #pragma omp parallel for
  for (int i = 0; i < out_size; ++i) {
    const float *input_outer_ptr = input + i * inner_size;
    float *output_outer_ptr = output + i * inner_size;
@@ -93,3 +83,5 @@ void SigmoidKernel<CPU, float>::Compute(const SigmoidParam &param) const {
 template class SigmoidKernel<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/softmax_kernel.cpp
+++ b/src/operators/kernel/arm/softmax_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef SOFTMAX_OP
 #include "../softmax_kernel.h"
 #include "../../math/softmax.h"
 namespace paddle_mobile {
@@ -29,3 +31,5 @@ void SoftmaxKernel<CPU, float>::Compute(const SoftmaxParam &param) const {
 template class SoftmaxKernel<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/arm/transpose_kernel.cpp
+++ b/src/operators/kernel/arm/transpose_kernel.cpp
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#pragma once
+#ifdef TRANSPOSE_OP
 #include "operators/kernel/transpose_kernel.h"
@@ -70,3 +70,5 @@ void TransposeKernel<CPU, float>::Compute(const TransposeParam& param) const {
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/batchnorm_kernel.h
+++ b/src/operators/kernel/batchnorm_kernel.h
@@ -12,9 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef BATCHNORM_OP
+#pragma once
 #include "framework/operator.h"
 #include "operators/op_param.h"
-#pragma once;
 namespace paddle_mobile {
 namespace operators {
@@ -30,3 +33,5 @@ class BatchNormKernel
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/box_coder_kernel.h
+++ b/src/operators/kernel/box_coder_kernel.h
@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef BOXCODER_OP
+#pragma once
 #include <vector>
 #include "framework/operator.h"
 #include "operators/math/transform.h"
 #include "operators/op_param.h"
-#pragma once;
 namespace paddle_mobile {
 namespace operators {
@@ -31,3 +33,5 @@ class BoxCoderKernel
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/concat_kernel.h
+++ b/src/operators/kernel/concat_kernel.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef CONCAT_OP
 #pragma once
 #include "framework/operator.h"
 #include "operators/op_param.h"
@@ -29,3 +31,5 @@ class ConcatKernel : public framework::OpKernelBase<DeviceType, ConcatParam> {
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/conv_kernel.h
+++ b/src/operators/kernel/conv_kernel.h
@@ -12,6 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef CONV_OP
+#pragma once
 #include <vector>
 #include "framework/operator.h"
 #include "operators/math/im2col.h"
@@ -19,8 +23,6 @@ limitations under the License. */
 #include "operators/math/vol2col.h"
 #include "operators/op_param.h"
-#pragma once;
 namespace paddle_mobile {
 namespace operators {
@@ -49,3 +51,5 @@ inline bool IsExpand(const std::vector<int64_t> &filter_dim,
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/depthwise_conv_kernel.h
+++ b/src/operators/kernel/depthwise_conv_kernel.h
@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef DEPTHWISECONV_OP
+#pragma once
 #include "framework/operator.h"
 #include "operators/math/im2col.h"
 #include "operators/math/math_function.h"
 #include "operators/math/vol2col.h"
 #include "operators/op_param.h"
-#pragma once;
 namespace paddle_mobile {
 namespace operators {
@@ -32,3 +34,5 @@ class DepthwiseConvKernel : public OpKernelBase<DeviceType, ConvParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/elementwise_add_kernel.h
+++ b/src/operators/kernel/elementwise_add_kernel.h
@@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#pragma once;
+#ifdef ELEMENTWISEADD_OP
+#pragma once
 #include "framework/operator.h"
 #include "operators/math/elementwise_op_function.h"
@@ -31,3 +33,5 @@ class ElementwiseAddKernel
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/fpga/conv_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_kernel.cpp
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef CONV_OP
 namespace paddle_mobile {
 namespace operators {
@@ -22,3 +24,5 @@ namespace operators {
 // template class ConvKernel<FPGA, float>;
 }
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/fushion_fc_kernel.h
+++ b/src/operators/kernel/fushion_fc_kernel.h
@@ -12,12 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef FUSION_FC_OP
+#pragma once
 #include "framework/operator.h"
 #include "operators/math/math_function.h"
 #include "operators/op_param.h"
-#pragma once;
 namespace paddle_mobile {
 namespace operators {
@@ -29,3 +31,5 @@ class FushionFcKernel
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/lrn_kernel.h
+++ b/src/operators/kernel/lrn_kernel.h
@@ -12,9 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef LRN_OP
+#pragma once
 #include "framework/operator.h"
 #include "operators/op_param.h"
-#pragma once;
 namespace paddle_mobile {
 namespace operators {
@@ -70,3 +73,5 @@ class LrnKernel : public framework::OpKernelBase<DeviceType, LrnParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/framework/paddle_mobile_object.h
+++ b/src/framework/paddle_mobile_object.h
@@ -12,21 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#pragma once
+#ifdef CONV_OP
-#include <string>
+#include "operators/kernel/conv_kernel.h"
-#include "stdio.h"
 namespace paddle_mobile {
+namespace operators {
-class PaddleMobileObject {
+template <>
- public:
+void ConvKernel<GPU_MALI, float>::Compute(const ConvParam &param) const {}
-  virtual std::string ToString() {
-    char address[128] = {0};
-    sprintf(address, "%p", this);
-    return std::string(address);
-  }
- private:
+template class ConvKernel<GPU_MALI, float>;
-};
+}  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/mul_kernel.h
+++ b/src/operators/kernel/mul_kernel.h
@@ -12,10 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef MUL_OP
+#pragma once
 #include "framework/operator.h"
 #include "operators/math/math_function.h"
 #include "operators/op_param.h"
-#pragma once;
 namespace paddle_mobile {
 namespace operators {
@@ -29,3 +32,5 @@ class MulKernel : public framework::OpKernelBase<DeviceType, MulParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/multiclass_nms_kernel.h
+++ b/src/operators/kernel/multiclass_nms_kernel.h
@@ -12,10 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef MULTICLASSNMS_OP
+#pragma once
 #include "framework/operator.h"
-#include "operators/op_param.h"
-#pragma once;
+#include "operators/op_param.h"
 namespace paddle_mobile {
 namespace operators {
@@ -28,3 +31,5 @@ class MultiClassNMSKernel
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/pool_kernel.h
+++ b/src/operators/kernel/pool_kernel.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef POOL_OP
 #pragma once
 #include "framework/operator.h"
@@ -29,3 +31,5 @@ class PoolKernel : public OpKernelBase<DeviceType, PoolParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/prior_box_kernel.h
+++ b/src/operators/kernel/prior_box_kernel.h
@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef PRIORBOX_OP
+#pragma once
 #include <vector>
 #include "framework/operator.h"
 #include "operators/math/transform.h"
 #include "operators/op_param.h"
-#pragma once;
 namespace paddle_mobile {
 namespace operators {
@@ -55,3 +57,5 @@ class PriorBoxKernel
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/relu_kernel.h
+++ b/src/operators/kernel/relu_kernel.h
@@ -12,10 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef RELU_OP
+#pragma once
 #include "framework/operator.h"
-#include "operators/op_param.h"
-#pragma once;
+#include "operators/op_param.h"
 namespace paddle_mobile {
 namespace operators {
@@ -27,3 +30,5 @@ class ReluKernel : public framework::OpKernelBase<DeviceType, ReluParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/reshape_kernel.h
+++ b/src/operators/kernel/reshape_kernel.h
@@ -12,12 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include <vector>
+#ifdef RESHAPE_OP
+#pragma once
+#include <vector>
 #include "framework/operator.h"
-#include "operators/op_param.h"
-#pragma once;
+#include "operators/op_param.h"
 namespace paddle_mobile {
 namespace operators {
@@ -72,3 +74,5 @@ class ReshapeKernel : public framework::OpKernelBase<DeviceType, ReshapeParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/sigmoid_kernel.h
+++ b/src/operators/kernel/sigmoid_kernel.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef SIGMOID_OP
 #pragma once
 #include "framework/operator.h"
@@ -27,3 +29,5 @@ class SigmoidKernel : public OpKernelBase<DeviceType, SigmoidParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/softmax_kernel.h
+++ b/src/operators/kernel/softmax_kernel.h
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef SOFTMAX_OP
 #pragma once
 #include "framework/operator.h"
@@ -30,3 +32,5 @@ class SoftmaxKernel : public OpKernelBase<DeviceType, SoftmaxParam> {
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/transpose_kernel.h
+++ b/src/operators/kernel/transpose_kernel.h
@@ -12,13 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#ifdef TRANSPOSE_OP
+#pragma once
 #include <vector>
 #include "framework/operator.h"
 #include "operators/op_param.h"
-#pragma once;
 namespace paddle_mobile {
 namespace operators {
@@ -30,3 +32,5 @@ class TransposeKernel
 };
 }  // namespace operators
 }  // namespace paddle_mobile
+#endif
--- a/src/operators/lrn_op.cpp
+++ b/src/operators/lrn_op.cpp
--- a/src/operators/lrn_op.h
+++ b/src/operators/lrn_op.h
--- a/src/operators/math/gemm.cpp
+++ b/src/operators/math/gemm.cpp
--- a/src/operators/math/pool3x3.h
+++ b/src/operators/math/pool3x3.h
--- a/src/operators/math/pool_2x2.h
+++ b/src/operators/math/pool_2x2.h
--- a/src/operators/math/pooling.cpp
+++ b/src/operators/math/pooling.cpp
--- a/src/operators/math/pooling.h
+++ b/src/operators/math/pooling.h
--- a/src/operators/math/softmax.cpp
+++ b/src/operators/math/softmax.cpp
--- a/src/operators/math/softmax.h
+++ b/src/operators/math/softmax.h
--- a/src/operators/mul_op.cpp
+++ b/src/operators/mul_op.cpp
--- a/src/operators/mul_op.h
+++ b/src/operators/mul_op.h
--- a/src/operators/multiclass_nms_op.cpp
+++ b/src/operators/multiclass_nms_op.cpp
--- a/src/operators/multiclass_nms_op.h
+++ b/src/operators/multiclass_nms_op.h
--- a/src/operators/op_param.cpp
+++ b/src/operators/op_param.cpp
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
--- a/src/operators/pool_op.cpp
+++ b/src/operators/pool_op.cpp
--- a/src/operators/pool_op.h
+++ b/src/operators/pool_op.h
--- a/src/operators/prior_box_op.cpp
+++ b/src/operators/prior_box_op.cpp
--- a/src/operators/prior_box_op.h
+++ b/src/operators/prior_box_op.h
--- a/src/operators/relu_op.cpp
+++ b/src/operators/relu_op.cpp
--- a/src/operators/relu_op.h
+++ b/src/operators/relu_op.h
--- a/src/operators/reshape_op.cpp
+++ b/src/operators/reshape_op.cpp
--- a/src/operators/reshape_op.h
+++ b/src/operators/reshape_op.h
--- a/src/operators/sigmoid_op.cpp
+++ b/src/operators/sigmoid_op.cpp
--- a/src/operators/sigmoid_op.h
+++ b/src/operators/sigmoid_op.h
--- a/src/operators/softmax_op.cpp
+++ b/src/operators/softmax_op.cpp
--- a/src/operators/softmax_op.h
+++ b/src/operators/softmax_op.h
--- a/src/operators/transpose_op.cpp
+++ b/src/operators/transpose_op.cpp
--- a/src/operators/transpose_op.h
+++ b/src/operators/transpose_op.h
--- a/src/platform/data_type.h
+++ b/src/platform/data_type.h
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
--- a/test/common/test_gemm.cpp.cpp
+++ b/test/common/test_gemm.cpp.cpp
--- a/src/framework/program/tensor_desc.cpp
+++ b/src/framework/program/tensor_desc.cpp
--- a/test/common/test_lib_size.h
+++ b/test/common/test_lib_size.h
--- a/src/platform/hostdevice.h
+++ b/src/platform/hostdevice.h
--- a/test/executor_for_test.h
+++ b/test/executor_for_test.h
--- a/test/framework/test_load.cpp
+++ b/test/framework/test_load.cpp
--- a/test/framework/test_optimize.cpp
+++ b/test/framework/test_optimize.cpp
--- a/test/net/test_googlenet.cpp
+++ b/test/net/test_googlenet.cpp
--- a/test/net/test_mobilenet+ssd.cpp
+++ b/test/net/test_mobilenet+ssd.cpp
--- a/test/operators/test_concat_op.cpp
+++ b/test/operators/test_concat_op.cpp
--- a/test/operators/test_cov_op.cpp
+++ b/test/operators/test_cov_op.cpp
--- a/test/operators/test_depthwise_conv_op.cpp
+++ b/test/operators/test_depthwise_conv_op.cpp
--- a/test/operators/test_elementwise_add_op.cpp
+++ b/test/operators/test_elementwise_add_op.cpp
--- a/test/operators/test_lrn_op.cpp
+++ b/test/operators/test_lrn_op.cpp
--- a/test/operators/test_mul_op.cpp
+++ b/test/operators/test_mul_op.cpp
--- a/test/operators/test_pool_op.cpp
+++ b/test/operators/test_pool_op.cpp
--- a/test/operators/test_relu_op.cpp
+++ b/test/operators/test_relu_op.cpp
--- a/test/operators/test_reshape_op.cpp
+++ b/test/operators/test_reshape_op.cpp
--- a/test/operators/test_sigmoid_op.cpp
+++ b/test/operators/test_sigmoid_op.cpp
--- a/test/operators/test_softmax_op.cpp
+++ b/test/operators/test_softmax_op.cpp
--- a/test/operators/test_transpose_op.cpp
+++ b/test/operators/test_transpose_op.cpp
--- a/test/test_helper.h
+++ b/test/test_helper.h
--- a/test/test_include.h
+++ b/test/test_include.h
--- a/build.sh
+++ b/build.sh
--- a/tools/pre-commit.hooks/clang-format.hook
+++ b/tools/pre-commit.hooks/clang-format.hook
--- a/scripts/push2android.sh
+++ b/scripts/push2android.sh