add clang-tidy support

8b8d7e08 · 朔-望 · 17535d85 · 8b8d7e08 · 8b8d7e08 · 8b8d7e08
37 changed files
--- a/.clang-tidy
+++ b/.clang-tidy
+---
+Checks:          'clang-diagnostic-*,clang-analyzer-*'
+WarningsAsErrors: ''
+HeaderFilterRegex: ''
+AnalyzeTemporaryDtors: false
+FormatStyle:     none
+User:            allonli
+CheckOptions:    
+  - key:             google-readability-braces-around-statements.ShortStatementLines
+    value:           '1'
+  - key:             google-readability-function-size.StatementThreshold
+    value:           '800'
+  - key:             google-readability-namespace-comments.ShortNamespaceLines
+    value:           '10'
+  - key:             google-readability-namespace-comments.SpacesBeforeComments
+    value:           '2'
+  - key:             modernize-loop-convert.MaxCopySize
+    value:           '16'
+  - key:             modernize-loop-convert.MinConfidence
+    value:           reasonable
+  - key:             modernize-loop-convert.NamingStyle
+    value:           CamelCase
+  - key:             modernize-pass-by-value.IncludeStyle
+    value:           llvm
+  - key:             modernize-replace-auto-ptr.IncludeStyle
+    value:           llvm
+  - key:             modernize-use-nullptr.NullMacros
+    value:           'NULL'
+...
+
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -20,14 +20,14 @@ repos:
    -   id: trailing-whitespace
        files: (src).*\.(md|py|mm|swift|java|c|cc|cxx|cpp|cu|h|hpp|hxx)$

-#-   repo: local
-#    hooks:
-#    -   id: clang-format-with-version-check
-#        name: clang-format
-#        description: Format files with ClangFormat.
-#        entry: bash ./tools/pre-commit.hooks/.clang_format.hook -i
-#        language: system
-#        files: (src).*\.(c|cc|cxx|cpp|h|hpp|hxx)$
+-   repo: local
+    hooks:
+    -   id: clang-tidy
+        name: clang-tidy
+        description: Run clang-tidy static analysis on C/C++ sources.
+        entry: bash ./tools/pre-commit.hooks/.clang-tidy.hook -i
+        language: system
+        files: (src).*\.(c|cc|cxx|cpp|h|hpp|hxx)$
 #
 #-   repo: local
 #    hooks:

--- a/src/common/log.h
+++ b/src/common/log.h
@@ -27,66 +27,66 @@ SOFTWARE.

 namespace paddle_mobile {

-    enum LogLevel {
-        kNO_LOG,
-        kLOG_ERROR,
-        kLOG_WARNING,
-        kLOG_INFO,
-        kLOG_DEBUG,
-        kLOG_DEBUG1,
-        kLOG_DEBUG2,
-        kLOG_DEBUG3,
-        kLOG_DEBUG4
-    };
-
-    // log level
-    static LogLevel log_level = kLOG_DEBUG4;
-
-    static std::vector<std::string> logs{"NO",      "ERROR ",  "WARNING",
-                                         "INFO   ", "DEBUG  ", "DEBUG1 ",
-                                         "DEBUG2 ", "DEBUG3 ", "DEBUG4 "};
-
-    struct ToLog;
-
-    struct Print {
-        friend struct ToLog;
-        template <typename T> Print &operator<<(T const &value) {
-            buffer_ << value;
-            return *this;
-        }
-
-      private:
-        void print(LogLevel level) {
-            buffer_ << std::endl;
-            if (level == kLOG_ERROR) {
-                std::cerr << buffer_.str();
-            } else {
-                std::cout << buffer_.str();
-            }
-        }
-        std::ostringstream buffer_;
-    };
-
-    struct ToLog {
-        ToLog(LogLevel level = kLOG_DEBUG, const std::string &info = "")
-            : level_(level) {
-            unsigned blanks =
-                (unsigned)(level > kLOG_DEBUG ? (level - kLOG_DEBUG) * 4 : 1);
-            printer_ << logs[level] << " " << info << ":"
-                     << std::string(blanks, ' ');
-        }
-
-        template <typename T> ToLog &operator<<(T const &value) {
-            printer_ << value;
-            return *this;
-        }
-
-        ~ToLog() { printer_.print(level_); }
-
-      private:
-        LogLevel level_;
-        Print printer_;
-    };
+enum LogLevel {
+  kNO_LOG,
+  kLOG_ERROR,
+  kLOG_WARNING,
+  kLOG_INFO,
+  kLOG_DEBUG,
+  kLOG_DEBUG1,
+  kLOG_DEBUG2,
+  kLOG_DEBUG3,
+  kLOG_DEBUG4
+};
+
+// log level
+static LogLevel log_level = kLOG_DEBUG4;
+
+static std::vector<std::string> logs{"NO", "ERROR ", "WARNING",
+                                     "INFO   ", "DEBUG  ", "DEBUG1 ",
+                                     "DEBUG2 ", "DEBUG3 ", "DEBUG4 "};
+
+struct ToLog;
+
+struct Print {  // accumulates one log message in memory and writes it out in one go
+  friend struct ToLog;  // grants ToLog access to the private print()
+  template<typename T> Print &operator<<(T const &value) {
+    buffer_ << value;  // append to the pending message; nothing is emitted yet
+    return *this;
+  }
+
+private:
+  void print(LogLevel level) {  // terminates the message and writes it to a stream chosen by level
+    buffer_ << std::endl;
+    if (level == kLOG_ERROR) {  // only kLOG_ERROR goes to stderr
+      std::cerr << buffer_.str();
+    } else {  // every other level goes to stdout
+      std::cout << buffer_.str();
+    }
+  }
+  std::ostringstream buffer_;  // text collected so far
+};
+
+struct ToLog {  // one log statement: writes a prefix when constructed, prints when destroyed
+  ToLog(LogLevel level = kLOG_DEBUG, const std::string &info = "")
+      : level_(level) {
+    unsigned blanks =  // padding after the prefix: 4 spaces per level above kLOG_DEBUG, otherwise 1
+        (unsigned) (level > kLOG_DEBUG ? (level - kLOG_DEBUG) * 4 : 1);
+    printer_ << logs[level] << " " << info << ":"  // prefix is "<level name> <info>:"
+             << std::string(blanks, ' ');
+  }
+
+  template<typename T> ToLog &operator<<(T const &value) {
+    printer_ << value;  // forwarded to the Print buffer
+    return *this;
+  }
+
+  ~ToLog() { printer_.print(level_); }  // the buffered message is emitted when this object is destroyed
+
+private:
+  LogLevel level_;  // level passed to Print::print at destruction
+  Print printer_;  // holds the message text
+};

 #define LOG(level)                                                             \
    if (level > paddle_mobile::log_level) {                                    \

--- a/src/common/type_define.h
+++ b/src/common/type_define.h
@@ -23,31 +23,31 @@ SOFTWARE.

 namespace paddle_mobile {

-    namespace framework {
-        template <typename Dtype> class OperatorBase;
-        class OpDesc;
-        class BlockDesc;
-        class InferShapeContext;
-    }
-
-    using VariableNameMap = std::map<std::string, std::vector<std::string>>;
-
-    template <typename Dtype>
-    using OpCreator = std::function<framework::OperatorBase<Dtype> *(
-        const std::string & /*type*/, const VariableNameMap & /*inputs*/,
-        const VariableNameMap & /*outputs*/,
-        const framework::AttributeMap & /*attrs*/)>;
-
-    using GradOpMakerFN =
-        std::function<std::vector<std::unique_ptr<framework::OpDesc>>(
-            const framework::OpDesc &,
-            const std::unordered_set<std::string> & /*no_grad_set*/,
-            std::unordered_map<std::string, std::string> * /*grad_to_var*/,
-            const std::vector<framework::BlockDesc *> &grad_block)>;
-
-    using InferVarTypeFN =
-        std::function<void(const framework::OpDesc & /*op_desc*/,
-                           framework::BlockDesc * /*block*/)>;
-
-    using InferShapeFN = std::function<void(framework::InferShapeContext *)>;
+namespace framework {
+template<typename Dtype> class OperatorBase;
+class OpDesc;
+class BlockDesc;
+class InferShapeContext;
+}
+
+using VariableNameMap = std::map<std::string, std::vector<std::string>>;
+
+template<typename Dtype>
+using OpCreator = std::function<framework::OperatorBase<Dtype> *(
+    const std::string & /*type*/, const VariableNameMap & /*inputs*/,
+    const VariableNameMap & /*outputs*/,
+    const framework::AttributeMap & /*attrs*/)>;
+
+using GradOpMakerFN =
+std::function<std::vector<std::unique_ptr<framework::OpDesc>>(
+    const framework::OpDesc &,
+    const std::unordered_set<std::string> & /*no_grad_set*/,
+    std::unordered_map<std::string, std::string> * /*grad_to_var*/,
+    const std::vector<framework::BlockDesc *> &grad_block)>;
+
+using InferVarTypeFN =
+std::function<void(const framework::OpDesc & /*op_desc*/,
+                   framework::BlockDesc * /*block*/)>;
+
+using InferShapeFN = std::function<void(framework::InferShapeContext *)>;
 };
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -19,45 +19,45 @@ SOFTWARE.
 #pragma once;

 namespace paddle_mobile {
-    enum class Precision : int { FP32 = 0 };
+enum class Precision : int { FP32 = 0 };

-    //! device type
-    enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2 };
+//! device type
+enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2 };

-    template <DeviceTypeEnum T> struct DeviceType {};
+template<DeviceTypeEnum T> struct DeviceType {};

-    typedef DeviceType<kCPU> CPU;
-    typedef DeviceType<kFPGA> FPGA;
-    typedef DeviceType<kGPU_MALI> GPU_MALI;
+typedef DeviceType<kCPU> CPU;
+typedef DeviceType<kFPGA> FPGA;
+typedef DeviceType<kGPU_MALI> GPU_MALI;

-    //! data type
-    enum DataType {
-        PM_INVALID = -1,
-        PM_HALF = 0,
-        PM_FLOAT = 1,
-        PM_DOUBLE = 2,
-        PM_INT8 = 3,
-        PM_INT16 = 4,
-        PM_INT32 = 5,
-        PM_INT64 = 6,
-        PM_UINT8 = 7,
-        PM_UINT16 = 8,
-        PM_UINT32 = 9,
-        PM_STRING = 10,
-        PM_BOOL = 11,
-        PM_SHAPE = 12,
-        PM_TENSOR = 13
-    };
-    //!
-    enum PMStatus {
-        PMSuccess = 0xFF,        /*!< No errors */
-        PMNotInitialized = 0x01, /*!< Data not initialized. */
-        PMInvalidValue = 0x02,   /*!< Incorrect variable value. */
-        PMMemAllocFailed = 0x03, /*!< Memory allocation error. */
-        PMUnKownError = 0x04,    /*!< Unknown error. */
-        PMOutOfAuthority = 0x05, /*!< Try to modified data not your own*/
-        PMOutOfMem = 0x06,       /*!< OOM error*/
-        PMUnImplError = 0x07,    /*!< Unimplement error. */
-        PMWrongDevice = 0x08     /*!< un-correct device. */
-    };
+//! data type
+enum DataType {
+  PM_INVALID = -1,
+  PM_HALF = 0,
+  PM_FLOAT = 1,
+  PM_DOUBLE = 2,
+  PM_INT8 = 3,
+  PM_INT16 = 4,
+  PM_INT32 = 5,
+  PM_INT64 = 6,
+  PM_UINT8 = 7,
+  PM_UINT16 = 8,
+  PM_UINT32 = 9,
+  PM_STRING = 10,
+  PM_BOOL = 11,
+  PM_SHAPE = 12,
+  PM_TENSOR = 13
+};
+//!
+enum PMStatus {
+  PMSuccess = 0xFF,        /*!< No errors */
+  PMNotInitialized = 0x01, /*!< Data not initialized. */
+  PMInvalidValue = 0x02,   /*!< Incorrect variable value. */
+  PMMemAllocFailed = 0x03, /*!< Memory allocation error. */
+  PMUnKownError = 0x04,    /*!< Unknown error. */
+  PMOutOfAuthority = 0x05, /*!< Try to modified data not your own*/
+  PMOutOfMem = 0x06,       /*!< OOM error*/
+  PMUnImplError = 0x07,    /*!< Unimplement error. */
+  PMWrongDevice = 0x08     /*!< un-correct device. */
+};
 }
--- a/src/common/variant.cpp
+++ b/src/common/variant.cpp
@@ -15,5 +15,3 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 ==============================================================================*/
-
-#include "variant.h"
--- a/src/common/variant.h
+++ b/src/common/variant.h
@@ -21,79 +21,79 @@ SOFTWARE.
 #pragma once

 namespace paddle_mobile {
-    template <int ID, typename Type> struct IDToType { typedef Type type_t; };
+template<int ID, typename Type> struct IDToType { typedef Type type_t; };

-    template <typename F, typename... Ts> struct VariantHelper {
-        static const size_t size = sizeof(F) > VariantHelper<Ts...>::size
-                                       ? sizeof(F)
-                                       : VariantHelper<Ts...>::size;
+template<typename F, typename... Ts> struct VariantHelper {
+  static const size_t size = sizeof(F) > VariantHelper<Ts...>::size
+                             ? sizeof(F)
+                             : VariantHelper<Ts...>::size;

-        inline static void Destroy(size_t id, void *data) {
-            if (id == typeid(F).hash_code()) {
-                reinterpret_cast<F *>(data)->~F();
-            } else {
-                VariantHelper<Ts...>::Destroy(id, data);
-            }
-        }
-    };
+  inline static void Destroy(size_t id, void *data) {
+    if (id == typeid(F).hash_code()) {
+      reinterpret_cast<F *>(data)->~F();
+    } else {
+      VariantHelper<Ts...>::Destroy(id, data);
+    }
+  }
+};

-    template <typename F> struct VariantHelper<F> {
-        static const size_t size = sizeof(F);
-        inline static void Destroy(size_t id, void *data) {
-            if (id == typeid(F).hash_code()) {
-                //              reinterpret_cast<F*>(data)->~F();
-            } else {
-                //              std::cout << "未匹配到 " << std::endl;
-            }
-        }
-    };
+template<typename F> struct VariantHelper<F> {
+  static const size_t size = sizeof(F);
+  inline static void Destroy(size_t id, void *data) {
+    if (id == typeid(F).hash_code()) {
+      //              reinterpret_cast<F*>(data)->~F();
+    } else {
+      //              std::cout << "未匹配到 " << std::endl;
+    }
+  }
+};

-    template <size_t size> class RawData {
-      public:
-        char data[size];
-        RawData() {}
-        RawData(const RawData &raw_data) { strcpy(data, raw_data.data); }
-        //      void operator=(const RawData &raw_data){
-        //        strcpy(data, raw_data.data);
-        //      }
-    };
+// Fixed-size byte buffer used as Variant's storage; it holds object bytes, not text.
+template<size_t size> class RawData {
+public:
+  char data[size];
+  RawData() {}
+  // Copy all `size` bytes. strcpy would stop at the first zero byte (or run
+  // past the buffer when there is none), which corrupts binary payloads.
+  RawData(const RawData &raw_data) { memcpy(data, raw_data.data, size); }
+};

-    template <typename... Ts> struct Variant {
-        Variant(const Variant &variant) {
-            //        std::cout << " 赋值构造函数 " << std::endl;
-            type_id = variant.type_id;
-            data = variant.data;
-        }
+template<typename... Ts> struct Variant {
+  Variant(const Variant &variant) {
+    //        std::cout << " 赋值构造函数 " << std::endl;
+    type_id = variant.type_id;
+    data = variant.data;
+  }

-        Variant() : type_id(invalid_type()) {}
-        ~Variant() {
-            //        helper::Destroy(type_id, &data);
-        }
+  Variant() : type_id(invalid_type()) {}
+  ~Variant() {
+    //        helper::Destroy(type_id, &data);
+  }

-        template <typename T, typename... Args> void Set(Args &&... args) {
-            helper::Destroy(type_id, &data);
-            new (&data) T(std::forward<Args>(args)...);
-            type_id = typeid(T).hash_code();
-        }
+  template<typename T, typename... Args> void Set(Args &&... args) {
+    helper::Destroy(type_id, &data);
+    new(&data) T(std::forward<Args>(args)...);
+    type_id = typeid(T).hash_code();
+  }

-        template <typename T> T &Get() const {
-            if (type_id == typeid(T).hash_code()) {
-                return *const_cast<T *>(reinterpret_cast<const T *>(&data));
-            } else {
-                //      std::cout << " bad cast in variant " << std::endl;
-                throw std::bad_cast();
-            }
-        }
+  template<typename T> T &Get() const {
+    if (type_id == typeid(T).hash_code()) {
+      return *const_cast<T *>(reinterpret_cast<const T *>(&data));
+    } else {
+      //      std::cout << " bad cast in variant " << std::endl;
+      throw std::bad_cast();
+    }
+  }

-        size_t TypeId() const { return type_id; }
+  size_t TypeId() const { return type_id; }

-      private:
-        static inline size_t invalid_type() { return typeid(void).hash_code(); }
-        typedef VariantHelper<Ts...> helper;
-        size_t type_id;
-        RawData<helper::size> data;
-    };
+private:
+  static inline size_t invalid_type() { return typeid(void).hash_code(); }
+  typedef VariantHelper<Ts...> helper;
+  size_t type_id;
+  RawData<helper::size> data;
+};

-    template <typename T> struct Vistor { typedef T type_t; };
+template<typename T> struct Vistor { typedef T type_t; };

 } // namespace paddle_mobile
--- a/src/framework/attribute.cpp
+++ b/src/framework/attribute.cpp
@@ -16,8 +16,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 ==============================================================================*/

-#include "attribute.h"
-
 namespace paddle_mobile {
-    namespace framework {}
+namespace framework {}
 } // namespace paddle_mobile
--- a/src/framework/attribute.h
+++ b/src/framework/attribute.h
@@ -22,110 +22,110 @@ SOFTWARE.
 #include "framework.pb.h"

 namespace paddle_mobile {
-    namespace framework {
+namespace framework {

-        class BlockDesc;
+class BlockDesc;

-        class Attribute {
-          public:
-            static Attribute
-            GetAttrValue(const proto::OpDesc::Attr &attr_desc) {
-                //    std::cout << "begin get attr value" << std::endl;
-                Attribute attr;
-                switch (attr_desc.type()) {
-                case proto::AttrType::BOOLEAN: {
-                    attr.Set<bool>(attr_desc.b());
-                    break;
-                }
-                case proto::AttrType::INT: {
-                    attr.Set<int>(attr_desc.i());
-                    break;
-                }
-                case proto::AttrType::FLOAT: {
-                    attr.Set<float>(attr_desc.f());
-                    break;
-                }
-                case proto::AttrType::STRING: {
-                    attr.Set<std::string>(attr_desc.s());
-                    break;
-                }
-                case proto::AttrType::BOOLEANS: {
-                    std::vector<bool> val(attr_desc.bools_size());
-                    for (int i = 0; i < attr_desc.bools_size(); ++i) {
-                        val[i] = attr_desc.bools(i);
-                    }
-                    attr.Set<std::vector<bool>>(val);
-                    break;
-                }
-                case proto::AttrType::INTS: {
-                    std::vector<int> val(attr_desc.ints_size());
-                    for (int i = 0; i < attr_desc.ints_size(); ++i) {
-                        val[i] = attr_desc.ints(i);
-                    }
-                    attr.Set<std::vector<int>>(val);
-                    break;
-                }
-                case proto::AttrType::FLOATS: {
-                    std::vector<float> val(attr_desc.floats_size());
-                    for (int i = 0; i < attr_desc.floats_size(); ++i) {
-                        val[i] = attr_desc.floats(i);
-                    }
-                    attr.Set<std::vector<float>>(val);
-                    break;
-                }
-                case proto::AttrType::STRINGS: {
-                    std::vector<std::string> val(attr_desc.strings_size());
-                    for (int i = 0; i < attr_desc.strings_size(); ++i) {
-                        val[i] = attr_desc.strings(i);
-                    }
-                    attr.Set<std::vector<std::string>>(val);
-                    break;
-                }
-                case proto::AttrType::LONG: {
-                    attr.Set<int64_t>(attr_desc.l());
-                    break;
-                }
-                default:
-                    //        std::cout << " not support " << std::endl;
-                    break;
-                }
-                //    std::cout << "end get attr value" << std::endl;
-                return attr;
-            }
+class Attribute {
+public:
+  static Attribute
+  GetAttrValue(const proto::OpDesc::Attr &attr_desc) {
+    //    std::cout << "begin get attr value" << std::endl;
+    Attribute attr;
+    switch (attr_desc.type()) {
+    case proto::AttrType::BOOLEAN: {
+      attr.Set<bool>(attr_desc.b());
+      break;
+    }
+    case proto::AttrType::INT: {
+      attr.Set<int>(attr_desc.i());
+      break;
+    }
+    case proto::AttrType::FLOAT: {
+      attr.Set<float>(attr_desc.f());
+      break;
+    }
+    case proto::AttrType::STRING: {
+      attr.Set<std::string>(attr_desc.s());
+      break;
+    }
+    case proto::AttrType::BOOLEANS: {
+      std::vector<bool> val(attr_desc.bools_size());
+      for (int i = 0; i < attr_desc.bools_size(); ++i) {
+        val[i] = attr_desc.bools(i);
+      }
+      attr.Set<std::vector<bool>>(val);
+      break;
+    }
+    case proto::AttrType::INTS: {
+      std::vector<int> val(attr_desc.ints_size());
+      for (int i = 0; i < attr_desc.ints_size(); ++i) {
+        val[i] = attr_desc.ints(i);
+      }
+      attr.Set<std::vector<int>>(val);
+      break;
+    }
+    case proto::AttrType::FLOATS: {
+      std::vector<float> val(attr_desc.floats_size());
+      for (int i = 0; i < attr_desc.floats_size(); ++i) {
+        val[i] = attr_desc.floats(i);
+      }
+      attr.Set<std::vector<float>>(val);
+      break;
+    }
+    case proto::AttrType::STRINGS: {
+      std::vector<std::string> val(attr_desc.strings_size());
+      for (int i = 0; i < attr_desc.strings_size(); ++i) {
+        val[i] = attr_desc.strings(i);
+      }
+      attr.Set<std::vector<std::string>>(val);
+      break;
+    }
+    case proto::AttrType::LONG: {
+      attr.Set<int64_t>(attr_desc.l());
+      break;
+    }
+    default:
+      //        std::cout << " not support " << std::endl;
+      break;
+    }
+    //    std::cout << "end get attr value" << std::endl;
+    return attr;
+  }

-            Attribute() {}
-            template <typename T, typename... Args>
-            Attribute &Set(Args &&... args) {
-                variant_.Set<T>(args...);
-                return *this;
-            }
+  Attribute() {}
+  template<typename T, typename... Args>
+  Attribute &Set(Args &&... args) {  // stores a T constructed from args; returns *this
+    variant_.Set<T>(std::forward<Args>(args)...);  // forward so rvalue arguments are moved, not copied
+    return *this;
+  }

-            template <typename T> T &Get() const { return variant_.Get<T>(); }
+  template<typename T> T &Get() const { return variant_.Get<T>(); }

-          private:
-            Variant<int, float, std::string, std::vector<int>,
-                    std::vector<float>, std::vector<std::string>, bool,
-                    std::vector<bool>, BlockDesc *, int64_t>
-                variant_;
-        };
+private:
+  Variant<int, float, std::string, std::vector<int>,
+          std::vector<float>, std::vector<std::string>, bool,
+          std::vector<bool>, BlockDesc *, int64_t>
+      variant_;
+};

-        using AttributeMap = std::unordered_map<std::string, Attribute>;
+using AttributeMap = std::unordered_map<std::string, Attribute>;

-        class AttrReader {
-          public:
-            explicit AttrReader(const AttributeMap &attrs) : attrs_(attrs) {}
+class AttrReader {
+public:
+  explicit AttrReader(const AttributeMap &attrs) : attrs_(attrs) {}

-            template <typename T> inline T Get(const std::string &name) const {
-                //          PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should
-                //          be in
-                //          AttributeMap",
-                //                         name);
-                return ((Attribute)attrs_.at(name)).Get<T>();
-            }
+  // Returns a copy of the attribute stored under `name`, read as a T.
+  // attrs_.at() throws std::out_of_range when `name` is not in the map; the
+  // type check is delegated to Attribute::Get<T>().
+  template<typename T> inline T Get(const std::string &name) const {
+    // Attribute::Get<T>() is const, so read the stored entry directly.
+    return attrs_.at(name).Get<T>();
+  }

-          private:
-            const AttributeMap &attrs_;
-        };
+private:
+  const AttributeMap &attrs_;
+};

-    } // namespace framework
+} // namespace framework
 } // namespace paddle_mobile
--- a/src/framework/block_desc.h
+++ b/src/framework/block_desc.h
@@ -24,50 +24,50 @@ SOFTWARE.
 #include "var_desc.h"

 namespace paddle_mobile {
-    namespace framework {
+namespace framework {

-        class BlockDesc : PaddleMobileObject {
-          public:
-            BlockDesc(const proto::BlockDesc &desc);
+class BlockDesc : PaddleMobileObject {
+public:
+  BlockDesc(const proto::BlockDesc &desc);

-            const int &ID() const { return desc_.idx(); }
+  const int &ID() const { return desc_.idx(); }

-            const int &Parent() const { return desc_.parent_idx(); }
+  const int &Parent() const { return desc_.parent_idx(); }

-            bool operator==(
-                const paddle_mobile::framework::BlockDesc &in_block) const {
-                return this->ID() == in_block.ID() &&
-                       this->Parent() == in_block.Parent();
-            }
+  bool operator==(
+      const paddle_mobile::framework::BlockDesc &in_block) const {
+    return this->ID() == in_block.ID() &&
+        this->Parent() == in_block.Parent();
+  }

-            bool operator<(
-                const paddle_mobile::framework::BlockDesc &in_block) const {
-                return this->ID() < in_block.ID() &&
-                       this->Parent() < in_block.Parent();
-            }
+  bool operator<(
+      const paddle_mobile::framework::BlockDesc &in_block) const {  // lexicographic order on (ID, Parent)
+    return this->ID() < in_block.ID() ||  // Parent only breaks ties between equal IDs
+        (this->ID() == in_block.ID() && this->Parent() < in_block.Parent());
+  }

-            std::vector<std::shared_ptr<VarDesc>> Vars() const;
-            std::vector<std::shared_ptr<OpDesc>> Ops() const;
+  std::vector<std::shared_ptr<VarDesc>> Vars() const;
+  std::vector<std::shared_ptr<OpDesc>> Ops() const;

-          private:
-            proto::BlockDesc desc_;
-            std::vector<std::shared_ptr<OpDesc>> ops_;
-            std::unordered_map<std::string, std::shared_ptr<VarDesc>> vars_;
-        };
+private:
+  proto::BlockDesc desc_;
+  std::vector<std::shared_ptr<OpDesc>> ops_;
+  std::unordered_map<std::string, std::shared_ptr<VarDesc>> vars_;
+};

-    } // namespace framework
+} // namespace framework
 } // namespace paddle_mobile

 namespace std {

-    template <> struct hash<paddle_mobile::framework::BlockDesc> {
-        typedef paddle_mobile::framework::BlockDesc argument_type;
-        typedef std::size_t result_type;
-        result_type operator()(argument_type const &s) const noexcept {
-            result_type const h1(std::hash<int>{}(s.ID()));
-            result_type const h2(std::hash<int>{}(s.ID()));
-            return h1 ^ (h2 << 1);
-        }
-    };
+template<> struct hash<paddle_mobile::framework::BlockDesc> {  // hashes a BlockDesc by ID() and Parent()
+  typedef paddle_mobile::framework::BlockDesc argument_type;
+  typedef std::size_t result_type;
+  result_type operator()(argument_type const &s) const noexcept {
+    result_type const h1(std::hash<int>{}(s.ID()));
+    result_type const h2(std::hash<int>{}(s.Parent()));  // second field compared by BlockDesc::operator==
+    return h1 ^ (h2 << 1);  // shift h2 so equal ID and Parent hashes do not cancel out
+  }
+};

 } // namespace std
--- a/src/framework/data_layout.h
+++ b/src/framework/data_layout.h
@@ -19,50 +19,46 @@ limitations under the License. */
 #include <string>

 namespace paddle_mobile {
-    namespace framework {
+namespace framework {

-        enum class DataLayout {
-            kNHWC = 0,
-            kNCHW = 1,
-            kAnyLayout = 2,
-        };
+enum class DataLayout {
+  kNHWC = 0,
+  kNCHW = 1,
+  kAnyLayout = 2,
+};

-        inline DataLayout StringToDataLayout(const std::string &str) {
-            std::string s(str);
-            for (size_t i = 0; i < s.size(); ++i) {
-                s[i] = toupper(s[i]);
-            }
+inline DataLayout StringToDataLayout(const std::string &str) {  // case-insensitive layout name -> DataLayout
+  std::string s(str);
+  for (size_t i = 0; i < s.size(); ++i) {  // toupper requires an unsigned char value; a negative char is undefined
+    s[i] = static_cast<char>(toupper(static_cast<unsigned char>(s[i])));
+  }

-            if (s == "NHWC") {
-                return DataLayout::kNHWC;
-            } else if (s == "NCHW") {
-                return DataLayout::kNCHW;
-            } else if (s == "ANYLAYOUT") {
-                return DataLayout::kAnyLayout;
-            } else {
-                //    std::cout << "Unknown storage order string: %s", s;
-            }
-        }
+  if (s == "NHWC") {
+    return DataLayout::kNHWC;
+  } else if (s == "NCHW") {
+    return DataLayout::kNCHW;
+  }
+  // "ANYLAYOUT" maps to kAnyLayout, and so does every unrecognised string:
+  // the function must return a value on every path, because flowing off the
+  // end of a value-returning function is undefined behaviour.
+  return DataLayout::kAnyLayout;
+}

-        inline std::string DataLayoutToString(const DataLayout &data_layout) {
-            switch (data_layout) {
-            case DataLayout::kNHWC:
-                return "NHWC";
-            case DataLayout::kNCHW:
-                return "NCHW";
-            case DataLayout::kAnyLayout:
-                return "ANY_LAYOUT";
-            default:
-                break;
-                //      std::cout << "unknown DataLayou %d", data_layout;
-            }
-        }
+inline std::string DataLayoutToString(const DataLayout &data_layout) {  // printable name of data_layout
+  switch (data_layout) {
+  case DataLayout::kNHWC:return "NHWC";
+  case DataLayout::kNCHW:return "NCHW";
+  case DataLayout::kAnyLayout:return "ANY_LAYOUT";
+  default:break;  // a value that matches no enumerator, e.g. produced by a cast
+  }
+  return "UNKNOWN_LAYOUT";  // always return a value; falling off the end is undefined behaviour
+}

-        inline std::ostream &operator<<(std::ostream &out,
-                                        const DataLayout &l) {
-            out << DataLayoutToString(l);
-            return out;
-        }
+inline std::ostream &operator<<(std::ostream &out,
+                                const DataLayout &l) {
+  out << DataLayoutToString(l);
+  return out;
+}

-    } // namespace framework
+} // namespace framework
 } // namespace paddle_mobile
--- a/src/framework/data_transform.cpp
+++ b/src/framework/data_transform.cpp
@@ -21,72 +21,72 @@ SOFTWARE.
 #include "data_transform.h"

 namespace paddle_mobile {
-    namespace framework {
+namespace framework {

-        static void PassTensorData(Tensor *from, Tensor *to) {
-            to->ShareDataWith(*from);
-            *from = Tensor();
-        }
+static void PassTensorData(Tensor *from, Tensor *to) {
+  to->ShareDataWith(*from);
+  *from = Tensor();
+}

-        void DataTransform(const OpKernelType &expected_kernel_type,
-                           const OpKernelType &kernel_type_for_var,
-                           const Tensor &input_tensor, Tensor *output_tensor) {
-            bool transformed = false;
-            Tensor in;
-            in.ShareDataWith(input_tensor);
-            Tensor out;
+void DataTransform(const OpKernelType &expected_kernel_type,
+                   const OpKernelType &kernel_type_for_var,
+                   const Tensor &input_tensor, Tensor *output_tensor) {
+  bool transformed = false;
+  Tensor in;
+  in.ShareDataWith(input_tensor);
+  Tensor out;

-            //  // do layout transform
-            //  if (NeedTransformLayout(expected_kernel_type.data_layout_,
-            //                          kernel_type_for_var.data_layout_)) {
-            //    TransDataLayout(kernel_type_for_var, expected_kernel_type, in,
-            //    &out);
-            //    transformed = true;
-            //    PassTensorData(&out, &in);
-            //  }
-            //
-            //  // do data type transform
-            //  if (expected_kernel_type.data_type_ !=
-            //  kernel_type_for_var.data_type_) {
-            //    TransDataType(kernel_type_for_var, expected_kernel_type, in,
-            //    &out);
-            //    transformed = true;
-            //    PassTensorData(&out, &in);
-            //  }
-            //
-            //  // do device transform
-            //  if (!platform::is_same_place(kernel_type_for_var.place_,
-            //                               expected_kernel_type.place_)) {
-            //    TransDataDevice(in, expected_kernel_type.place_, &out);
-            //    transformed = true;
-            //    PassTensorData(&out, &in);
-            //  }
-            //
-            //  PADDLE_ENFORCE(transformed, "No transform is applied, please
-            //  check!");
-            // get output data
-            output_tensor->ShareDataWith(in);
-        }
+  //  // do layout transform
+  //  if (NeedTransformLayout(expected_kernel_type.data_layout_,
+  //                          kernel_type_for_var.data_layout_)) {
+  //    TransDataLayout(kernel_type_for_var, expected_kernel_type, in,
+  //    &out);
+  //    transformed = true;
+  //    PassTensorData(&out, &in);
+  //  }
+  //
+  //  // do data type transform
+  //  if (expected_kernel_type.data_type_ !=
+  //  kernel_type_for_var.data_type_) {
+  //    TransDataType(kernel_type_for_var, expected_kernel_type, in,
+  //    &out);
+  //    transformed = true;
+  //    PassTensorData(&out, &in);
+  //  }
+  //
+  //  // do device transform
+  //  if (!platform::is_same_place(kernel_type_for_var.place_,
+  //                               expected_kernel_type.place_)) {
+  //    TransDataDevice(in, expected_kernel_type.place_, &out);
+  //    transformed = true;
+  //    PassTensorData(&out, &in);
+  //  }
+  //
+  //  PADDLE_ENFORCE(transformed, "No transform is applied, please
+  //  check!");
+  // get output data
+  output_tensor->ShareDataWith(in);
+}

-        void CopyVariableWithTensor(const Variable &in_var,
-                                    const Tensor &tensor, Variable &out_var) {
-            //  if (in_var.IsType<LoDTensor>()) {
-            //    auto& in_lod_tensor = in_var.Get<LoDTensor>();
-            //    auto* tran_lod_tensor = out_var.GetMutable<LoDTensor>();
-            //    tran_lod_tensor->set_lod(in_lod_tensor.lod());
-            //    tran_lod_tensor->set_layout(in_lod_tensor.layout());
-            //    tran_lod_tensor->ShareDataWith(tensor);
-            //  } else if (in_var.IsType<SelectedRows>()) {
-            //    auto& in_selected_rows = in_var.Get<SelectedRows>();
-            //    auto* trans_selected_rows =
-            //    out_var.GetMutable<SelectedRows>();
-            //    trans_selected_rows->set_height(in_selected_rows.height());
-            //    trans_selected_rows->set_rows(in_selected_rows.rows());
-            //    trans_selected_rows->mutable_value()->ShareDataWith(tensor);
-            //  } else {
-            //    PADDLE_THROW("unknown var type");
-            //  }
-        }
+void CopyVariableWithTensor(const Variable &in_var,
+                            const Tensor &tensor, Variable &out_var) {
+  //  if (in_var.IsType<LoDTensor>()) {
+  //    auto& in_lod_tensor = in_var.Get<LoDTensor>();
+  //    auto* tran_lod_tensor = out_var.GetMutable<LoDTensor>();
+  //    tran_lod_tensor->set_lod(in_lod_tensor.lod());
+  //    tran_lod_tensor->set_layout(in_lod_tensor.layout());
+  //    tran_lod_tensor->ShareDataWith(tensor);
+  //  } else if (in_var.IsType<SelectedRows>()) {
+  //    auto& in_selected_rows = in_var.Get<SelectedRows>();
+  //    auto* trans_selected_rows =
+  //    out_var.GetMutable<SelectedRows>();
+  //    trans_selected_rows->set_height(in_selected_rows.height());
+  //    trans_selected_rows->set_rows(in_selected_rows.rows());
+  //    trans_selected_rows->mutable_value()->ShareDataWith(tensor);
+  //  } else {
+  //    PADDLE_THROW("unknown var type");
+  //  }
+}

-    } // namespace framework
+} // namespace framework
 } // namespace paddle_mobile
--- a/src/operators/conv_op.cpp
+++ b/src/operators/conv_op.cpp
@@ -19,58 +19,57 @@ SOFTWARE.
 #include "conv_op.h"
 #include "framework/data_type.h"
 #include "framework/op_proto_maker.h"
-#include "framework/operator.h"

 namespace paddle_mobile {
-    namespace operators {
+namespace operators {

-        int ConvOutputSize(int input_size, int filter_size, int dilation,
-                           int padding, int stride) {
-            const int dkernel = dilation * (filter_size - 1) + 1;
-            int output_size = (input_size + 2 * padding - dkernel) / stride + 1;
-            return output_size;
-        }
+int ConvOutputSize(int input_size, int filter_size, int dilation,
+                   int padding, int stride) {
+  const int dkernel = dilation * (filter_size - 1) + 1;
+  int output_size = (input_size + 2 * padding - dkernel) / stride + 1;
+  return output_size;
+}

-        template <typename Dtype, typename T>
-        void ConvOp<Dtype, T>::InferShape() const {
-            //  std::cout << " begin get dims: " << std::endl;
+template<typename Dtype, typename T>
+void ConvOp<Dtype, T>::InferShape() const {
+  //  std::cout << " begin get dims: " << std::endl;

-            auto in_dims = param_.Input()->dims();
+  auto in_dims = param_.Input()->dims();

-            //  std::cout << " end get in dims: " << std::endl;
+  //  std::cout << " end get in dims: " << std::endl;

-            //  std::cout << " in_dims: " << in_dims << std::endl;
+  //  std::cout << " in_dims: " << in_dims << std::endl;

-            //  std::cout << " begin get Filter " << std::endl;
+  //  std::cout << " begin get Filter " << std::endl;

-            auto filter_dims = param_.Filter()->dims();
+  auto filter_dims = param_.Filter()->dims();

-            //  std::cout << " end get Filter " << std::endl;
+  //  std::cout << " end get Filter " << std::endl;

-            //  std::cout << " begin get Attrs " << std::endl;
+  //  std::cout << " begin get Attrs " << std::endl;

-            const std::vector<int> &strides = param_.Strides();
+  const std::vector<int> &strides = param_.Strides();

-            //  std::cout << " end get Attrs " << strides[0] << std::endl;
+  //  std::cout << " end get Attrs " << strides[0] << std::endl;

-            std::vector<int> paddings = param_.Paddings();
+  std::vector<int> paddings = param_.Paddings();

-            int groups = param_.Groups();
+  int groups = param_.Groups();

-            std::vector<int> dilations = param_.Dilations();
+  std::vector<int> dilations = param_.Dilations();

-            std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
-            for (size_t i = 0; i < strides.size(); ++i) {
-                output_shape.push_back(
-                    ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
-                                   dilations[i], paddings[i], strides[i]));
-            }
+  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
+  for (size_t i = 0; i < strides.size(); ++i) {
+    output_shape.push_back(
+        ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
+                       dilations[i], paddings[i], strides[i]));
+  }

-            framework::DDim ddim = framework::make_ddim(output_shape);
-            param_.Output()->Resize(ddim);
-        }
+  framework::DDim ddim = framework::make_ddim(output_shape);
+  param_.Output()->Resize(ddim);
+}

-        template class ConvOp<CPU, float>;
+template class ConvOp<CPU, float>;

-    } // namespace operators
+} // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/conv_op.h
+++ b/src/operators/conv_op.h
@@ -22,33 +22,33 @@ SOFTWARE.
 #include "operators/kernel/conv_kernel.h"

 namespace paddle_mobile {
-    namespace operators {
-
-        using namespace framework;
-
-        template <typename DeviceType, typename T>
-        class ConvOp : public framework::OperatorWithKernel<DeviceType> {
-          public:
-            ConvOp(const std::string &type, const VariableNameMap &inputs,
-                   const VariableNameMap &outputs,
-                   const framework::AttributeMap &attrs,
-                   std::shared_ptr<framework::Scope> scope)
-                : framework::OperatorWithKernel<DeviceType>(
-                      type, inputs, outputs, attrs, scope),
-                  param_(inputs, outputs, attrs, *scope) {}
-
-            using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
-            void InferShape() const override;
-
-            void Run() const {
-                operators::ConvKernel<DeviceType, T, ConvParam> kernel;
-                kernel.Compute(param_);
-                this->ClearVariables();
-            }
-
-          private:
-            ConvParam param_;
-        };
-
-    } // operators
+namespace operators {
+
+using namespace framework;
+
+template<typename DeviceType, typename T>
+class ConvOp : public framework::OperatorWithKernel<DeviceType> {
+public:
+  ConvOp(const std::string &type, const VariableNameMap &inputs,
+         const VariableNameMap &outputs,
+         const framework::AttributeMap &attrs,
+         std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType>(
+      type, inputs, outputs, attrs, scope),
+        param_(inputs, outputs, attrs, *scope) {}
+
+  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
+  void InferShape() const override;
+
+  void Run() const {
+    operators::ConvKernel<DeviceType, T, ConvParam> kernel;
+    kernel.Compute(param_);
+    this->ClearVariables();
+  }
+
+private:
+  ConvParam param_;
+};
+
+} // operators
 } // paddle_mobile
--- a/src/operators/elementwise_add_op.cpp
+++ b/src/operators/elementwise_add_op.cpp
@@ -19,13 +19,13 @@ SOFTWARE.
 #include "elementwise_add_op.h"

 namespace paddle_mobile {
-    namespace operators {
+namespace operators {

-        template <typename Dtype, typename T>
-        void ElementwiseAddOp<Dtype, T>::InferShape() const {
-            auto x_dim = param_.InputX()->dims();
-            param_.Out()->Resize(x_dim);
-        }
-        template class ElementwiseAddOp<CPU, float>;
-    }
+template<typename Dtype, typename T>
+void ElementwiseAddOp<Dtype, T>::InferShape() const {
+  auto x_dim = param_.InputX()->dims();
+  param_.Out()->Resize(x_dim);
+}
+template class ElementwiseAddOp<CPU, float>;
+}
 }
--- a/src/operators/elementwise_add_op.h
+++ b/src/operators/elementwise_add_op.h
@@ -21,35 +21,35 @@ SOFTWARE.
 #include "op_param.h"

 namespace paddle_mobile {
-    namespace operators {
+namespace operators {

-        using namespace framework;
+using namespace framework;

-        template <typename DeviceType, typename T>
-        class ElementwiseAddOp
-            : public framework::OperatorWithKernel<DeviceType> {
-          public:
-            ElementwiseAddOp(const std::string &type,
-                             const VariableNameMap &inputs,
-                             const VariableNameMap &outputs,
-                             const framework::AttributeMap attrs,
-                             std::shared_ptr<framework::Scope> scope)
-                : framework::OperatorWithKernel<DeviceType>(
-                      type, inputs, outputs, attrs, scope),
-                  param_(inputs, outputs, attrs, *scope) {}
+template<typename DeviceType, typename T>
+class ElementwiseAddOp
+    : public framework::OperatorWithKernel<DeviceType> {
+public:
+  ElementwiseAddOp(const std::string &type,
+                   const VariableNameMap &inputs,
+                   const VariableNameMap &outputs,
+                   const framework::AttributeMap attrs,
+                   std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType>(
+      type, inputs, outputs, attrs, scope),
+        param_(inputs, outputs, attrs, *scope) {}

-            void Run() const {
-                operators::ElementwiseAddKernel<DeviceType, T,
-                                                ElementwiseAddParam>
-                    kernel;
-                kernel.Compute(param_);
-            }
+  void Run() const {
+    operators::ElementwiseAddKernel<DeviceType, T,
+                                    ElementwiseAddParam>
+        kernel;
+    kernel.Compute(param_);
+  }

-            using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
-            void InferShape() const override;
+  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
+  void InferShape() const override;

-          protected:
-            ElementwiseAddParam param_;
-        };
-    }
+protected:
+  ElementwiseAddParam param_;
+};
+}
 }
--- a/src/operators/kernel/arm/conv_kernel.cpp
+++ b/src/operators/kernel/arm/conv_kernel.cpp
@@ -19,146 +19,146 @@ SOFTWARE.
 #include "operators/kernel/conv_kernel.h"

 namespace paddle_mobile {
-    namespace operators {
-
-        bool IsExpand(const std::vector<int64_t> &filter_dim,
-                      const std::vector<int> &strides,
-                      const std::vector<int> &paddings,
-                      const std::vector<int> &dilations) {
-            bool filter_1 = true, strides_1 = true, padding_0 = true,
-                 dilation_1 = true;
-            for (size_t j = 0; j < strides.size(); ++j) {
-                filter_1 =
-                    filter_1 && (static_cast<int>(filter_dim[j + 2]) == 1);
-                strides_1 = strides_1 && (strides[j] == 1);
-                padding_0 = padding_0 && (paddings[j] == 0);
-                dilation_1 = dilation_1 && (dilations[j] == 1);
-            }
-            return !(filter_1 && strides_1 && padding_0 && dilation_1);
-        }
-
-        template <>
-        void ConvKernel<CPU, float, ConvParam>::Compute(
-            const ConvParam &param) const {
-            LOG(kLOG_DEBUG) << param;
-
-            const Tensor *input = param.Input();
-
-            // The filter will be reshaped in the calculations,
-            // so here use an assignment operation,
-            // that avoids modifying the variable in the Scope.
-            Tensor filter = *param.Filter();
-
-            Tensor *output = param.Output();
-            //            output->mutable_data<T>(context.GetPlace());
-
-            int groups = param.Groups();
-            std::vector<int> strides = param.Strides();
-            std::vector<int> paddings = param.Paddings();
-            std::vector<int> dilations = param.Dilations();
-
-            DLOG << " compute end get Attrs " << strides[0];
-
-            const int batch_size = static_cast<int>(input->dims()[0]);
-
-            // filter_shape_vec: {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h,
-            // k_w}
-            std::vector<int64_t> filter_shape_vec(
-                framework::vectorize(filter.dims()));
-            // output_shape_vec: {o_n, o_c, o_h, o_w} or {o_n, o_c, o_d, o_h,
-            // o_w}
-            std::vector<int64_t> output_shape_vec(
-                framework::vectorize(output->dims()));
-
-            // use col_shape in the im2col calculation
-            // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h,
-            // k_w, o_d,
-            // o_h, o_w}
-            size_t data_dim = filter_shape_vec.size() - 2;
-            std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
-            col_shape_vec[0] = input->dims()[1] / groups;
-            for (size_t j = 0; j < data_dim; ++j) {
-                col_shape_vec[j + 1] = filter_shape_vec[j + 2];
-                col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
-            }
-            framework::DDim col_shape(framework::make_ddim(col_shape_vec));
-
-            // use col_matrix_shape in the gemm calculation
-            // size: (i_c/g * k_h * k_w, o_h * o_w) or (i_c/g * k_d * k_h * k_w,
-            // o_d *
-            // o_h * o_w)
-            framework::DDim col_matrix_shape =
-                framework::flatten_to_2d(col_shape, data_dim + 1);
-
-            bool is_expand =
-                IsExpand(filter_shape_vec, strides, paddings, dilations);
-            Tensor col;
-            // col_matrix shares the same piece of data with col,
-            // but will be reshaped into a two-dimensional matrix shape
-            // to call the matrix multiplication interface.
-            Tensor col_matrix;
-            if (is_expand) {
-                col.mutable_data<float>(col_shape);
-                col_matrix.ShareDataWith(col);
-                col_matrix.Resize(col_matrix_shape);
-            }
-
-            framework::DDim input_shape = framework::slice_ddim(
-                input->dims(), 1, static_cast<int>(input->dims().size()));
-
-            framework::DDim filter_matrix_shape = {
-                filter.dims()[0], filter.numel() / filter.dims()[0]};
-            filter.Resize(filter_matrix_shape);
-
-            framework::DDim output_matrix_shape = {
-                output->dims()[1],
-                output->numel() / (output->dims()[0] * output->dims()[1])};
-
-            // convolution operator: im2col(or vol2col) + gemm
-            int in_step = static_cast<int>(input->dims()[1]) / groups;
-            int out_step = static_cast<int>(output->dims()[1]) / groups;
-
-            math::Vol2ColFunctor<CPU, float> vol2col;
-            math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
-
-            //            auto& dev_ctx = context.template
-            //            device_context<DeviceContext>();
-            for (int i = 0; i < batch_size; i++) {
-                Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
-                Tensor out_batch =
-                    output->Slice(i, i + 1).Resize(output_matrix_shape);
-
-                for (int g = 0; g < groups; g++) {
-                    Tensor in_slice =
-                        in_batch.Slice(g * in_step, (g + 1) * in_step);
-
-                    if (!is_expand) {
-                        col.ShareDataWith(in_slice);
-                        col_matrix.ShareDataWith(col);
-                        col_matrix.Resize(col_matrix_shape);
-                    } else if (data_dim == 2U) {
-                        // im2col
-                        im2col(in_slice, dilations, strides,
-                               std::vector<int>{paddings[0], paddings[1],
-                                                paddings[0], paddings[1]},
-                               &col);
-                    } else if (data_dim == 3U) {
-                        // vol2col
-                        vol2col(in_slice, dilations, strides, paddings, &col);
-                    }
-
-                    // gemm
-                    Tensor out_slice =
-                        out_batch.Slice(g * out_step, (g + 1) * out_step);
-                    Tensor filter_slice =
-                        filter.Slice(g * out_step, (g + 1) * out_step);
-                    math::matmul<float>(filter_slice, false, col_matrix, false,
-                                        float(1.0), &out_slice, float(0.0));
-                }
-            }
-        }
-
-        template class ConvKernel<CPU, float, ConvParam>;
-
-    } // namespace operators
+namespace operators {
+
+bool IsExpand(const std::vector<int64_t> &filter_dim,
+              const std::vector<int> &strides,
+              const std::vector<int> &paddings,
+              const std::vector<int> &dilations) {
+  bool filter_1 = true, strides_1 = true, padding_0 = true,
+      dilation_1 = true;
+  for (size_t j = 0; j < strides.size(); ++j) {
+    filter_1 =
+        filter_1 && (static_cast<int>(filter_dim[j + 2]) == 1);
+    strides_1 = strides_1 && (strides[j] == 1);
+    padding_0 = padding_0 && (paddings[j] == 0);
+    dilation_1 = dilation_1 && (dilations[j] == 1);
+  }
+  return !(filter_1 && strides_1 && padding_0 && dilation_1);
+}
+
+template<>
+void ConvKernel<CPU, float, ConvParam>::Compute(
+    const ConvParam &param) const {
+  LOG(kLOG_DEBUG) << param;
+
+  const Tensor *input = param.Input();
+
+  // The filter is reshaped during the calculation, so it is
+  // assigned to a local Tensor here; resizing the local
+  // avoids modifying the variable in the Scope.
+  Tensor filter = *param.Filter();
+
+  Tensor *output = param.Output();
+  //            output->mutable_data<T>(context.GetPlace());
+
+  int groups = param.Groups();
+  std::vector<int> strides = param.Strides();
+  std::vector<int> paddings = param.Paddings();
+  std::vector<int> dilations = param.Dilations();
+
+  DLOG << " compute end get Attrs " << strides[0];
+
+  const int batch_size = static_cast<int>(input->dims()[0]);
+
+  // filter_shape_vec: {k_o, k_i, k_h, k_w}
+  // or {k_o, k_i, k_d, k_h, k_w}
+  std::vector<int64_t> filter_shape_vec(
+      framework::vectorize(filter.dims()));
+  // output_shape_vec: {o_n, o_c, o_h, o_w}
+  // or {o_n, o_c, o_d, o_h, o_w}
+  std::vector<int64_t> output_shape_vec(
+      framework::vectorize(output->dims()));
+
+  // use col_shape in the im2col calculation
+  // col_shape_vec is
+  //   {i_c/g, k_h, k_w, o_h, o_w} or
+  //   {i_c/g, k_d, k_h, k_w, o_d, o_h, o_w}
+  size_t data_dim = filter_shape_vec.size() - 2;
+  std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
+  col_shape_vec[0] = input->dims()[1] / groups;
+  for (size_t j = 0; j < data_dim; ++j) {
+    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
+    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
+  }
+  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
+
+  // use col_matrix_shape in the gemm calculation
+  // size is
+  //   (i_c/g * k_h * k_w, o_h * o_w) or
+  //   (i_c/g * k_d * k_h * k_w, o_d * o_h * o_w)
+  framework::DDim col_matrix_shape =
+      framework::flatten_to_2d(col_shape, data_dim + 1);
+
+  bool is_expand =
+      IsExpand(filter_shape_vec, strides, paddings, dilations);
+  Tensor col;
+  // col_matrix shares the same piece of data with col,
+  // but will be reshaped into a two-dimensional matrix shape
+  // to call the matrix multiplication interface.
+  Tensor col_matrix;
+  if (is_expand) {
+    col.mutable_data<float>(col_shape);
+    col_matrix.ShareDataWith(col);
+    col_matrix.Resize(col_matrix_shape);
+  }
+
+  framework::DDim input_shape = framework::slice_ddim(
+      input->dims(), 1, static_cast<int>(input->dims().size()));
+
+  framework::DDim filter_matrix_shape = {
+      filter.dims()[0], filter.numel() / filter.dims()[0]};
+  filter.Resize(filter_matrix_shape);
+
+  framework::DDim output_matrix_shape = {
+      output->dims()[1],
+      output->numel() / (output->dims()[0] * output->dims()[1])};
+
+  // convolution operator: im2col(or vol2col) + gemm
+  int in_step = static_cast<int>(input->dims()[1]) / groups;
+  int out_step = static_cast<int>(output->dims()[1]) / groups;
+
+  math::Vol2ColFunctor<CPU, float> vol2col;
+  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
+
+  //            auto& dev_ctx = context.template
+  //            device_context<DeviceContext>();
+  for (int i = 0; i < batch_size; i++) {
+    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
+    Tensor out_batch =
+        output->Slice(i, i + 1).Resize(output_matrix_shape);
+
+    for (int g = 0; g < groups; g++) {
+      Tensor in_slice =
+          in_batch.Slice(g * in_step, (g + 1) * in_step);
+
+      if (!is_expand) {
+        col.ShareDataWith(in_slice);
+        col_matrix.ShareDataWith(col);
+        col_matrix.Resize(col_matrix_shape);
+      } else if (data_dim == 2U) {
+        // im2col
+        im2col(in_slice, dilations, strides,
+               std::vector<int>{paddings[0], paddings[1],
+                                paddings[0], paddings[1]},
+               &col);
+      } else if (data_dim == 3U) {
+        // vol2col
+        vol2col(in_slice, dilations, strides, paddings, &col);
+      }
+
+      // gemm
+      Tensor out_slice =
+          out_batch.Slice(g * out_step, (g + 1) * out_step);
+      Tensor filter_slice =
+          filter.Slice(g * out_step, (g + 1) * out_step);
+      math::matmul<float>(filter_slice, false, col_matrix, false,
+                          float(1.0), &out_slice, float(0.0));
+    }
+  }
+}
+
+template class ConvKernel<CPU, float, ConvParam>;
+
+} // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/kernel/arm/elementwise_add_kernel.cpp
+++ b/src/operators/kernel/arm/elementwise_add_kernel.cpp
@@ -17,25 +17,25 @@ limitations under the License. */
 #include "operators/kernel/elementwise_add_kernel.h"

 namespace paddle_mobile {
-    namespace operators {
-
-        template <typename T> struct AddFunctor {
-            inline T operator()(T a, T b) const { return a + b; }
-        };
-
-        template <>
-        void ElementwiseAddKernel<CPU, float, ElementwiseAddParam>::Compute(
-            const ElementwiseAddParam &param) const {
-            const Tensor *input_x = param.InputX();
-            const Tensor *input_y = param.InputY();
-            Tensor *Out = param.Out();
-            Out->mutable_data<float>();
-            const int axis = param.Axis();
-            ElementwiseComputeEx<AddFunctor<float>, float>(
-                input_x, input_y, axis, AddFunctor<float>(), Out);
-        }
-
-        template class ElementwiseAddKernel<CPU, float, ElementwiseAddParam>;
-
-    } // namespace operators
+namespace operators {
+
+template<typename T> struct AddFunctor {
+  inline T operator()(T a, T b) const { return a + b; }
+};
+
+template<>
+void ElementwiseAddKernel<CPU, float, ElementwiseAddParam>::Compute(
+    const ElementwiseAddParam &param) const {
+  const Tensor *input_x = param.InputX();
+  const Tensor *input_y = param.InputY();
+  Tensor *Out = param.Out();
+  Out->mutable_data<float>();
+  const int axis = param.Axis();
+  ElementwiseComputeEx<AddFunctor<float>, float>(
+      input_x, input_y, axis, AddFunctor<float>(), Out);
+}
+
+template class ElementwiseAddKernel<CPU, float, ElementwiseAddParam>;
+
+} // namespace operators
 } // namespace paddle
--- a/src/operators/kernel/arm/mul_kernel.cpp
+++ b/src/operators/kernel/arm/mul_kernel.cpp
@@ -21,36 +21,36 @@ SOFTWARE.
 #include "operators/kernel/mul_kernel.h"

 namespace paddle_mobile {
-    namespace operators {
+namespace operators {

-        template <>
-        void
-        MulKernel<CPU, float, MulParam>::Compute(const MulParam &param) const {
-            const Tensor *input_x = param.InputX();
-            const Tensor *input_y = param.InputY();
-            Tensor *out = param.Out();
-            out->mutable_data<float>();
-            const Tensor x_matrix =
-                input_x->dims().size() > 2
-                    ? framework::ReshapeToMatrix(*input_x, param.XNumColDims())
-                    : *input_x;
-            const Tensor y_matrix =
-                input_y->dims().size() > 2
-                    ? framework::ReshapeToMatrix(*input_y, param.YNumColDims())
-                    : *input_y;
-            auto out_dim = out->dims();
-            if (out_dim.size() != 2) {
-                out->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
-            }
-            math::matmul<float>(x_matrix, false, y_matrix, false,
-                                static_cast<float>(1), out,
-                                static_cast<float>(0));
-            if (out_dim.size() != 2) {
-                out->Resize(out_dim);
-            }
-        }
+template<>
+void
+MulKernel<CPU, float, MulParam>::Compute(const MulParam &param) const {
+  const Tensor *input_x = param.InputX();
+  const Tensor *input_y = param.InputY();
+  Tensor *out = param.Out();
+  out->mutable_data<float>();
+  const Tensor x_matrix =
+      input_x->dims().size() > 2
+      ? framework::ReshapeToMatrix(*input_x, param.XNumColDims())
+      : *input_x;
+  const Tensor y_matrix =
+      input_y->dims().size() > 2
+      ? framework::ReshapeToMatrix(*input_y, param.YNumColDims())
+      : *input_y;
+  auto out_dim = out->dims();
+  if (out_dim.size() != 2) {
+    out->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
+  }
+  math::matmul<float>(x_matrix, false, y_matrix, false,
+                      static_cast<float>(1), out,
+                      static_cast<float>(0));
+  if (out_dim.size() != 2) {
+    out->Resize(out_dim);
+  }
+}

-        template class MulKernel<CPU, float, MulParam>;
+template class MulKernel<CPU, float, MulParam>;

-    } // namespace operators
+} // namespace operators
 } // namespace paddle
--- a/src/operators/kernel/conv_kernel.h
+++ b/src/operators/kernel/conv_kernel.h
@@ -25,15 +25,15 @@ SOFTWARE.
 #pragma once;

 namespace paddle_mobile {
-    namespace operators {
+namespace operators {

-        using namespace framework;
+using namespace framework;

-        template <typename DeviceType, typename T, typename P>
-        class ConvKernel
-            : public framework::OpKernelBase<DeviceType, ConvParam> {
-          public:
-            void Compute(const ConvParam &param) const;
-        };
-    }
+template<typename DeviceType, typename T, typename P>
+class ConvKernel
+    : public framework::OpKernelBase<DeviceType, ConvParam> {
+public:
+  void Compute(const ConvParam &param) const;
+};
+}
 }
--- a/src/operators/kernel/elementwise_add_kernel.h
+++ b/src/operators/kernel/elementwise_add_kernel.h
@@ -22,15 +22,15 @@ SOFTWARE.
 #include "operators/op_param.h"

 namespace paddle_mobile {
-    namespace operators {
+namespace operators {

-        using namespace framework;
+using namespace framework;

-        template <typename DeviceType, typename T, typename P>
-        class ElementwiseAddKernel
-            : public framework::OpKernelBase<DeviceType, ElementwiseAddParam> {
-          public:
-            void Compute(const ElementwiseAddParam &param) const;
-        };
-    }
+template<typename DeviceType, typename T, typename P>
+class ElementwiseAddKernel
+    : public framework::OpKernelBase<DeviceType, ElementwiseAddParam> {
+public:
+  void Compute(const ElementwiseAddParam &param) const;
+};
+}
 }
--- a/src/operators/kernel/fpga/conv_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_kernel.cpp
@@ -16,15 +16,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 ==============================================================================*/

-#include "operators/kernel/conv_kernel.h"
-
 namespace paddle_mobile {
-    namespace operators {
+namespace operators {

-        // template<>
-        // void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const
-        // {}
-        //
-        // template class ConvKernel<FPGA, float>;
-    }
+// template<>
+// void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const
+// {}
+//
+// template class ConvKernel<FPGA, float>;
+}
 }
--- a/src/operators/kernel/mul_kernel.h
+++ b/src/operators/kernel/mul_kernel.h
@@ -22,14 +22,14 @@ SOFTWARE.
 #pragma once;

 namespace paddle_mobile {
-    namespace operators {
+namespace operators {

-        using namespace framework;
+using namespace framework;

-        template <typename DeviceType, typename T, typename P>
-        class MulKernel : public framework::OpKernelBase<DeviceType, MulParam> {
-          public:
-            void Compute(const MulParam &param) const;
-        };
-    }
+template<typename DeviceType, typename T, typename P>
+class MulKernel : public framework::OpKernelBase<DeviceType, MulParam> {
+public:
+  void Compute(const MulParam &param) const;
+};
+}
 }
--- a/src/operators/math/elementwise_op_function.h
+++ b/src/operators/math/elementwise_op_function.h
@@ -18,194 +18,194 @@ limitations under the License. */
 #define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)

 namespace paddle_mobile {
-    namespace operators {
-
-        /*
-         * Out = X ⊙ Y
-         * If Y's shape does not match X' shape, they will be reshaped.
-         * For example:
-         * 1. shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
-         *    pre=2, n=3*4, post=5
-         *    x.shape(2, 12, 5) * y.shape(1, 12, 1).broadcast(2, 12, 5)
-         * 2. shape(X) = (2, 3, 4, 5), shape(Y) = (4,5)
-         *    pre=2*3, n=4*5, post=1
-         *    x.shape(6, 20, 1) * y.shape(1, 20, 1).broadcast(6, 20, 1)
-         */
-        inline void get_mid_dims(const framework::DDim &x_dims,
-                                 const framework::DDim &y_dims, const int axis,
-                                 int *pre, int *n, int *post) {
-            *pre = 1;
-            *n = 1;
-            *post = 1;
-            // compute pre
-            for (int i = 0; i < axis; ++i) {
-                (*pre) *= x_dims[i];
-            }
-
-            for (int i = 0; i < y_dims.size(); ++i) {
-                assert(x_dims[i + axis] == y_dims[i]);
-                /// "Broadcast dimension mismatch.");
-                (*n) *= y_dims[i];
-            }
-
-            for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) {
-                (*post) *= x_dims[i];
-            }
-        }
-
-        /// remove dims tail 1. (4,20,1,1) -> (4,20)
-        inline void trim_trailing_singular_dims(framework::DDim *dims) {
-            // Remove trailing dimensions of size 1 for y
-            auto actual_dims_size = dims->size();
-            for (; actual_dims_size != 0; --actual_dims_size) {
-                if ((*dims)[actual_dims_size - 1] != 1)
-                    break;
-            }
-            if (actual_dims_size != dims->size()) {
-                auto actual_dims = framework::vectorize(*dims);
-                actual_dims.resize(actual_dims_size);
-                *dims = framework::make_ddim(actual_dims);
-            }
-        }
-
-        template <typename T> class RowwiseTransformIterator {
-          public:
-            RowwiseTransformIterator(const T *ptr, int n)
-                : ptr_(ptr), i_(0), n_(n) {}
-
-            RowwiseTransformIterator<T> &operator++() {
-                ++i_;
-                if (UNLIKELY(i_ == n_)) {
-                    i_ = 0;
-                }
-                return *this;
-            }
-
-            bool operator==(const RowwiseTransformIterator<T> &rhs) const {
-                return (ptr_ + i_) == &(*rhs);
-            }
-
-            bool operator!=(const RowwiseTransformIterator<T> &rhs) const {
-                return (ptr_ + i_) != &(*rhs);
-            }
-
-            const T &operator*() { return ptr_[i_]; }
-
-          private:
-            const T *ptr_;
-            int i_;
-            int64_t n_;
-        };
-
-        /// (4,20,2)+(20,): (20,) just as (20,1), when move 2 strides in last
-        /// dimension
-        /// in (4,20,2) is 2 ,
-        /// (20,1) move 1 stride , to fill(add) 2 element with the same number.
-        template <typename T> class MidWiseTransformIterator {
-          public:
-            MidWiseTransformIterator(const T *ptr, int n, int post)
-                : ptr_(ptr), i_(0), j_(0), n_(n), post_(post) {}
-
-            MidWiseTransformIterator<T> &operator++() {
-                ++j_;
-                if (UNLIKELY(j_ == post_)) {
-                    ++i_;
-                    j_ = 0;
-                    if (UNLIKELY(i_ == n_)) {
-                        i_ = 0;
-                    }
-                }
-                return *this;
-            }
-
-            bool operator==(const MidWiseTransformIterator<T> &rhs) const {
-                return (ptr_ + i_) == &(*rhs);
-            }
-
-            bool operator!=(const MidWiseTransformIterator<T> &rhs) const {
-                return (ptr_ + i_) != &(*rhs);
-            }
-
-            const T &operator*() { return ptr_[i_]; }
-
-          private:
-            const T *ptr_;
-            int64_t i_;
-            int64_t j_;
-            int64_t n_;
-            int64_t post_;
-        };
-
-        template <typename Functor, typename T, typename OutType = T>
-        class TransformFunctor {
-          public:
-            TransformFunctor(const framework::Tensor *x,
-                             const framework::Tensor *y, framework::Tensor *z,
-                             Functor func)
-                : x_(x->data<T>()), y_(y->data<T>()),
-                  z_(z->mutable_data<OutType>()), nx_(x->numel()), func_(func) {
-            }
-
-            inline void Run() const {
-                math::Transform trans;
-                // 同时执行func(x_, y_)传入z_。
-                trans(x_, x_ + nx_, y_, z_, func_);
-            }
-
-            inline void RunRowWise(int n, int pre) const {
-                math::Transform trans;
-                trans(x_, x_ + nx_, RowwiseTransformIterator<T>(y_, n), z_,
-                      func_);
-            }
-
-            inline void RunMidWise(int n, int pre, int post) const {
-                math::Transform trans;
-                trans(x_, x_ + nx_, MidWiseTransformIterator<T>(y_, n, post),
-                      z_, func_);
-            }
-
-          private:
-            const T *x_;
-            const T *y_;
-            OutType *z_;
-            int64_t nx_;
-            Functor func_;
-        };
-
-        template <typename Functor, typename T, typename OutType = T>
-        void ElementwiseComputeEx(const framework::Tensor *x,
-                                  const framework::Tensor *y, int axis,
-                                  Functor func, framework::Tensor *z) {
-            TransformFunctor<Functor, T, OutType> functor(x, y, z, func);
-
-            auto x_dims = x->dims();
-            auto y_dims = y->dims();
-            // PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
-            //                  "Rank of first input must >= rank of second
-            //                  input.");
-
-            if (x_dims == y_dims) {
-                functor.Run();
-                return;
-            }
-
-            /// axis = -1 represent the last dimension.
-            axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
-            // PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
-            //               "Axis should be in range [0, x_dims)");
-            trim_trailing_singular_dims(&y_dims);
-            axis = (y_dims.size() == 0) ? x_dims.size() : axis;
-
-            int pre, n, post;
-            get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);
-            if (post == 1) {
-                functor.RunRowWise(n, pre);
-                return;
-            } else {
-                functor.RunMidWise(n, pre, post);
-                return;
-            }
-        }
-
-    } // namespace operators
+namespace operators {
+
+/*
+ * Out = X ⊙ Y
+ * If Y's shape does not match X's shape, they will be reshaped.
+ * For example:
+ * 1. shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
+ *    pre=2, n=3*4, post=5
+ *    x.shape(2, 12, 5) * y.shape(1, 12, 1).broadcast(2, 12, 5)
+ * 2. shape(X) = (2, 3, 4, 5), shape(Y) = (4,5)
+ *    pre=2*3, n=4*5, post=1
+ *    x.shape(6, 20, 1) * y.shape(1, 20, 1).broadcast(6, 20, 1)
+ */
+inline void get_mid_dims(const framework::DDim &x_dims,
+                         const framework::DDim &y_dims, const int axis,
+                         int *pre, int *n, int *post) {
+  *pre = 1;
+  *n = 1;
+  *post = 1;
+  // compute pre
+  for (int i = 0; i < axis; ++i) {
+    (*pre) *= x_dims[i];
+  }
+
+  for (int i = 0; i < y_dims.size(); ++i) {
+    assert(x_dims[i + axis] == y_dims[i]);
+    /// "Broadcast dimension mismatch.");
+    (*n) *= y_dims[i];
+  }
+
+  for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) {
+    (*post) *= x_dims[i];
+  }
+}
+
+/// Remove trailing dimensions of size 1, e.g. (4,20,1,1) -> (4,20).
+inline void trim_trailing_singular_dims(framework::DDim *dims) {
+  // Remove trailing dimensions of size 1 for y
+  auto actual_dims_size = dims->size();
+  for (; actual_dims_size != 0; --actual_dims_size) {
+    if ((*dims)[actual_dims_size - 1] != 1)
+      break;
+  }
+  if (actual_dims_size != dims->size()) {
+    auto actual_dims = framework::vectorize(*dims);
+    actual_dims.resize(actual_dims_size);
+    *dims = framework::make_ddim(actual_dims);
+  }
+}
+
+template<typename T> class RowwiseTransformIterator {
+public:
+  RowwiseTransformIterator(const T *ptr, int n)
+      : ptr_(ptr), i_(0), n_(n) {}
+
+  RowwiseTransformIterator<T> &operator++() {
+    ++i_;
+    if (UNLIKELY(i_ == n_)) {
+      i_ = 0;
+    }
+    return *this;
+  }
+
+  bool operator==(const RowwiseTransformIterator<T> &rhs) const {
+    return (ptr_ + i_) == &(*rhs);
+  }
+
+  bool operator!=(const RowwiseTransformIterator<T> &rhs) const {
+    return (ptr_ + i_) != &(*rhs);
+  }
+
+  const T &operator*() { return ptr_[i_]; }
+
+private:
+  const T *ptr_;
+  int i_;
+  int64_t n_;
+};
+
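+/// Example: (4,20,2) + (20,). Y of shape (20,) is treated as (20,1): each
+/// Y element is reused for `post` (= 2) consecutive steps along the last
+/// dimension of X before the iterator advances to the next Y element,
+/// wrapping back to the first Y element after `n` (= 20) elements.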
+template<typename T> class MidWiseTransformIterator {
+public:
+  MidWiseTransformIterator(const T *ptr, int n, int post)
+      : ptr_(ptr), i_(0), j_(0), n_(n), post_(post) {}
+
+  MidWiseTransformIterator<T> &operator++() {
+    ++j_;
+    if (UNLIKELY(j_ == post_)) {
+      ++i_;
+      j_ = 0;
+      if (UNLIKELY(i_ == n_)) {
+        i_ = 0;
+      }
+    }
+    return *this;
+  }
+
+  bool operator==(const MidWiseTransformIterator<T> &rhs) const {
+    return (ptr_ + i_) == &(*rhs);
+  }
+
+  bool operator!=(const MidWiseTransformIterator<T> &rhs) const {
+    return (ptr_ + i_) != &(*rhs);
+  }
+
+  const T &operator*() { return ptr_[i_]; }
+
+private:
+  const T *ptr_;
+  int64_t i_;
+  int64_t j_;
+  int64_t n_;
+  int64_t post_;
+};
+
+template<typename Functor, typename T, typename OutType = T>
+class TransformFunctor {
+public:
+  TransformFunctor(const framework::Tensor *x,
+                   const framework::Tensor *y, framework::Tensor *z,
+                   Functor func)
+      : x_(x->data<T>()), y_(y->data<T>()),
+        z_(z->mutable_data<OutType>()), nx_(x->numel()), func_(func) {
+  }
+
+  inline void Run() const {
+    math::Transform trans;
+    // Apply func(x_, y_) element-wise and store the result into z_.
+    trans(x_, x_ + nx_, y_, z_, func_);
+  }
+
+  inline void RunRowWise(int n, int pre) const {
+    math::Transform trans;
+    trans(x_, x_ + nx_, RowwiseTransformIterator<T>(y_, n), z_,
+          func_);
+  }
+
+  inline void RunMidWise(int n, int pre, int post) const {
+    math::Transform trans;
+    trans(x_, x_ + nx_, MidWiseTransformIterator<T>(y_, n, post),
+          z_, func_);
+  }
+
+private:
+  const T *x_;
+  const T *y_;
+  OutType *z_;
+  int64_t nx_;
+  Functor func_;
+};
+
+template<typename Functor, typename T, typename OutType = T>
+void ElementwiseComputeEx(const framework::Tensor *x,
+                          const framework::Tensor *y, int axis,
+                          Functor func, framework::Tensor *z) {
+  TransformFunctor<Functor, T, OutType> functor(x, y, z, func);
+
+  auto x_dims = x->dims();
+  auto y_dims = y->dims();
+  // PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
+  //                  "Rank of first input must >= rank of second
+  //                  input.");
+
+  if (x_dims == y_dims) {
+    functor.Run();
+    return;
+  }
+
+  /// axis == -1 aligns Y with the trailing dimensions of X.
+  axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
+  // PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
+  //               "Axis should be in range [0, x_dims)");
+  trim_trailing_singular_dims(&y_dims);
+  axis = (y_dims.size() == 0) ? x_dims.size() : axis;
+
+  int pre, n, post;
+  get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);
+  if (post == 1) {
+    functor.RunRowWise(n, pre);
+    return;
+  } else {
+    functor.RunMidWise(n, pre, post);
+    return;
+  }
+}
+
+} // namespace operators
 } // namespace paddle
--- a/src/operators/math/im2col.cc
+++ b/src/operators/math/im2col.cc
--- a/src/operators/math/im2col.h
+++ b/src/operators/math/im2col.h
@@ -17,96 +17,96 @@ limitations under the License. */
 #include "framework/tensor.h"

 namespace paddle_mobile {
-    namespace operators {
-        namespace math {
+namespace operators {
+namespace math {

-            /* The storage format of the coldata in the Im2ColFunctor and
-             * Col2ImFunctor. */
-            enum class ColFormat { kCFO = 0, kOCF = 1 };
+/* The storage format of the coldata in the Im2ColFunctor and
+ * Col2ImFunctor. */
+enum class ColFormat { kCFO = 0, kOCF = 1 };

-            /*
-             * \brief Converts the image data of three dimensions(CHW) into a
-             * colData of
-             *        five dimensions in the Im2ColFunctor calculation,
-             *        And in the Col2ImFunctor calculation, it is reversed.
-             *
-             * \param imData   Image data.
-             * \param imShape  The shape of imData,
-             *                 [input_channels, input_height, input_width].
-             * \param colData  Column data.
-             * \param colShape The shape of colData.
-             *
-             * \param dilations    dilation data.
-             * \param 2-dimension  [dilation_height, dilation_width].
-             *
-             * \param strides      stride data.
-             * \param 2-dimension  [stride_height, stride_width].
-             *
-             * \param paddings     padding data.
-             * \param 4-dimension  [up_pad, left_pad, down_pad, right_pad].
-             *
-             * If the template argument Format is kCFO, the shape of colData is:
-             * [input_channels, filter_height, filter_width, output_height,
-             * output_width]
-             * So, it is easy to reshape into a convolution matrix for
-             * convolution
-             * calculation based on matrix multiplication.
-             * The shape of convolution matrix is [height, width], where the
-             * height is equal
-             * input_channels * filter_height * filter_width, and the width is
-             * equal
-             * output_height * output_width.
-             *
-             * Reshape:
-             *     shape of colData           shape of convolution matrix
-             *     [input_channels,
-             *      filter_height,
-             *      filter_width,      ======>      [height, width]
-             *      output_height,
-             *      output_width]
-             *
-             * If the template argument Format is kOCF, the shape of colData is:
-             * [output_height, output_width, input_channels, filter_height,
-             * filter_width]
-             * So, it is easy to reshape into a sequence matrix for rnn
-             * calculation.
-             * The shape of sequence matrix is [seq_length, step_size], where
-             * the seq_length
-             * is equal output_height * output_width, and the step_size is equal
-             * input_channels * filter_height * filter_width.
-             *
-             * Reshape:
-             *     shape of colData             shape of sequence matrix
-             *     [output_height,
-             *      output_width,
-             *      input_channels,    ======>    [seqLength, stepSize]
-             *      filter_height,
-             *      filter_width]
-             *
-             * \note The caller needs to ensure that imShape.inputChannels is
-             * equal to
-             *       colShape.inputChannels.
-             */
-            template <ColFormat Format, typename DeviceType, typename T>
-            class Im2ColFunctor {
-              public:
-                void operator()(const framework::Tensor &im,
-                                const std::vector<int> &dilation,
-                                const std::vector<int> &stride,
-                                const std::vector<int> &padding,
-                                framework::Tensor *col);
-            };
+/*
+ * \brief Converts the three-dimensional image data (CHW) into a
+ *        colData of
+ *        five dimensions in the Im2ColFunctor calculation.
+ *        In the Col2ImFunctor calculation, the conversion is reversed.
+ *
+ * \param imData   Image data.
+ * \param imShape  The shape of imData,
+ *                 [input_channels, input_height, input_width].
+ * \param colData  Column data.
+ * \param colShape The shape of colData.
+ *
+ * \param dilations    dilation data,
+ *                     2-dimension: [dilation_height, dilation_width].
+ *
+ * \param strides      stride data,
+ *                     2-dimension: [stride_height, stride_width].
+ *
+ * \param paddings     padding data,
+ *                     4-dimension: [up_pad, left_pad, down_pad, right_pad].
+ *
+ * If the template argument Format is kCFO, the shape of colData is:
+ * [input_channels, filter_height, filter_width, output_height,
+ * output_width]
+ * So, it is easy to reshape it into a convolution matrix for
+ * convolution
+ * calculation based on matrix multiplication.
+ * The shape of the convolution matrix is [height, width], where the
+ * height is equal to
+ * input_channels * filter_height * filter_width, and the width is
+ * equal to
+ * output_height * output_width.
+ *
+ * Reshape:
+ *     shape of colData           shape of convolution matrix
+ *     [input_channels,
+ *      filter_height,
+ *      filter_width,      ======>      [height, width]
+ *      output_height,
+ *      output_width]
+ *
+ * If the template argument Format is kOCF, the shape of colData is:
+ * [output_height, output_width, input_channels, filter_height,
+ * filter_width]
+ * So, it is easy to reshape it into a sequence matrix for RNN
+ * calculation.
+ * The shape of the sequence matrix is [seq_length, step_size], where
+ * seq_length
+ * is equal to output_height * output_width, and step_size is equal to
+ * input_channels * filter_height * filter_width.
+ *
+ * Reshape:
+ *     shape of colData             shape of sequence matrix
+ *     [output_height,
+ *      output_width,
+ *      input_channels,    ======>    [seqLength, stepSize]
+ *      filter_height,
+ *      filter_width]
+ *
+ * \note The caller needs to ensure that imShape.inputChannels is
+ * equal to
+ *       colShape.inputChannels.
+ */
+template<ColFormat Format, typename DeviceType, typename T>
+class Im2ColFunctor {
+public:
+  void operator()(const framework::Tensor &im,
+                  const std::vector<int> &dilation,
+                  const std::vector<int> &stride,
+                  const std::vector<int> &padding,
+                  framework::Tensor *col);
+};

-            template <ColFormat Format, typename DeviceType, typename T>
-            class Col2ImFunctor {
-              public:
-                void operator()(const framework::Tensor &col,
-                                const std::vector<int> &dilation,
-                                const std::vector<int> &stride,
-                                const std::vector<int> &padding,
-                                framework::Tensor *im);
-            };
+template<ColFormat Format, typename DeviceType, typename T>
+class Col2ImFunctor {
+public:
+  void operator()(const framework::Tensor &col,
+                  const std::vector<int> &dilation,
+                  const std::vector<int> &stride,
+                  const std::vector<int> &padding,
+                  framework::Tensor *im);
+};

-        } // namespace math
-    }     // namespace operators
+} // namespace math
+}     // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/math/math_function.cc
+++ b/src/operators/math/math_function.cc
@@ -15,125 +15,125 @@ limitations under the License. */
 #include "math_function.h"

 namespace paddle_mobile {
-    namespace operators {
-        namespace math {
-
-            template <>
-            void gemm<float>(const CBLAS_TRANSPOSE transA,
-                             const CBLAS_TRANSPOSE transB, const int M,
-                             const int N, const int K, const float alpha,
-                             const float *A, const float *B, const float beta,
-                             float *C) {
-                int lda = (transA == CblasNoTrans) ? K : M;
-                int ldb = (transB == CblasNoTrans) ? N : K;
-                int ldc = N;
-                cblas_sgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A,
-                            lda, B, ldb, beta, C, ldc);
-            }
-
-            template <>
-            void gemm<double>(const CBLAS_TRANSPOSE transA,
-                              const CBLAS_TRANSPOSE transB, const int M,
-                              const int N, const int K, const double alpha,
-                              const double *A, const double *B,
-                              const double beta, double *C) {
-                int lda = (transA == CblasNoTrans) ? K : M;
-                int ldb = (transB == CblasNoTrans) ? N : K;
-                int ldc = N;
-                cblas_dgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A,
-                            lda, B, ldb, beta, C, ldc);
-            }
-
-            template <>
-            void gemm<float>(const bool transA, const bool transB, const int M,
-                             const int N, const int K, const float alpha,
-                             const float *A, const int lda, const float *B,
-                             const int ldb, const float beta, float *C,
-                             const int ldc) {
-                cblas_sgemm(CblasRowMajor,
-                            transA == false ? CblasNoTrans : CblasTrans,
-                            transB == false ? CblasNoTrans : CblasTrans, M, N,
-                            K, alpha, A, lda, B, ldb, beta, C, ldc);
-            }
-
-            template <>
-            void gemm<double>(const bool transA, const bool transB, const int M,
-                              const int N, const int K, const double alpha,
-                              const double *A, const int lda, const double *B,
-                              const int ldb, const double beta, double *C,
-                              const int ldc) {
-                cblas_dgemm(CblasRowMajor,
-                            transA == false ? CblasNoTrans : CblasTrans,
-                            transB == false ? CblasNoTrans : CblasTrans, M, N,
-                            K, alpha, A, lda, B, ldb, beta, C, ldc);
-            }
-
-            template <>
-            void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
-                               const framework::Tensor &matrix_b, bool trans_b,
-                               float alpha, framework::Tensor *matrix_out,
-                               float beta) {
-                auto dim_a = matrix_a.dims();
-                auto dim_b = matrix_b.dims();
-                auto dim_out = matrix_out->dims();
-                //  PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 &&
-                //  dim_out.size() ==
-                //  2,
-                //                 "The input and output of matmul be matrix");
-                //
-                //  PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
-                //                     platform::is_cpu_place(matrix_b.place())
-                //                     &&
-                //                     platform::is_cpu_place(matrix_out->place()),
-                //                 "Matrix must all be in CPUPlace");
-
-                int M = dim_out[0];
-                int N = dim_out[1];
-                int K = (trans_a == false) ? dim_a[1] : dim_a[0];
-
-                CBLAS_TRANSPOSE transA =
-                    (trans_a == false) ? CblasNoTrans : CblasTrans;
-                CBLAS_TRANSPOSE transB =
-                    (trans_b == false) ? CblasNoTrans : CblasTrans;
-
-                gemm<float>(transA, transB, M, N, K, alpha,
-                            matrix_a.data<float>(), matrix_b.data<float>(),
-                            beta, matrix_out->data<float>());
-            }
-
-            template <>
-            void matmul<double>(const framework::Tensor &matrix_a, bool trans_a,
-                                const framework::Tensor &matrix_b, bool trans_b,
-                                double alpha, framework::Tensor *matrix_out,
-                                double beta) {
-                auto dim_a = matrix_a.dims();
-                auto dim_b = matrix_b.dims();
-                auto dim_out = matrix_out->dims();
-                //  PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 &&
-                //  dim_out.size() ==
-                //  2,
-                //                 "The input and output of matmul be matrix");
-                //
-                //  PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
-                //                     platform::is_cpu_place(matrix_b.place())
-                //                     &&
-                //                     platform::is_cpu_place(matrix_out->place()),
-                //                 "Matrix must all be in CPUPlace");
-
-                int M = dim_out[0];
-                int N = dim_out[1];
-                int K = (trans_a == false) ? dim_a[1] : dim_a[0];
-
-                CBLAS_TRANSPOSE transA =
-                    (trans_a == false) ? CblasNoTrans : CblasTrans;
-                CBLAS_TRANSPOSE transB =
-                    (trans_b == false) ? CblasNoTrans : CblasTrans;
-
-                gemm<double>(transA, transB, M, N, K, alpha,
-                             matrix_a.data<double>(), matrix_b.data<double>(),
-                             beta, matrix_out->data<double>());
-            }
-
-        } // namespace math
-    }     // namespace operators
+namespace operators {
+namespace math {
+
+template<>
+void gemm<float>(const CBLAS_TRANSPOSE transA,
+                 const CBLAS_TRANSPOSE transB, const int M,
+                 const int N, const int K, const float alpha,
+                 const float *A, const float *B, const float beta,
+                 float *C) {
+  int lda = (transA == CblasNoTrans) ? K : M;
+  int ldb = (transB == CblasNoTrans) ? N : K;
+  int ldc = N;
+  cblas_sgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A,
+              lda, B, ldb, beta, C, ldc);
+}
+
+template<>
+void gemm<double>(const CBLAS_TRANSPOSE transA,
+                  const CBLAS_TRANSPOSE transB, const int M,
+                  const int N, const int K, const double alpha,
+                  const double *A, const double *B,
+                  const double beta, double *C) {
+  int lda = (transA == CblasNoTrans) ? K : M;
+  int ldb = (transB == CblasNoTrans) ? N : K;
+  int ldc = N;
+  cblas_dgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A,
+              lda, B, ldb, beta, C, ldc);
+}
+
+template<>
+void gemm<float>(const bool transA, const bool transB, const int M,
+                 const int N, const int K, const float alpha,
+                 const float *A, const int lda, const float *B,
+                 const int ldb, const float beta, float *C,
+                 const int ldc) {
+  cblas_sgemm(CblasRowMajor,
+              transA == false ? CblasNoTrans : CblasTrans,
+              transB == false ? CblasNoTrans : CblasTrans, M, N,
+              K, alpha, A, lda, B, ldb, beta, C, ldc);
+}
+
+template<>
+void gemm<double>(const bool transA, const bool transB, const int M,
+                  const int N, const int K, const double alpha,
+                  const double *A, const int lda, const double *B,
+                  const int ldb, const double beta, double *C,
+                  const int ldc) {
+  cblas_dgemm(CblasRowMajor,
+              transA == false ? CblasNoTrans : CblasTrans,
+              transB == false ? CblasNoTrans : CblasTrans, M, N,
+              K, alpha, A, lda, B, ldb, beta, C, ldc);
+}
+
+template<>
+void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
+                   const framework::Tensor &matrix_b, bool trans_b,
+                   float alpha, framework::Tensor *matrix_out,
+                   float beta) {
+  auto dim_a = matrix_a.dims();
+  auto dim_b = matrix_b.dims();
+  auto dim_out = matrix_out->dims();
+  //  PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 &&
+  //  dim_out.size() ==
+  //  2,
+  //                 "The input and output of matmul be matrix");
+  //
+  //  PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
+  //                     platform::is_cpu_place(matrix_b.place())
+  //                     &&
+  //                     platform::is_cpu_place(matrix_out->place()),
+  //                 "Matrix must all be in CPUPlace");
+
+  int M = dim_out[0];
+  int N = dim_out[1];
+  int K = (trans_a == false) ? dim_a[1] : dim_a[0];
+
+  CBLAS_TRANSPOSE transA =
+      (trans_a == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE transB =
+      (trans_b == false) ? CblasNoTrans : CblasTrans;
+
+  gemm<float>(transA, transB, M, N, K, alpha,
+              matrix_a.data<float>(), matrix_b.data<float>(),
+              beta, matrix_out->data<float>());
+}
+
+template<>
+void matmul<double>(const framework::Tensor &matrix_a, bool trans_a,
+                    const framework::Tensor &matrix_b, bool trans_b,
+                    double alpha, framework::Tensor *matrix_out,
+                    double beta) {
+  auto dim_a = matrix_a.dims();
+  auto dim_b = matrix_b.dims();
+  auto dim_out = matrix_out->dims();
+  //  PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 &&
+  //  dim_out.size() ==
+  //  2,
+  //                 "The input and output of matmul be matrix");
+  //
+  //  PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
+  //                     platform::is_cpu_place(matrix_b.place())
+  //                     &&
+  //                     platform::is_cpu_place(matrix_out->place()),
+  //                 "Matrix must all be in CPUPlace");
+
+  int M = dim_out[0];
+  int N = dim_out[1];
+  int K = (trans_a == false) ? dim_a[1] : dim_a[0];
+
+  CBLAS_TRANSPOSE transA =
+      (trans_a == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE transB =
+      (trans_b == false) ? CblasNoTrans : CblasTrans;
+
+  gemm<double>(transA, transB, M, N, K, alpha,
+               matrix_a.data<double>(), matrix_b.data<double>(),
+               beta, matrix_out->data<double>());
+}
+
+} // namespace math
+}     // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/math/math_function.h
+++ b/src/operators/math/math_function.h
@@ -19,26 +19,26 @@ limitations under the License. */
 #include <cmath>

 namespace paddle_mobile {
-    namespace operators {
-        namespace math {
-
-            template <typename T>
-            void gemm(const CBLAS_TRANSPOSE transA,
-                      const CBLAS_TRANSPOSE transB, const int M, const int N,
-                      const int K, const T alpha, const T *A, const T *B,
-                      const T beta, T *C);
-
-            template <typename T>
-            void gemm(const bool transA, const bool transB, const int M,
-                      const int N, const int K, const T alpha, const T *A,
-                      const int lda, const T *B, const int ldb, const T beta,
-                      T *C, const int ldc);
-
-            // matrix multiply with continuous memory
-            template <typename T>
-            void matmul(const framework::Tensor &matrix_a, bool trans_a,
-                        const framework::Tensor &matrix_b, bool trans_b,
-                        T alpha, framework::Tensor *matrix_out, T beta);
-        } // namespace math
-    }     // namespace operators
+namespace operators {
+namespace math {
+// gemm taking CBLAS transpose flags; no leading dimensions are passed.
+template<typename T>
+void gemm(const CBLAS_TRANSPOSE transA,
+          const CBLAS_TRANSPOSE transB, const int M, const int N,
+          const int K, const T alpha, const T *A, const T *B,
+          const T beta, T *C);
+// gemm overload taking bool transpose flags plus lda, ldb and ldc.
+template<typename T>
+void gemm(const bool transA, const bool transB, const int M,
+          const int N, const int K, const T alpha, const T *A,
+          const int lda, const T *B, const int ldb, const T beta,
+          T *C, const int ldc);
+
+// Matrix multiply on tensors stored in contiguous memory.
+template<typename T>
+void matmul(const framework::Tensor &matrix_a, bool trans_a,
+            const framework::Tensor &matrix_b, bool trans_b,
+            T alpha, framework::Tensor *matrix_out, T beta);
+} // namespace math
+}     // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/math/transform.h
+++ b/src/operators/math/transform.h
@@ -17,41 +17,41 @@ limitations under the License. */
 #include <algorithm>

 namespace paddle_mobile {
-    namespace operators {
-        namespace math {
-
-            // Transform applys a unary or a binary functor on each element in a
-            // range defined by a pair of iterators.
-            //
-            // - The specialization for CPU calls std::transform.
-            // - The specialization for CUDA calls thrust::tranform.
-            //
-            // NOTE: We need to define InputIter and OutputIter defined as
-            //       different types, because the InputIter points op's inputs
-            //       and
-            //       OutputIter pints to op's outputs.
-            //
-            // NOTE: We don't assume that InputIter to be const InputType* and
-            //       OutputIter to be OutputType*, because we might use a
-            //       iterator
-            //       class, paddle::fluid::operators::RowwiseTRansformIterator.
-
-            struct Transform {
-                template <typename InputIter, typename OutputIter,
-                          typename UnaryOperation>
-                void operator()(InputIter first, InputIter last,
-                                OutputIter result, UnaryOperation op) {
-                    std::transform(first, last, result, op);
-                }
-
-                template <typename InputIter1, typename InputIter2,
-                          typename OutputIter, typename BinaryOperation>
-                void operator()(InputIter1 first1, InputIter1 last1,
-                                InputIter2 first2, OutputIter result,
-                                BinaryOperation op) {
-                    std::transform(first1, last1, first2, result, op);
-                }
-            };
-        }
-    } // namespace platform
+namespace operators {
+namespace math {
+
+// Transform applies a unary or a binary functor to each element in a
+// range defined by a pair of iterators.
+//
+// - Both overloads here forward directly to std::transform.
+// - NOTE(review): the original text also described a CUDA
+//   specialization calling thrust::transform; none is visible here.
+//
+// NOTE: InputIter and OutputIter are declared as different types
+//       because InputIter points to the op's inputs and OutputIter
+//       points to the op's outputs.
+//
+// NOTE: We don't assume InputIter to be const InputType* and
+//       OutputIter to be OutputType*, because we might use an
+//       iterator class such as
+//       paddle::fluid::operators::RowwiseTransformIterator.
+
+struct Transform {
+  template<typename InputIter, typename OutputIter,
+      typename UnaryOperation>
+  void operator()(InputIter first, InputIter last,
+                  OutputIter result, UnaryOperation op) {
+    std::transform(first, last, result, op);
+  }
+
+  template<typename InputIter1, typename InputIter2,
+      typename OutputIter, typename BinaryOperation>
+  void operator()(InputIter1 first1, InputIter1 last1,
+                  InputIter2 first2, OutputIter result,
+                  BinaryOperation op) {
+    std::transform(first1, last1, first2, result, op);
+  }
+};
+} // namespace math
+} // namespace operators
 } // namespace paddle
--- a/src/operators/math/vol2col.cc
+++ b/src/operators/math/vol2col.cc
@@ -15,212 +15,212 @@ limitations under the License. */
 #include "vol2col.h"

 namespace paddle_mobile {
-    namespace operators {
-        namespace math {
-
-            using Tensor = paddle_mobile::framework::Tensor;
-            /*
-             * vol = [input_channels, input_depth, input_height, input_width]
-             * col =
-             *   [input_channels, filter_depth, filter_height, filter_width,
-             *                    output_depth, output_height, output_width]
-             */
-            template <typename T> class Vol2ColFunctor<CPU, T> {
-              public:
-                void operator()(const Tensor &vol,
-                                const std::vector<int> &dilations,
-                                const std::vector<int> &strides,
-                                const std::vector<int> &paddings,
-                                Tensor *col) const {
-                    //    PADDLE_ENFORCE(vol.dims().size() == 4);
-                    //    PADDLE_ENFORCE(col->dims().size() == 7);
-
-                    int input_channels = vol.dims()[0];
-                    int input_depth = vol.dims()[1];
-                    int input_height = vol.dims()[2];
-                    int input_width = vol.dims()[3];
-                    int filter_depth = col->dims()[1];
-                    int filter_height = col->dims()[2];
-                    int filter_width = col->dims()[3];
-                    int output_depth = col->dims()[4];
-                    int output_height = col->dims()[5];
-                    int output_width = col->dims()[6];
-                    int channels_col = input_channels * filter_depth *
-                                       filter_height * filter_width;
-
-                    //    PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
-                    //                       ((dilations[0] * (filter_depth - 1)
-                    //                       + 1))) /
-                    //                              strides[0] +
-                    //                          1,
-                    //                      output_depth,
-                    //                      "input_depth and output_depth are "
-                    //                      "mismatching.");
-                    //    PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
-                    //                       ((dilations[1] * (filter_height -
-                    //                       1) + 1))) /
-                    //                              strides[1] +
-                    //                          1,
-                    //                      output_height,
-                    //                      "input_height and output_height are
-                    //                      "
-                    //                      "mismatching.");
-                    //    PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
-                    //                       ((dilations[2] * (filter_width - 1)
-                    //                       + 1))) /
-                    //                              strides[2] +
-                    //                          1,
-                    //                      output_width,
-                    //                      "input_width and output_width are "
-                    //                      "mismatching.");
-
-                    const T *vol_data = vol.data<T>();
-                    T *col_data = col->data<T>();
-
-                    for (int c = 0; c < channels_col; ++c) {
-                        int w_offset = c % filter_width;
-                        int h_offset = (c / filter_width) % filter_height;
-                        int d_offset =
-                            (c / filter_width / filter_height) % filter_depth;
-                        int c_in =
-                            c / filter_width / filter_height / filter_depth;
-                        for (int d = 0; d < output_depth; ++d) {
-                            int d_pad = d * strides[0] - paddings[0] +
-                                        d_offset * dilations[0];
-                            for (int h = 0; h < output_height; ++h) {
-                                int h_pad = h * strides[1] - paddings[1] +
-                                            h_offset * dilations[1];
-                                for (int w = 0; w < output_width; ++w) {
-                                    int w_pad = w * strides[2] - paddings[2] +
-                                                w_offset * dilations[2];
-
-                                    int col_idx = ((c * output_depth + d) *
-                                                       output_height +
-                                                   h) *
-                                                      output_width +
-                                                  w;
-                                    int vol_idx =
-                                        ((c_in * input_depth + d_pad) *
-                                             input_height +
-                                         h_pad) *
-                                            input_width +
-                                        w_pad;
-                                    col_data[col_idx] =
-                                        (h_pad < 0 || h_pad >= input_height ||
-                                         w_pad < 0 || w_pad >= input_width ||
-                                         d_pad < 0 || d_pad >= input_depth)
-                                            ? static_cast<T>(0)
-                                            : vol_data[vol_idx];
-                                }
-                            }
-                        }
-                    }
-                }
-            };
-
-            /*
-             * vol = [input_channels,input_depth, input_height, input_width]
-             * col =
-             *   [input_channels, filter_depth, filter_height, filter_width,
-             *                    output_depth, output_height, output_width]
-             */
-            template <typename T> class Col2VolFunctor<CPU, T> {
-              public:
-                void operator()(const Tensor &col,
-                                const std::vector<int> &dilations,
-                                const std::vector<int> &strides,
-                                const std::vector<int> &paddings,
-                                Tensor *vol) const {
-                    //    PADDLE_ENFORCE(vol->dims().size() == 4);
-                    //    PADDLE_ENFORCE(col.dims().size() == 7);
-
-                    int input_channels = vol->dims()[0];
-                    int input_depth = vol->dims()[1];
-                    int input_height = vol->dims()[2];
-                    int input_width = vol->dims()[3];
-                    int filter_depth = col.dims()[1];
-                    int filter_height = col.dims()[2];
-                    int filter_width = col.dims()[3];
-                    int output_depth = col.dims()[4];
-                    int output_height = col.dims()[5];
-                    int output_width = col.dims()[6];
-                    int channels_col = input_channels * filter_depth *
-                                       filter_height * filter_width;
-
-                    //    PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
-                    //                       ((dilations[0] * (filter_depth - 1)
-                    //                       + 1))) /
-                    //                              strides[0] +
-                    //                          1,
-                    //                      output_depth,
-                    //                      "input_depth and output_depth are "
-                    //                      "mismatching.");
-                    //    PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
-                    //                       ((dilations[1] * (filter_height -
-                    //                       1) + 1))) /
-                    //                              strides[1] +
-                    //                          1,
-                    //                      output_height,
-                    //                      "input_height and output_height are
-                    //                      "
-                    //                      "mismatching.");
-                    //    PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
-                    //                       ((dilations[2] * (filter_width - 1)
-                    //                       + 1))) /
-                    //                              strides[2] +
-                    //                          1,
-                    //                      output_width,
-                    //                      "input_width and output_width are "
-                    //                      "mismatching.");
-                    T *vol_data = vol->data<T>();
-                    const T *col_data = col.data<T>();
-
-                    for (int c = 0; c < channels_col; ++c) {
-                        int w_offset = c % filter_width;
-                        int h_offset = (c / filter_width) % filter_height;
-                        int d_offset =
-                            (c / filter_width / filter_height) % filter_depth;
-                        int cIm =
-                            c / filter_width / filter_height / filter_depth;
-                        for (int d = 0; d < output_depth; ++d) {
-                            int d_pad = d * strides[0] - paddings[0] +
-                                        d_offset * dilations[0];
-                            for (int h = 0; h < output_height; ++h) {
-                                int h_pad = h * strides[1] - paddings[1] +
-                                            h_offset * dilations[1];
-                                for (int w = 0; w < output_width; ++w) {
-                                    int w_pad = w * strides[2] - paddings[2] +
-                                                w_offset * dilations[2];
-
-                                    if (h_pad >= 0 && h_pad < input_height &&
-                                        w_pad >= 0 && w_pad < input_width &&
-                                        d_pad >= 0 && d_pad < input_depth) {
-                                        int vol_idx =
-                                            ((cIm * input_depth + d_pad) *
-                                                 input_height +
-                                             h_pad) *
-                                                input_width +
-                                            w_pad;
-
-                                        int col_idx = ((c * output_depth + d) *
-                                                           output_height +
-                                                       h) *
-                                                          output_width +
-                                                      w;
-                                        vol_data[vol_idx] += col_data[col_idx];
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-            };
-
-            template class Vol2ColFunctor<CPU, float>;
-            template class Vol2ColFunctor<CPU, double>;
-            template class Col2VolFunctor<CPU, float>;
-            template class Col2VolFunctor<CPU, double>;
-
-        } // namespace math
-    }     // namespace operators
+namespace operators {
+namespace math {
+
+using Tensor = paddle_mobile::framework::Tensor;
+/*
+ * CPU Vol2Col: fills col from vol; out-of-range source positions give 0.
+ * vol = [input_channels, input_depth, input_height, input_width]
+ * col = [input_channels, filter_depth, filter_height, filter_width,
+ *        output_depth, output_height, output_width]
+ */
+template<typename T> class Vol2ColFunctor<CPU, T> {
+public:
+  void operator()(const Tensor &vol,
+                  const std::vector<int> &dilations,
+                  const std::vector<int> &strides,
+                  const std::vector<int> &paddings,
+                  Tensor *col) const {
+    //    PADDLE_ENFORCE(vol.dims().size() == 4);
+    //    PADDLE_ENFORCE(col->dims().size() == 7);
+
+    int input_channels = vol.dims()[0];
+    int input_depth = vol.dims()[1];
+    int input_height = vol.dims()[2];
+    int input_width = vol.dims()[3];
+    int filter_depth = col->dims()[1];
+    int filter_height = col->dims()[2];
+    int filter_width = col->dims()[3];
+    int output_depth = col->dims()[4];
+    int output_height = col->dims()[5];
+    int output_width = col->dims()[6];
+    int channels_col = input_channels * filter_depth *
+        filter_height * filter_width;
+
+    // NOTE: these checks are disabled, so col's extents are not verified.
+    //    PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
+    //                       ((dilations[0] * (filter_depth - 1)
+    //                       + 1))) /
+    //                              strides[0] +
+    //                          1,
+    //                      output_depth,
+    //                      "input_depth and output_depth are "
+    //                      "mismatching.");
+    //    PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
+    //                       ((dilations[1] * (filter_height -
+    //                       1) + 1))) /
+    //                              strides[1] +
+    //                          1,
+    //                      output_height,
+    //                      "input_height and output_height are "
+    //                      "mismatching.");
+    //    PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
+    //                       ((dilations[2] * (filter_width - 1)
+    //                       + 1))) /
+    //                              strides[2] +
+    //                          1,
+    //                      output_width,
+    //                      "input_width and output_width are "
+    //                      "mismatching.");
+
+    const T *vol_data = vol.data<T>();
+    T *col_data = col->data<T>();
+    // c decomposes into (c_in, d_offset, h_offset, w_offset).
+    for (int c = 0; c < channels_col; ++c) {
+      int w_offset = c % filter_width;
+      int h_offset = (c / filter_width) % filter_height;
+      int d_offset =
+          (c / filter_width / filter_height) % filter_depth;
+      int c_in =
+          c / filter_width / filter_height / filter_depth;
+      for (int d = 0; d < output_depth; ++d) {
+        int d_pad = d * strides[0] - paddings[0] +
+            d_offset * dilations[0];
+        for (int h = 0; h < output_height; ++h) {
+          int h_pad = h * strides[1] - paddings[1] +
+              h_offset * dilations[1];
+          for (int w = 0; w < output_width; ++w) {
+            int w_pad = w * strides[2] - paddings[2] +
+                w_offset * dilations[2];
+            // Flat offsets; vol_idx is read only when the pads are in range.
+            int col_idx = ((c * output_depth + d) *
+                output_height +
+                h) *
+                output_width +
+                w;
+            int vol_idx =
+                ((c_in * input_depth + d_pad) *
+                    input_height +
+                    h_pad) *
+                    input_width +
+                    w_pad;
+            col_data[col_idx] =
+                (h_pad < 0 || h_pad >= input_height ||
+                    w_pad < 0 || w_pad >= input_width ||
+                    d_pad < 0 || d_pad >= input_depth)
+                ? static_cast<T>(0)
+                : vol_data[vol_idx];
+          }
+        }
+      }
+    }
+  }
+};
+
+/*
+ * CPU Col2Vol: adds (+=) in-range col elements into vol; vol is not reset.
+ * vol = [input_channels, input_depth, input_height, input_width]
+ * col = [input_channels, filter_depth, filter_height, filter_width,
+ *        output_depth, output_height, output_width]
+ */
+template<typename T> class Col2VolFunctor<CPU, T> {
+public:
+  void operator()(const Tensor &col,
+                  const std::vector<int> &dilations,
+                  const std::vector<int> &strides,
+                  const std::vector<int> &paddings,
+                  Tensor *vol) const {
+    //    PADDLE_ENFORCE(vol->dims().size() == 4);
+    //    PADDLE_ENFORCE(col.dims().size() == 7);
+
+    int input_channels = vol->dims()[0];
+    int input_depth = vol->dims()[1];
+    int input_height = vol->dims()[2];
+    int input_width = vol->dims()[3];
+    int filter_depth = col.dims()[1];
+    int filter_height = col.dims()[2];
+    int filter_width = col.dims()[3];
+    int output_depth = col.dims()[4];
+    int output_height = col.dims()[5];
+    int output_width = col.dims()[6];
+    int channels_col = input_channels * filter_depth *
+        filter_height * filter_width;
+
+    // NOTE: these checks are disabled, so col's extents are not verified.
+    //    PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
+    //                       ((dilations[0] * (filter_depth - 1)
+    //                       + 1))) /
+    //                              strides[0] +
+    //                          1,
+    //                      output_depth,
+    //                      "input_depth and output_depth are "
+    //                      "mismatching.");
+    //    PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
+    //                       ((dilations[1] * (filter_height -
+    //                       1) + 1))) /
+    //                              strides[1] +
+    //                          1,
+    //                      output_height,
+    //                      "input_height and output_height are "
+    //                      "mismatching.");
+    //    PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
+    //                       ((dilations[2] * (filter_width - 1)
+    //                       + 1))) /
+    //                              strides[2] +
+    //                          1,
+    //                      output_width,
+    //                      "input_width and output_width are "
+    //                      "mismatching.");
+    T *vol_data = vol->data<T>();
+    const T *col_data = col.data<T>();
+    // c decomposes into (cIm, d_offset, h_offset, w_offset).
+    for (int c = 0; c < channels_col; ++c) {
+      int w_offset = c % filter_width;
+      int h_offset = (c / filter_width) % filter_height;
+      int d_offset =
+          (c / filter_width / filter_height) % filter_depth;
+      int cIm =
+          c / filter_width / filter_height / filter_depth;
+      for (int d = 0; d < output_depth; ++d) {
+        int d_pad = d * strides[0] - paddings[0] +
+            d_offset * dilations[0];
+        for (int h = 0; h < output_height; ++h) {
+          int h_pad = h * strides[1] - paddings[1] +
+              h_offset * dilations[1];
+          for (int w = 0; w < output_width; ++w) {
+            int w_pad = w * strides[2] - paddings[2] +
+                w_offset * dilations[2];
+            // Only positions that land inside vol are accumulated.
+            if (h_pad >= 0 && h_pad < input_height &&
+                w_pad >= 0 && w_pad < input_width &&
+                d_pad >= 0 && d_pad < input_depth) {
+              int vol_idx =
+                  ((cIm * input_depth + d_pad) *
+                      input_height +
+                      h_pad) *
+                      input_width +
+                      w_pad;
+
+              int col_idx = ((c * output_depth + d) *
+                  output_height +
+                  h) *
+                  output_width +
+                  w;
+              vol_data[vol_idx] += col_data[col_idx];
+            }
+          }
+        }
+      }
+    }
+  }
+};
+
+template class Vol2ColFunctor<CPU, float>;
+template class Vol2ColFunctor<CPU, double>;
+template class Col2VolFunctor<CPU, float>;
+template class Col2VolFunctor<CPU, double>;
+
+} // namespace math
+}     // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/math/vol2col.h
+++ b/src/operators/math/vol2col.h
@@ -18,78 +18,78 @@ limitations under the License. */
 #include "framework/tensor.h"

 namespace paddle_mobile {
-    namespace operators {
-        namespace math {
-            /*
-             * \brief Converts the feature data of four dimensions(CDHW) into a
-             * colData of
-             *        seven dimensions in the Vol2ColFunctor calculation,
-             *        And in the Col2VolFunctor calculation, it is reversed.
-             *
-             * \param volData   Vol data.
-             * \param volShape  The shape of volData,
-             *                 [input_channels, input_depth, input_height,
-             * input_width].
-             * \param colData  Column data.
-             * \param colShape The shape of colData.
-             *
-             * \param dilations    dilation data.
-             * \param 3-dimension  [dilation_depth, dilation_height,
-             * dilation_width].
-             *
-             * \param strides      stride data.
-             * \param 3-dimension  [stride_depth, stride_height, stride_width].
-             *
-             * \param paddings     padding data.
-             * \param 3-dimension  [d_pad, h_pad, w_pad].
-             *
-             * The shape of colData is:
-             * [input_channels, filter_depth, filter_height, filter_width,
-             * output_depth,
-             * output_height, output_width]
-             * So, it is easy to reshape into a convolution matrix for
-             * convolution
-             * calculation based on matrix multiplication.
-             * The shape of convolution matrix is [height, width], where the
-             * height is equal
-             * input_channels * filter_depth * filter_height * filter_width, and
-             * the width
-             * is equal output_depth * output_height * output_width.
-             *
-             * Reshape:
-             *     shape of colData           shape of convolution matrix
-             *     [input_channels,
-             *      filter_depth,
-             *      filter_height,
-             *      filter_width,      ======>      [height, width]
-             *      output_depth,
-             *      output_height,
-             *      output_width]
-             *
-             * \note The caller needs to ensure that volShape.inputChannels is
-             * equal to
-             *       colShape.inputChannels.
-             */
-            using Tensor = paddle_mobile::framework::Tensor;
+namespace operators {
+namespace math {
+/*
+ * \brief Converts four-dimensional (CDHW) feature data into
+ *        seven-dimensional colData in the Vol2ColFunctor calculation;
+ *        the Col2VolFunctor calculation performs the reverse
+ *        conversion.
+ *
+ * \param volData   Vol data.
+ * \param volShape  The shape of volData,
+ *                 [input_channels, input_depth, input_height,
+ * input_width].
+ * \param colData  Column data.
+ * \param colShape The shape of colData.
+ *
+ * \param dilations    dilation data, 3-dimension:
+ *                     [dilation_depth, dilation_height,
+ *                      dilation_width].
+ *
+ * \param strides      stride data, 3-dimension:
+ *                     [stride_depth, stride_height, stride_width].
+ *
+ * \param paddings     padding data, 3-dimension:
+ *                     [d_pad, h_pad, w_pad].
+ *
+ * The shape of colData is:
+ * [input_channels, filter_depth, filter_height, filter_width,
+ * output_depth,
+ * output_height, output_width]
+ * So, it is easy to reshape into a convolution matrix for
+ * convolution
+ * calculation based on matrix multiplication.
+ * The shape of the convolution matrix is [height, width], where
+ * the height equals
+ * input_channels * filter_depth * filter_height * filter_width,
+ * and the width equals
+ * output_depth * output_height * output_width.
+ *
+ * Reshape:
+ *     shape of colData           shape of convolution matrix
+ *     [input_channels,
+ *      filter_depth,
+ *      filter_height,
+ *      filter_width,      ======>      [height, width]
+ *      output_depth,
+ *      output_height,
+ *      output_width]
+ *
+ * \note The caller needs to ensure that volShape.inputChannels is
+ * equal to
+ *       colShape.inputChannels.
+ */
+using Tensor = paddle_mobile::framework::Tensor;

-            template <typename DeviceType, typename T> class Vol2ColFunctor {
-              public:
-                void operator()(const Tensor &vol,
-                                const std::vector<int> &dilations,
-                                const std::vector<int> &strides,
-                                const std::vector<int> &paddings,
-                                Tensor *col) const;
-            };
+template<typename DeviceType, typename T> class Vol2ColFunctor {
+public:
+  void operator()(const Tensor &vol,
+                  const std::vector<int> &dilations,
+                  const std::vector<int> &strides,
+                  const std::vector<int> &paddings,
+                  Tensor *col) const;
+};

-            template <typename DeviceType, typename T> class Col2VolFunctor {
-              public:
-                void operator()(const Tensor &col,
-                                const std::vector<int> &dilations,
-                                const std::vector<int> &strides,
-                                const std::vector<int> &paddings,
-                                Tensor *vol) const;
-            };
+template<typename DeviceType, typename T> class Col2VolFunctor {
+public:
+  void operator()(const Tensor &col,
+                  const std::vector<int> &dilations,
+                  const std::vector<int> &strides,
+                  const std::vector<int> &paddings,
+                  Tensor *vol) const;
+};

-        } // namespace math
-    }     // namespace operators
+} // namespace math
+}     // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/mul_op.cpp
+++ b/src/operators/mul_op.cpp
@@ -19,39 +19,39 @@ SOFTWARE.
 #include "mul_op.h"

 namespace paddle_mobile {
-    namespace operators {
+namespace operators {

-        template <typename Dtype, typename T>
-        void MulOp<Dtype, T>::InferShape() const {
-            auto x_dims = param_.InputX()->dims();
-            auto y_dims = param_.InputY()->dims();
-            int x_num_col_dims = param_.XNumColDims();
-            int y_num_col_dims = param_.YNumColDims();
+template<typename Dtype, typename T>
+void MulOp<Dtype, T>::InferShape() const {  // Resizes Out to x_dims[0, x_num_col_dims) followed by y_dims[y_num_col_dims, end).
+  auto x_dims = param_.InputX()->dims();
+  auto y_dims = param_.InputY()->dims();
+  int x_num_col_dims = param_.XNumColDims();  // index at which X is split when flattened to 2-D
+  int y_num_col_dims = param_.YNumColDims();  // index at which Y is split when flattened to 2-D

-            assert(x_dims.size() > x_num_col_dims);
-            assert(y_dims.size() > y_num_col_dims);
+  assert(x_dims.size() > x_num_col_dims);  // split index must be smaller than the rank of X
+  assert(y_dims.size() > y_num_col_dims);  // split index must be smaller than the rank of Y

-            /// (1,2,3,4) , x_num_col_dims = 2  -> (2,12)
-            auto x_mat_dims = framework::flatten_to_2d(x_dims, x_num_col_dims);
-            auto y_mat_dims = framework::flatten_to_2d(y_dims, y_num_col_dims);
+  /// (1,2,3,4) , x_num_col_dims = 2  -> (2,12)
+  auto x_mat_dims = framework::flatten_to_2d(x_dims, x_num_col_dims);
+  auto y_mat_dims = framework::flatten_to_2d(y_dims, y_num_col_dims);

-            assert(x_mat_dims[1] == y_mat_dims[0]);
+  assert(x_mat_dims[1] == y_mat_dims[0]);  // inner dimensions of the two flattened matrices must agree

-            std::vector<int64_t> output_dims;
-            output_dims.reserve(static_cast<size_t>(
-                x_num_col_dims + y_dims.size() - y_num_col_dims));
+  std::vector<int64_t> output_dims;
+  output_dims.reserve(static_cast<size_t>(
+                          x_num_col_dims + y_dims.size() - y_num_col_dims));  // exactly the number of entries pushed by the two loops below

-            for (int i = 0; i < x_num_col_dims; ++i) {
-                output_dims.push_back(x_dims[i]);
-            }
+  for (int i = 0; i < x_num_col_dims; ++i) {  // leading dimensions come from X
+    output_dims.push_back(x_dims[i]);
+  }

-            for (int i = y_num_col_dims; i < y_dims.size(); ++i) {
-                output_dims.push_back(y_dims[i]);
-            }
+  for (int i = y_num_col_dims; i < y_dims.size(); ++i) {  // trailing dimensions come from Y
+    output_dims.push_back(y_dims[i]);
+  }

-            framework::DDim ddim = framework::make_ddim(output_dims);
-            param_.Out()->Resize(ddim);
-        }
-        template class MulOp<CPU, float>;
-    }
+  framework::DDim ddim = framework::make_ddim(output_dims);
+  param_.Out()->Resize(ddim);  // only the shape of Out is updated here
+}
+template class MulOp<CPU, float>;
+}
 }
--- a/src/operators/mul_op.h
+++ b/src/operators/mul_op.h
@@ -21,32 +21,32 @@ SOFTWARE.
 #include "operators/op_param.h"

 namespace paddle_mobile {
-    namespace operators {
-
-        using namespace framework;
-
-        template <typename DeviceType, typename T>
-        class MulOp : public framework::OperatorWithKernel<DeviceType> {
-          public:
-            MulOp(const std::string &type, const VariableNameMap &inputs,
-                  const VariableNameMap &outputs,
-                  const framework::AttributeMap attrs,
-                  std::shared_ptr<framework::Scope> scope)
-                : framework::OperatorWithKernel<DeviceType>(
-                      type, inputs, outputs, attrs, scope),
-                  param_(inputs, outputs, attrs, *scope) {}
-
-            void Run() const {
-                operators::MulKernel<DeviceType, T, MulParam> kernel;
-                kernel.Compute(param_);
-            }
-
-            using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
-            void InferShape() const override;
-
-          protected:
-            MulParam param_;
-        };
-
-    } // namespace operators
+namespace operators {
+
+using namespace framework;
+
+/**
+ * Operator class that pairs a MulParam with MulKernel.
+ *
+ * The parameter bundle is built once in the constructor; Run() hands it to
+ * a kernel instance and InferShape() is defined out of line.
+ */
+template<typename DeviceType, typename T>
+class MulOp : public framework::OperatorWithKernel<DeviceType> {
+public:
+  /// Forwards every argument to the base operator and builds `param_` from
+  /// the same inputs, outputs and attributes, resolved against `*scope`.
+  MulOp(const std::string &type, const VariableNameMap &inputs,
+        const VariableNameMap &outputs, const framework::AttributeMap attrs,
+        std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs,
+                                                  attrs, scope),
+        param_(inputs, outputs, attrs, *scope) {}
+
+  /// Runs a freshly constructed MulKernel on the stored parameters.
+  void Run() const {
+    operators::MulKernel<DeviceType, T, MulParam> mul_kernel;
+    mul_kernel.Compute(param_);
+  }
+
+  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
+
+  /// Declared here, defined in the matching .cpp file.
+  void InferShape() const override;
+
+protected:
+  MulParam param_;
+};
+
+} // namespace operators
 } // namespace paddle
--- a/src/operators/op_param.cpp
+++ b/src/operators/op_param.cpp
@@ -19,27 +19,27 @@ SOFTWARE.
 #include "op_param.h"

 namespace paddle_mobile {
-    namespace operators {
-        Print &operator<<(Print &printer, const ConvParam &conv_param) {
-            printer << "parameter of conv: "
-                    << "\n";
-            printer << "  stride: "
-                    << " (" << conv_param.Strides()[0]
-                    << conv_param.Strides()[1] << ") "
-                    << "\n";
-            printer << "  paddings: "
-                    << " (" << conv_param.Paddings()[0]
-                    << conv_param.Paddings()[1] << ") "
-                    << "\n";
-            printer << "  dilations: "
-                    << " (" << conv_param.Dilations()[0]
-                    << conv_param.Dilations()[1] << ") "
-                    << "\n";
-            printer << "  groups: " << conv_param.Groups() << "\n";
-            printer << "  input  dims: " << conv_param.Input()->dims() << "\n";
-            printer << "  filter dims: " << conv_param.Filter()->dims() << "\n";
-            printer << "  output dims: " << conv_param.Output()->dims();
-            return printer;
-        }
-    } // namespace operators
+namespace operators {
+/**
+ * Writes a multi-line, human-readable summary of `conv_param` to `printer`
+ * and returns `printer` so calls can be chained.
+ *
+ * The first two entries of the stride, padding and dilation vectors are
+ * printed as "(a, b)". Earlier the two numbers were emitted back to back,
+ * which made e.g. (1, 12) and (11, 2) indistinguishable in the output.
+ *
+ * Precondition: each of the three vectors holds at least two entries, and
+ * the input, filter and output tensors are non-null (they are dereferenced
+ * without a check).
+ */
+Print &operator<<(Print &printer, const ConvParam &conv_param) {
+  const auto &strides = conv_param.Strides();
+  const auto &paddings = conv_param.Paddings();
+  const auto &dilations = conv_param.Dilations();
+
+  printer << "parameter of conv: "
+          << "\n";
+  printer << "  stride: "
+          << " (" << strides[0] << ", " << strides[1] << ") "
+          << "\n";
+  printer << "  paddings: "
+          << " (" << paddings[0] << ", " << paddings[1] << ") "
+          << "\n";
+  printer << "  dilations: "
+          << " (" << dilations[0] << ", " << dilations[1] << ") "
+          << "\n";
+  printer << "  groups: " << conv_param.Groups() << "\n";
+  printer << "  input  dims: " << conv_param.Input()->dims() << "\n";
+  printer << "  filter dims: " << conv_param.Filter()->dims() << "\n";
+  // No trailing newline after the last line, as before.
+  printer << "  output dims: " << conv_param.Output()->dims();
+  return printer;
+}
+} // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -26,211 +26,211 @@ SOFTWARE.
 #include "framework/variable.h"

 namespace paddle_mobile {
-    namespace operators {
-
-        using namespace framework;
-
-        class OpParam : PaddleMobileObject {
-          public:
-          protected:
-            template <typename T>
-            static T *InputFrom(const VariableNameMap &inputs,
-                                const Scope &scope) {
-                return GetVarValue<T>("Input", inputs, scope);
-            }
-
-            template <typename T>
-            static T *InputXFrom(const VariableNameMap &inputs,
-                                 const Scope &scope) {
-                return GetVarValue<T>("X", inputs, scope);
-            }
-
-            template <typename T>
-            static T *InputYFrom(const VariableNameMap &inputs,
-                                 const Scope &scope) {
-                return GetVarValue<T>("Y", inputs, scope);
-            }
-
-            template <typename T>
-            static std::vector<T *>
-            InputMultiFrom(const VariableNameMap &inputs, const Scope &scope) {
-                return GetMultiVarValue<T>("Input", inputs, scope);
-            }
-
-            template <typename T>
-            static T *OutputFrom(const VariableNameMap &outputs,
-                                 const Scope &scope) {
-                return GetVarValue<T>("Output", outputs, scope);
-            }
-
-            template <typename T>
-            static T *OutFrom(const VariableNameMap &outputs,
-                              const Scope &scope) {
-                return GetVarValue<T>("Out", outputs, scope);
-            }
-
-            template <typename T>
-            static T *FilterFrom(const VariableNameMap &inputs,
-                                 const Scope &scope) {
-                return GetVarValue<T>("Filter", inputs, scope);
-            }
-
-            template <typename T>
-            static const T GetAttr(std::string key, const AttributeMap &map) {
-                return ((Attribute)map.at(key)).Get<T>();
-            }
-
-            template <typename T>
-            static T *GetVarValue(std::string key,
-                                  const VariableNameMap &var_map,
-                                  const Scope &scope) {
-                auto var_vec = var_map.at(key);
-                if (var_vec.size()) {
-                    //      std::cout << " get var value -- " << var_vec[0] <<
-                    //      std::endl;
-                    auto var = scope.FindVar(var_vec[0]);
-                    return var->GetMutable<T>();
-                } else {
-                    return nullptr;
-                }
-            }
-
-            template <typename T>
-            static std::vector<T *>
-            GetMultiVarValue(std::string key, const VariableNameMap &var_map,
-                             const Scope &scope) {
-                auto var_vecs = var_map.at(key);
-                assert(var_vecs.size() > 1);
-                std::vector<T *> var_res;
-                for (auto &var_vec : var_vecs) {
-                    auto var = scope.FindVar(var_vec);
-                    var_res.push_back(var->GetMutable<T>());
-                }
-                return var_res;
-            }
-        };
-
-        class ConvParam : OpParam {
-          public:
-            ConvParam(const VariableNameMap &inputs,
+namespace operators {
+
+using namespace framework;
+
+/**
+ * Base class for operator parameter bundles.
+ *
+ * Offers protected static helpers that resolve a named slot ("Input", "X",
+ * "Y", "Output", "Out", "Filter") of a VariableNameMap to the variable
+ * stored in a Scope, and that read a typed attribute from an AttributeMap.
+ *
+ * All map lookups use `at()`, so a missing slot or attribute key throws.
+ * NOTE(review): the result of Scope::FindVar is dereferenced without a
+ * check; confirm that callers only pass names that exist in the scope.
+ */
+class OpParam : PaddleMobileObject {
+protected:
+  /// Variable registered under the "Input" slot, or nullptr if the slot
+  /// lists no names.
+  template<typename T>
+  static T *InputFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Input", inputs, scope);
+  }
+
+  /// Variable registered under the "X" slot, or nullptr if the slot lists
+  /// no names.
+  template<typename T>
+  static T *InputXFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("X", inputs, scope);
+  }
+
+  /// Variable registered under the "Y" slot, or nullptr if the slot lists
+  /// no names.
+  template<typename T>
+  static T *InputYFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Y", inputs, scope);
+  }
+
+  /// Every variable registered under the "Input" slot, in listed order.
+  template<typename T>
+  static std::vector<T *> InputMultiFrom(const VariableNameMap &inputs,
+                                         const Scope &scope) {
+    return GetMultiVarValue<T>("Input", inputs, scope);
+  }
+
+  /// Variable registered under the "Output" slot, or nullptr if the slot
+  /// lists no names.
+  template<typename T>
+  static T *OutputFrom(const VariableNameMap &outputs, const Scope &scope) {
+    return GetVarValue<T>("Output", outputs, scope);
+  }
+
+  /// Variable registered under the "Out" slot, or nullptr if the slot lists
+  /// no names.
+  template<typename T>
+  static T *OutFrom(const VariableNameMap &outputs, const Scope &scope) {
+    return GetVarValue<T>("Out", outputs, scope);
+  }
+
+  /// Variable registered under the "Filter" slot, or nullptr if the slot
+  /// lists no names.
+  template<typename T>
+  static T *FilterFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Filter", inputs, scope);
+  }
+
+  /// Returns the attribute stored under `key`, converted to T.
+  template<typename T>
+  static const T GetAttr(const std::string &key, const AttributeMap &map) {
+    // The attribute is copied before Get<T>() is called, exactly as the
+    // previous C-style cast did (presumably because Get<T>() is not
+    // const-qualified -- TODO confirm).
+    return static_cast<Attribute>(map.at(key)).Get<T>();
+  }
+
+  /// Resolves the first name listed under `key` to its variable in `scope`.
+  /// Returns nullptr when the slot exists but lists no names.
+  template<typename T>
+  static T *GetVarValue(const std::string &key,
+                        const VariableNameMap &var_map,
+                        const Scope &scope) {
+    // Bind by reference: the name list is only read, so copying it on
+    // every lookup was unnecessary.
+    const auto &names = var_map.at(key);
+    if (names.empty()) {
+      return nullptr;
+    }
+    auto var = scope.FindVar(names[0]);
+    return var->GetMutable<T>();
+  }
+
+  /// Resolves every name listed under `key` to its variable in `scope`.
+  /// Asserts that the slot lists more than one name.
+  template<typename T>
+  static std::vector<T *> GetMultiVarValue(const std::string &key,
+                                           const VariableNameMap &var_map,
+                                           const Scope &scope) {
+    const auto &names = var_map.at(key);
+    assert(names.size() > 1);
+    std::vector<T *> vars;
+    vars.reserve(names.size());
+    for (const auto &name : names) {
+      auto var = scope.FindVar(name);
+      vars.push_back(var->GetMutable<T>());
+    }
+    return vars;
+  }
+};
+
+/**
+ * Parameter bundle read by the convolution operator: the "Input",
+ * "Filter" and "Output" variables plus the "strides", "paddings",
+ * "dilations" and "groups" attributes.
+ *
+ * The tensors are held as non-owning pointers obtained from the scope.
+ */
+class ConvParam : OpParam {
+public:
+  /// Resolves all tensors against `scope` and copies the attributes.
+  ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+            const framework::AttributeMap &attrs,
+            const framework::Scope &scope) {
+    filter_ = FilterFrom<framework::LoDTensor>(inputs, scope);
+    input_ = InputFrom<framework::Tensor>(inputs, scope);
+    output_ = OutputFrom<framework::Tensor>(outputs, scope);
+    strides_ = GetAttr<std::vector<int>>("strides", attrs);
+    paddings_ = GetAttr<std::vector<int>>("paddings", attrs);
+    dilations_ = GetAttr<std::vector<int>>("dilations", attrs);
+    groups_ = GetAttr<int>("groups", attrs);
+  }
+
+  // Tensor accessors.
+  const Tensor *Input() const { return input_; }
+  const LoDTensor *Filter() const { return filter_; }
+  Tensor *Output() const { return output_; }
+
+  // Attribute accessors.
+  const std::vector<int> &Strides() const { return strides_; }
+  const std::vector<int> &Paddings() const { return paddings_; }
+  const std::vector<int> &Dilations() const { return dilations_; }
+  const int &Groups() const { return groups_; }
+
+private:
+  Tensor *input_;
+  Tensor *output_;
+  LoDTensor *filter_;
+  std::vector<int> strides_;
+  std::vector<int> paddings_;
+  std::vector<int> dilations_;
+  int groups_;
+};
+
+Print &operator<<(Print &printer, const ConvParam &conv_param);
+
+class ElementwiseAddParam : OpParam {  // Holds the "X"/"Y" input tensors, the "Out" tensor and the "axis" attribute.
+public:
+  ElementwiseAddParam(const VariableNameMap &inputs,
                       const VariableNameMap &outputs,
                       const framework::AttributeMap &attrs,
                       const framework::Scope &scope) {
-                filter_ = FilterFrom<framework::LoDTensor>(inputs, scope);
-                input_ = InputFrom<framework::Tensor>(inputs, scope);
-                output_ = OutputFrom<framework::Tensor>(outputs, scope);
-                strides_ = GetAttr<std::vector<int>>("strides", attrs);
-                paddings_ = GetAttr<std::vector<int>>("paddings", attrs);
-                dilations_ = GetAttr<std::vector<int>>("dilations", attrs);
-                groups = GetAttr<int>("groups", attrs);
-            }
-
-            const Tensor *Input() const { return input_; }
-
-            const LoDTensor *Filter() const { return filter_; }
-
-            Tensor *Output() const { return output_; }
-
-            const std::vector<int> &Strides() const { return strides_; }
-
-            const std::vector<int> &Paddings() const { return paddings_; }
-
-            const std::vector<int> &Dilations() const { return dilations_; }
-
-            const int &Groups() const { return groups; }
-
-          private:
-            Tensor *input_;
-            Tensor *output_;
-            LoDTensor *filter_;
-            std::vector<int> strides_;
-            std::vector<int> paddings_;
-            std::vector<int> dilations_;
-            int groups;
-        };
-
-        Print &operator<<(Print &printer, const ConvParam &conv_param);
-
-        class ElementwiseAddParam : OpParam {
-          public:
-            ElementwiseAddParam(const VariableNameMap &inputs,
-                                const VariableNameMap &outputs,
-                                const framework::AttributeMap &attrs,
-                                const framework::Scope &scope) {
-                input_x_ = InputXFrom<framework::Tensor>(inputs, scope);
-                input_y_ = InputYFrom<framework::Tensor>(inputs, scope);
-                out_ = OutFrom<framework::Tensor>(outputs, scope);
-                axis_ = GetAttr<int>("axis", attrs);
-            }
-
-            const Tensor *InputX() const { return input_x_; }
-
-            const Tensor *InputY() const { return input_y_; }
-
-            Tensor *Out() const { return out_; }
-
-            const int &Axis() const { return axis_; }
-
-          private:
-            Tensor *input_x_;
-            Tensor *input_y_;
-            Tensor *out_;
-            int axis_;
-        };
-
-        class MulParam : OpParam {
-          public:
-            MulParam(const VariableNameMap &inputs,
-                     const VariableNameMap &outputs,
-                     const framework::AttributeMap &attrs,
-                     const framework::Scope &scope) {
-                input_x_ = InputXFrom<framework::Tensor>(inputs, scope);
-                input_y_ = InputYFrom<framework::Tensor>(inputs, scope);
-                out_ = OutFrom<framework::Tensor>(outputs, scope);
-                x_num_col_dims_ = GetAttr<int>("x_num_col_dims", attrs);
-                y_num_col_dims_ = GetAttr<int>("y_num_col_dims", attrs);
-            }
-
-            const Tensor *InputX() const { return input_x_; }
-
-            const Tensor *InputY() const { return input_y_; }
-
-            Tensor *Out() const { return out_; }
-
-            const int &XNumColDims() const { return x_num_col_dims_; }
-
-            const int &YNumColDims() const { return y_num_col_dims_; }
-
-          private:
-            Tensor *input_x_;
-            Tensor *input_y_;
-            Tensor *out_;
-            int x_num_col_dims_;
-            int y_num_col_dims_;
-        };
-
-        class ConcatParam : public OpParam {
-          public:
-            ConcatParam(const VariableNameMap &inputs,
-                        const VariableNameMap &outputs,
-                        const framework::AttributeMap &attrs,
-                        const framework::Scope &scope) {
-                inputs_ = InputMultiFrom<framework::Tensor>(inputs, scope);
-                out_ = OutFrom<framework::Tensor>(outputs, scope);
-                axis_ = GetAttr<int>("axis", attrs);
-            }
-
-            std::vector<Tensor *> Inputs() const { return inputs_; }
-
-            Tensor *Out() const { return out_; }
-
-            const int &Axis() const { return axis_; }
-
-          private:
-            std::vector<Tensor *> inputs_;
-            Tensor *out_;
-            int axis_;
-        };
-
-    } // namespace operators
+    input_x_ = InputXFrom<framework::Tensor>(inputs, scope);  // first name listed under the "X" slot
+    input_y_ = InputYFrom<framework::Tensor>(inputs, scope);  // first name listed under the "Y" slot
+    out_ = OutFrom<framework::Tensor>(outputs, scope);  // first name listed under the "Out" slot
+    axis_ = GetAttr<int>("axis", attrs);  // throws if the "axis" attribute is absent (map.at)
+  }
+
+  const Tensor *InputX() const { return input_x_; }  // read-only view of X
+
+  const Tensor *InputY() const { return input_y_; }  // read-only view of Y
+
+  Tensor *Out() const { return out_; }  // mutable even through a const param object
+
+  const int &Axis() const { return axis_; }
+
+private:
+  Tensor *input_x_;  // non-owning; obtained from the scope
+  Tensor *input_y_;  // non-owning; obtained from the scope
+  Tensor *out_;  // non-owning; obtained from the scope
+  int axis_;
+};
+
+/**
+ * Parameter bundle read by MulOp: the "X" and "Y" input tensors, the "Out"
+ * tensor and the "x_num_col_dims" / "y_num_col_dims" attributes.
+ *
+ * The tensors are held as non-owning pointers obtained from the scope.
+ */
+class MulParam : OpParam {
+public:
+  /// Resolves all tensors against `scope` and copies the two attributes.
+  MulParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+           const framework::AttributeMap &attrs,
+           const framework::Scope &scope) {
+    input_x_ = InputXFrom<framework::Tensor>(inputs, scope);
+    input_y_ = InputYFrom<framework::Tensor>(inputs, scope);
+    out_ = OutFrom<framework::Tensor>(outputs, scope);
+    x_num_col_dims_ = GetAttr<int>("x_num_col_dims", attrs);
+    y_num_col_dims_ = GetAttr<int>("y_num_col_dims", attrs);
+  }
+
+  // Tensor accessors.
+  const Tensor *InputX() const { return input_x_; }
+  const Tensor *InputY() const { return input_y_; }
+  Tensor *Out() const { return out_; }
+
+  // Attribute accessors.
+  const int &XNumColDims() const { return x_num_col_dims_; }
+  const int &YNumColDims() const { return y_num_col_dims_; }
+
+private:
+  Tensor *input_x_;
+  Tensor *input_y_;
+  Tensor *out_;
+  int x_num_col_dims_;
+  int y_num_col_dims_;
+};
+
+/**
+ * Parameter bundle holding every tensor listed under the "Input" slot, the
+ * "Out" tensor and the "axis" attribute.
+ *
+ * Construction asserts (through InputMultiFrom) that more than one input
+ * name is listed. The tensors are non-owning pointers from the scope.
+ */
+class ConcatParam : public OpParam {
+public:
+  /// Resolves all tensors against `scope` and copies the "axis" attribute.
+  ConcatParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+              const framework::AttributeMap &attrs,
+              const framework::Scope &scope) {
+    inputs_ = InputMultiFrom<framework::Tensor>(inputs, scope);
+    out_ = OutFrom<framework::Tensor>(outputs, scope);
+    axis_ = GetAttr<int>("axis", attrs);
+  }
+
+  /// Returns a copy of the input pointer list.
+  std::vector<Tensor *> Inputs() const { return inputs_; }
+  Tensor *Out() const { return out_; }
+  const int &Axis() const { return axis_; }
+
+private:
+  std::vector<Tensor *> inputs_;
+  Tensor *out_;
+  int axis_;
+};
+
+} // namespace operators
 } // namespace paddle_mobile
--- a/tools/pre-commit.hooks/cpplint.bash
+++ b/tools/pre-commit.hooks/cpplint.bash
 #!/bin/bash
+set -e

 TOTAL_ERRORS=0

-#iclang-tidy *.[ch]pp -checks=* 
 # The trick to remove deleted files: https://stackoverflow.com/a/2413151
-for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'|grep -v ".pb." | grep -v "third-party/"); do
-    cpplint $file
+for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | grep "src" | grep -v ".pb."); do
+    echo "clang-tidy formating $file" 
+    clang-tidy $file 
    TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
 done

 exit $TOTAL_ERRORS
-
--- a/tools/pre-commit.hooks/.clang_format.hook
+++ b/tools/pre-commit.hooks/.clang_format.hook
-#!/bin/bash
-set -e
-
-readonly VERSION="3.8"
-
-version=$(clang-format -version)
-
-if ! [[ $version == *"$VERSION"* ]]; then
-    echo "clang-format version check failed."
-    echo "a version contains '$VERSION' is needed, but get '$version'"
-    echo "you can install the right version, and make an soft-link to '\$PATH' env"
-    exit -1
-fi
-
-clang-format $@