Commit 45de1e84 authored by 朔-望, committed by GitHub

Merge pull request #198 from allonli/develop

Add clang-tidy and clang-format hook
@@ -3,5 +3,4 @@ Language: Cpp
 BasedOnStyle: LLVM
 Standard: Cpp11
 IndentWidth: 4
-NamespaceIndentation: All
 ...
Checks: >
  *,
  -android-*,
  -bugprone-bool-pointer-implicit-conversion,
  -cert-env33-c,
  -cert-dcl50-cpp,
  -cert-dcl59-cpp,
  -cppcoreguidelines-*,
  -fuchsia-*,
  -google-*,
  google-default-arguments,
  google-explicit-constructor,
  google-runtime-member-string-references,
  google-runtime-operator,
  -hicpp-braces-around-statements,
  -hicpp-named-parameter,
  -hicpp-no-array-decay,
  -hicpp-no-assembler,
  -hicpp-no-malloc,
  -hicpp-function-size,
  -hicpp-special-member-functions,
  -hicpp-vararg,
  -llvm-*,
  -objc-*,
  -readability-else-after-return,
  -readability-implicit-bool-conversion,
  -readability-named-parameter,
  -readability-simplify-boolean-expr,
  -readability-braces-around-statements,
  -readability-identifier-naming,
  -readability-function-size,
  -readability-redundant-member-init,
  -misc-bool-pointer-implicit-conversion,
  -misc-definitions-in-headers,
  -misc-unused-alias-decls,
  -misc-unused-parameters,
  -misc-unused-using-decls,
  -modernize-use-using,
  -modernize-use-default-member-init,
  -clang-diagnostic-*,
  -clang-analyzer-*
WarningsAsErrors: ''
HeaderFilterRegex: ''
AnalyzeTemporaryDtors: false
FormatStyle: none
User: allonli
CheckOptions:
- key: google-readability-braces-around-statements.ShortStatementLines
value: '1'
- key: google-readability-function-size.StatementThreshold
value: '800'
- key: google-readability-namespace-comments.ShortNamespaceLines
value: '10'
- key: google-readability-namespace-comments.SpacesBeforeComments
value: '2'
- key: modernize-loop-convert.MaxCopySize
value: '16'
- key: modernize-loop-convert.MinConfidence
value: reasonable
- key: modernize-loop-convert.NamingStyle
value: CamelCase
- key: modernize-pass-by-value.IncludeStyle
value: llvm
- key: modernize-replace-auto-ptr.IncludeStyle
value: llvm
- key: modernize-use-nullptr.NullMacros
value: 'NULL'
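
To illustrate the effect of this configuration: the google-* group is disabled wholesale, but google-explicit-constructor is re-enabled, so clang-tidy still flags implicit single-argument constructors. A minimal illustration (hypothetical code, not from this repo):

// example.cpp -- running `clang-tidy example.cpp --` under the config above
// reports that single-argument constructors must be marked explicit.
class Width {
public:
  Width(int w) : w_(w) {}  // flagged by google-explicit-constructor

private:
  int w_;
};

int main() {
  Width w = 42;  // the silent implicit conversion the check is meant to catch
  (void)w;
  return 0;
}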
@@ -22,13 +22,22 @@ repos:
 - repo: local
   hooks:
-  - id: clang-format-with-version-check
+  - id: clang-format
     name: clang-format
     description: Format files with ClangFormat.
-    entry: bash ./tools/pre-commit.hooks/.clang_format.hook -i
+    entry: bash ./tools/pre-commit.hooks/.clang-format.hook -i
     language: system
-    files: (src).*\.(c|cc|cxx|cpp|h|hpp|hxx)$
+    files: \.(c|cc|cxx|cpp|h|hpp|hxx)$
+- repo: local
+  hooks:
+  - id: clang-tidy
+    name: clang-tidy
+    description: Check C++ code style using clang-tidy.
+    entry: bash ./tools/pre-commit.hooks/.clang-tidy.hook -i
+    language: system
+    files: (src).*\.(c|cc|cxx|cpp|h|hpp|hxx)$
+#
 #- repo: local
 #  hooks:
 #  - id: copyright_checker
 ...
@@ -6,26 +6,54 @@ dist: trusty
 os:
   - linux
 env:
-  - JOB=check_style
+  global:
+    - CMAKE_URL=https://cmake.org/files/v3.11/cmake-3.11.1-Linux-x86_64.tar.gz
 addons:
   apt:
+    sources:
+      - llvm-toolchain-trusty-6.0
+      - ubuntu-toolchain-r-test
     packages:
       - git
       - python
       - python-pip
       - python2.7-dev
-      - clang-format-3.8
+      - libc6-i386
+      - clang-6.0
+      - libclang-6.0
+      - llvm-6.0
+      - llvm-6.0-dev
+      - curl
+compiler:
+  - clang
 before_install:
   - sudo pip install -U virtualenv pre-commit pip
+  # Download and install recent cmake
+  - |
+    if [[ ${TRAVIS_OS_NAME} == "linux" ]]; then
+      CMAKE_URL=${CMAKE_URL}
+      mkdir -p ${DEPS_DIR}/cmake
+      travis_retry wget --no-check-certificate --quiet -O - ${CMAKE_URL} | tar --strip-components=1 -xz -C ${DEPS_DIR}/cmake
+      export PATH=${DEPS_DIR}/cmake/bin:${PATH}
+    fi
-script:
-  - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
+#install:
+#  - if [ "$CXX" = "g++" ]; then export CXX="g++-5" CC="gcc-5"; fi
+#  - if [ "$CXX" = "clang++" ]; then export CXX="clang++-6.0" CC="clang-6.0"; fi
+before_script:
+  - |
+    echo "cmake generate compile_commands.json for clang-tidy"
+    ls -l -a
+    clang-tidy -version
+    clang-format -version
+script:
   - |
     function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
   - |
-    timeout 600 .travis/${JOB}.sh # 10min timeout
+    timeout 600 .travis/pre-commit-job.sh # 10min timeout
     RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else exit 1; fi;
 notifications:
 ...
 #!/bin/bash
 function abort(){
-    echo "Your change doesn't follow PaddlePaddle's code style" 1>&2
+    echo "Your change doesn't follow Paddle-Mobile's code style" 1>&2
     echo "Please use pre-commit to auto-format your code." 1>&2
     exit 1
 }
@@ -11,7 +11,6 @@ cd `dirname $0`
 cd ..
 export PATH=/usr/bin:$PATH
 pre-commit install
-clang-format --version
 if ! pre-commit run -a ; then
     ls -lh
 ...
@@ -5,7 +5,8 @@ add_definitions(-DPADDLE_MOBILE_DEBUG="true")
 set(CMAKE_BUILD_TYPE RelWithDebInfo)
-set(CMAKE_VERBOSE_MAKEFILE on)
+set(CMAKE_VERBOSE_MAKEFILE ON)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY build)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY build)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
 ...
 #!/bin/bash
 build_for_linux() {
-    echo "linux"
+    if [ ! `which brew` ]; then
+        echo "building failed! homebrew not found, please install homebrew."
+        return
+    fi
+    if [ ! `which cmake` ]; then
+        echo "installing cmake."
+        brew install cmake
+        if [ ! $? ]; then
+            echo "cmake install failed."
+            return
+        fi
+    fi
+    PLATFORM="x86"
+    MODE="Release"
+    CXX_FLAGS="-std=c++11 -O3 -s"
+    BUILD_DIR=build/release/"${PLATFORM}"
+    mkdir -p ${BUILD_DIR}/build
+    mkdir -p ${BUILD_DIR}/test
+    cp -r test/models ${BUILD_DIR}/test/models
+    cmake . \
+        -B"${BUILD_DIR}" \
+        -DCMAKE_BUILD_TYPE="${MODE}" \
+        -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
+        -DIS_MAC=true
+    cd ${BUILD_DIR}
+    make -j 8
 }
 build_for_mac() {
 ...
[
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/ddim.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/ddim.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/ddim.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/lod_tensor.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/lod_tensor.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/lod_tensor.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/scope.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/scope.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/scope.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/tensor_util.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/tensor_util.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/tensor_util.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/memory/t_malloc.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/memory/t_malloc.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/memory/t_malloc.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/math/im2col.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/im2col.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/im2col.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/math/math_function.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/math_function.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/math_function.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/math/vol2col.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/vol2col.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/vol2col.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/common/variant.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/common/variant.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/common/variant.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/attribute.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/attribute.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/attribute.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/block_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/block_desc.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/block_desc.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/data_transform.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/data_transform.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/data_transform.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/executor.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/executor.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/executor.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/framework.pb.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/framework.pb.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/framework.pb.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/op_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/op_desc.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/op_desc.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/operator.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/operator.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/operator.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/paddle_mobile_object.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/paddle_mobile_object.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/paddle_mobile_object.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/program.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/program.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/program.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/program_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/program_desc.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/program_desc.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/var_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/var_desc.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/var_desc.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/io.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/io.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/io.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/conv_op.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/conv_op.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/conv_op.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/elementwise_add_op.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/elementwise_add_op.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/elementwise_add_op.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/kernel/arm/conv_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/conv_kernel.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/conv_kernel.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/kernel/arm/elementwise_add_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/elementwise_add_kernel.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/elementwise_add_kernel.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/kernel/arm/mul_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/mul_kernel.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/mul_kernel.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/kernel/fpga/conv_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/fpga/conv_kernel.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/fpga/conv_kernel.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/mul_op.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/mul_op.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/mul_op.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/op_param.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/op_param.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/op_param.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/ddim.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/ddim.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/ddim.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/lod_tensor.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/lod_tensor.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/lod_tensor.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/scope.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/scope.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/scope.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/tensor_util.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/tensor_util.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/tensor_util.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/memory/t_malloc.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/memory/t_malloc.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/memory/t_malloc.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/math/im2col.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/im2col.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/im2col.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/math/math_function.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/math_function.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/math_function.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/math/vol2col.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/vol2col.cc",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/vol2col.cc"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/common/variant.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/common/variant.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/common/variant.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/attribute.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/attribute.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/attribute.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/block_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/block_desc.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/block_desc.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/data_transform.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/data_transform.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/data_transform.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/executor.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/executor.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/executor.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/framework.pb.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/framework.pb.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/framework.pb.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/op_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/op_desc.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/op_desc.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/operator.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/operator.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/operator.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/paddle_mobile_object.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/paddle_mobile_object.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/paddle_mobile_object.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/program.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/program.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/program.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/program_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/program_desc.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/program_desc.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/var_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/var_desc.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/var_desc.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/io.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/io.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/io.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/conv_op.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/conv_op.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/conv_op.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/elementwise_add_op.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/elementwise_add_op.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/elementwise_add_op.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/kernel/arm/conv_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/conv_kernel.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/conv_kernel.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/kernel/arm/elementwise_add_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/elementwise_add_kernel.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/elementwise_add_kernel.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/kernel/arm/mul_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/mul_kernel.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/mul_kernel.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/kernel/fpga/conv_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/fpga/conv_kernel.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/fpga/conv_kernel.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/mul_op.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/mul_op.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/mul_op.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -fPIC -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/op_param.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/op_param.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/op_param.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release/test",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/test-log.dir/common/test_log.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/test/common/test_log.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/test/common/test_log.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release/test",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/test-conv-op.dir/operators/test_cov_op.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/test/operators/test_cov_op.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/test/operators/test_cov_op.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release/test",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/test-load.dir/framework/test_load.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/test/framework/test_load.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/test/framework/test_load.cpp"
},
{
"directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release/test",
"command": "/Library/Developer/CommandLineTools/usr/bin/c++ -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include -O2 -g -DNDEBUG -std=c++11 -o CMakeFiles/paddle-mobile-test.dir/main.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/test/main.cpp",
"file": "/Users/allonli/Documents/workspace/paddle-mobile/test/main.cpp"
}
]
\ No newline at end of file
@@ -27,146 +27,145 @@ SOFTWARE.
namespace paddle_mobile {

enum LogLevel {
  kNO_LOG,
  kLOG_ERROR,
  kLOG_WARNING,
  kLOG_INFO,
  kLOG_DEBUG,
  kLOG_DEBUG1,
  kLOG_DEBUG2,
  kLOG_DEBUG3,
  kLOG_DEBUG4
};

// log level
static LogLevel log_level = kLOG_DEBUG4;

static std::vector<std::string> logs{"NO", "ERROR ", "WARNING",
                                     "INFO ", "DEBUG ", "DEBUG1 ",
                                     "DEBUG2 ", "DEBUG3 ", "DEBUG4 "};
struct ToLog;
struct Print;

struct Print {
  friend struct ToLog;
  template <typename T> Print &operator<<(T const &value) {
    buffer_ << value;
    return *this;
  }

private:
  void print(LogLevel level) {
    buffer_ << std::endl;
    if (level == kLOG_ERROR) {
      std::cerr << buffer_.str();
    } else {
      std::cout << buffer_.str();
    }
  }
  std::ostringstream buffer_;
};

struct ToLog {
  ToLog(LogLevel level = kLOG_DEBUG, const std::string &info = "")
      : level_(level) {
    unsigned blanks =
        (unsigned)(level > kLOG_DEBUG ? (level - kLOG_DEBUG) * 4 : 1);
    printer_ << logs[level] << " " << info << ":"
             << std::string(blanks, ' ');
  }

  template <typename T> ToLog &operator<<(T const &value) {
    printer_ << value;
    return *this;
  }

  ~ToLog() { printer_.print(level_); }

private:
  LogLevel level_;
  Print printer_;
};

#define LOG(level)                                                        \
  if (level > paddle_mobile::log_level) {                                 \
  } else                                                                  \
    paddle_mobile::ToLog(                                                 \
        level, (std::stringstream()                                       \
                << "[file: "                                              \
                << (strrchr(__FILE__, '/') ? (strrchr(__FILE__, '/') + 1) \
                                           : __FILE__)                    \
                << "] [line: " << __LINE__ << "] ")                       \
                   .str())

#define DLOG                                                              \
  if (paddle_mobile::kLOG_DEBUG > paddle_mobile::log_level) {             \
  } else                                                                  \
    paddle_mobile::ToLog(                                                 \
        paddle_mobile::kLOG_DEBUG,                                        \
        (std::stringstream()                                              \
         << "[file: "                                                     \
         << (strrchr(__FILE__, '/') ? (strrchr(__FILE__, '/') + 1)        \
                                    : __FILE__)                           \
         << "] [line: " << __LINE__ << "] ")                              \
            .str())
} // namespace paddle_mobile

#define LOGF(level, format, ...)                                          \
  if (level > paddle_mobile::log_level) {                                 \
  } else                                                                  \
    printf(format, ##__VA_ARGS__)

#define DLOGF(format, ...)                                                \
  if (paddle_mobile::kLOG_DEBUG > paddle_mobile::log_level) {             \
  } else                                                                  \
    printf(format, ##__VA_ARGS__)

#else

namespace paddle_mobile {

enum LogLevel {
  kNO_LOG,
  kLOG_ERROR,
  kLOG_WARNING,
  kLOG_INFO,
  kLOG_DEBUG,
  kLOG_DEBUG1,
  kLOG_DEBUG2,
  kLOG_DEBUG3,
  kLOG_DEBUG4
};

struct ToLog;
struct Print {
  friend struct ToLog;
  template <typename T> Print &operator<<(T const &value) {}

private:
};

struct ToLog {
  ToLog(LogLevel level) {}

  template <typename T> ToLog &operator<<(T const &value) { return *this; }
};

#define LOG(level) \
  if (true) {      \
  } else           \
    paddle_mobile::ToLog(level)

#define DLOG  \
  if (true) { \
  } else      \
    paddle_mobile::ToLog(paddle_mobile::kLOG_DEBUG)

#define LOGF(level, format, ...)
#define DLOGF(format, ...)
} // namespace paddle_mobile

#endif
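
As a usage sketch of the macros above (the include path below is an assumption, not shown in the diff): LOG and DLOG stream like std::cout and prepend the level plus file and line, LOGF and DLOGF are printf-style, and all of them become dead code when PADDLE_MOBILE_DEBUG is not defined.

#include "common/log.h" // assumed path of the logging header shown above

int main() {
  // Prints the level tag plus "[file: ...] [line: ...]" and the message.
  LOG(paddle_mobile::kLOG_INFO) << "loading model " << 3;
  // DLOG is shorthand for logging at kLOG_DEBUG.
  DLOG << "tensor dims: " << 1 << "x" << 224;
  // printf-style variants.
  LOGF(paddle_mobile::kLOG_ERROR, "status: %d\n", -1);
  DLOGF("elapsed: %f ms\n", 2.5);
  return 0;
}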
@@ -23,31 +23,30 @@ SOFTWARE.
namespace paddle_mobile {
namespace framework {
template <typename Dtype> class OperatorBase;
class OpDesc;
class BlockDesc;
class InferShapeContext;
} // namespace framework

using VariableNameMap = std::map<std::string, std::vector<std::string>>;

template <typename Dtype>
using OpCreator = std::function<framework::OperatorBase<Dtype> *(
    const std::string & /*type*/, const VariableNameMap & /*inputs*/,
    const VariableNameMap & /*outputs*/,
    const framework::AttributeMap & /*attrs*/)>;

using GradOpMakerFN =
    std::function<std::vector<std::unique_ptr<framework::OpDesc>>(
        const framework::OpDesc &,
        const std::unordered_set<std::string> & /*no_grad_set*/,
        std::unordered_map<std::string, std::string> * /*grad_to_var*/,
        const std::vector<framework::BlockDesc *> &grad_block)>;

using InferVarTypeFN = std::function<void(const framework::OpDesc & /*op_desc*/,
                                          framework::BlockDesc * /*block*/)>;

using InferShapeFN = std::function<void(framework::InferShapeContext *)>;
}; // namespace paddle_mobile
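
The aliases above support a factory pattern: operators are constructed through std::function callbacks keyed by an op-type string. A self-contained analogue of that idea (FakeOp and the registry are simplified stand-ins, not types from the source):

#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct FakeOp { std::string type; };  // stand-in for framework::OperatorBase

using VariableNameMap = std::map<std::string, std::vector<std::string>>;
using FakeOpCreator = std::function<FakeOp *(
    const std::string & /*type*/, const VariableNameMap & /*inputs*/,
    const VariableNameMap & /*outputs*/)>;

int main() {
  std::map<std::string, FakeOpCreator> registry;
  // Register a creator under the op type name, as an op registry would.
  registry["conv2d"] = [](const std::string &type, const VariableNameMap &,
                          const VariableNameMap &) { return new FakeOp{type}; };
  FakeOp *op = registry["conv2d"]("conv2d", {}, {});
  std::cout << "created op: " << op->type << std::endl;
  delete op;
  return 0;
}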
@@ -19,45 +19,45 @@ SOFTWARE.
#pragma once

namespace paddle_mobile {
enum class Precision : int { FP32 = 0 };

//! device type
enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2 };

template <DeviceTypeEnum T> struct DeviceType {};

typedef DeviceType<kCPU> CPU;
typedef DeviceType<kFPGA> FPGA;
typedef DeviceType<kGPU_MALI> GPU_MALI;

//! data type
enum DataType {
    PM_INVALID = -1,
    PM_HALF = 0,
    PM_FLOAT = 1,
    PM_DOUBLE = 2,
    PM_INT8 = 3,
    PM_INT16 = 4,
    PM_INT32 = 5,
    PM_INT64 = 6,
    PM_UINT8 = 7,
    PM_UINT16 = 8,
    PM_UINT32 = 9,
    PM_STRING = 10,
    PM_BOOL = 11,
    PM_SHAPE = 12,
    PM_TENSOR = 13
};

//! status codes
enum PMStatus {
    PMSuccess = 0xFF,        /*!< No errors. */
    PMNotInitialized = 0x01, /*!< Data not initialized. */
    PMInvalidValue = 0x02,   /*!< Incorrect variable value. */
    PMMemAllocFailed = 0x03, /*!< Memory allocation error. */
    PMUnKownError = 0x04,    /*!< Unknown error. */
    PMOutOfAuthority = 0x05, /*!< Tried to modify data not owned by you. */
    PMOutOfMem = 0x06,       /*!< Out-of-memory error. */
    PMUnImplError = 0x07,    /*!< Unimplemented error. */
    PMWrongDevice = 0x08     /*!< Incorrect device. */
};
} // namespace paddle_mobile
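The DeviceType tags are empty structs meant purely for compile-time dispatch,
and PMStatus is the conventional return code. A brief sketch; the Executor
template below is hypothetical, not part of this header:

    // hypothetical: one specialization per device tag
    template <typename Device> struct Executor;
    template <> struct Executor<CPU> { /* CPU / NEON kernels */ };
    template <> struct Executor<GPU_MALI> { /* Mali GPU kernels */ };

    PMStatus Run() {
        Executor<CPU> exec;
        // ... run the network ...
        return PMSuccess; // 0xFF: no errors
    }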
@@ -15,5 +15,3 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/
#include "variant.h"
@@ -21,79 +21,79 @@ SOFTWARE.
#pragma once

namespace paddle_mobile {
template <int ID, typename Type> struct IDToType { typedef Type type_t; };

template <typename F, typename... Ts> struct VariantHelper {
    static const size_t size = sizeof(F) > VariantHelper<Ts...>::size
                                   ? sizeof(F)
                                   : VariantHelper<Ts...>::size;

    inline static void Destroy(size_t id, void *data) {
        if (id == typeid(F).hash_code()) {
            reinterpret_cast<F *>(data)->~F();
        } else {
            VariantHelper<Ts...>::Destroy(id, data);
        }
    }
};

template <typename F> struct VariantHelper<F> {
    static const size_t size = sizeof(F);
    inline static void Destroy(size_t id, void *data) {
        if (id == typeid(F).hash_code()) {
            // reinterpret_cast<F*>(data)->~F();
        } else {
            // std::cout << "no matching type" << std::endl;
        }
    }
};

template <size_t size> class RawData {
  public:
    char data[size];
    RawData() {}
    // copy the full buffer: the payload is raw object bytes, not a C string
    RawData(const RawData &raw_data) { memcpy(data, raw_data.data, size); }
    // void operator=(const RawData &raw_data){
    //    memcpy(data, raw_data.data, size);
    // }
};

template <typename... Ts> struct Variant {
    Variant(const Variant &variant) {
        // copy constructor
        type_id = variant.type_id;
        data = variant.data;
    }

    Variant() : type_id(invalid_type()) {}
    ~Variant() {
        // helper::Destroy(type_id, &data);
    }

    template <typename T, typename... Args> void Set(Args &&... args) {
        helper::Destroy(type_id, &data);
        new (&data) T(std::forward<Args>(args)...);
        type_id = typeid(T).hash_code();
    }

    template <typename T> T &Get() const {
        if (type_id == typeid(T).hash_code()) {
            return *const_cast<T *>(reinterpret_cast<const T *>(&data));
        } else {
            // std::cout << " bad cast in variant " << std::endl;
            throw std::bad_cast();
        }
    }

    size_t TypeId() const { return type_id; }

  private:
    static inline size_t invalid_type() { return typeid(void).hash_code(); }
    typedef VariantHelper<Ts...> helper;
    size_t type_id;
    RawData<helper::size> data;
};

template <typename T> struct Vistor { typedef T type_t; };
} // namespace paddle_mobile
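A minimal usage sketch of this Variant (the type list here is only for
illustration):

    Variant<int, float, std::string> v;
    v.Set<int>(42);                                  // placement-new into the buffer
    int n = v.Get<int>();                            // 42
    bool ok = v.TypeId() == typeid(int).hash_code(); // true
    // v.Get<float>() would throw std::bad_cast: Get() checks the stored
    // typeid hash before casting, so a mismatched read fails loudly

Note that copying a Variant copies raw bytes via RawData's copy constructor,
which is only safe for trivially copyable payloads; non-trivial types such as
std::string work with Set/Get but should not be copied this way.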
@@ -16,8 +16,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/
#include "attribute.h"
namespace paddle_mobile {
namespace framework {}
} // namespace paddle_mobile
@@ -22,110 +22,108 @@ SOFTWARE.
#include "framework.pb.h"

namespace paddle_mobile {
namespace framework {
class BlockDesc;

class Attribute {
  public:
    static Attribute GetAttrValue(const proto::OpDesc::Attr &attr_desc) {
        // std::cout << "begin get attr value" << std::endl;
        Attribute attr;
        switch (attr_desc.type()) {
        case proto::AttrType::BOOLEAN: {
            attr.Set<bool>(attr_desc.b());
            break;
        }
        case proto::AttrType::INT: {
            attr.Set<int>(attr_desc.i());
            break;
        }
        case proto::AttrType::FLOAT: {
            attr.Set<float>(attr_desc.f());
            break;
        }
        case proto::AttrType::STRING: {
            attr.Set<std::string>(attr_desc.s());
            break;
        }
        case proto::AttrType::BOOLEANS: {
            std::vector<bool> val(attr_desc.bools_size());
            for (int i = 0; i < attr_desc.bools_size(); ++i) {
                val[i] = attr_desc.bools(i);
            }
            attr.Set<std::vector<bool>>(val);
            break;
        }
        case proto::AttrType::INTS: {
            std::vector<int> val(attr_desc.ints_size());
            for (int i = 0; i < attr_desc.ints_size(); ++i) {
                val[i] = attr_desc.ints(i);
            }
            attr.Set<std::vector<int>>(val);
            break;
        }
        case proto::AttrType::FLOATS: {
            std::vector<float> val(attr_desc.floats_size());
            for (int i = 0; i < attr_desc.floats_size(); ++i) {
                val[i] = attr_desc.floats(i);
            }
            attr.Set<std::vector<float>>(val);
            break;
        }
        case proto::AttrType::STRINGS: {
            std::vector<std::string> val(attr_desc.strings_size());
            for (int i = 0; i < attr_desc.strings_size(); ++i) {
                val[i] = attr_desc.strings(i);
            }
            attr.Set<std::vector<std::string>>(val);
            break;
        }
        case proto::AttrType::LONG: {
            attr.Set<int64_t>(attr_desc.l());
            break;
        }
        default:
            // std::cout << " not support " << std::endl;
            break;
        }
        // std::cout << "end get attr value" << std::endl;
        return attr;
    }

    Attribute() {}
    template <typename T, typename... Args> Attribute &Set(Args &&... args) {
        variant_.Set<T>(args...);
        return *this;
    }

    template <typename T> T &Get() const { return variant_.Get<T>(); }

  private:
    Variant<int, float, std::string, std::vector<int>, std::vector<float>,
            std::vector<std::string>, bool, std::vector<bool>, BlockDesc *,
            int64_t>
        variant_;
};

using AttributeMap = std::unordered_map<std::string, Attribute>;

class AttrReader {
  public:
    explicit AttrReader(const AttributeMap &attrs) : attrs_(attrs) {}

    template <typename T> inline T Get(const std::string &name) const {
        // PADDLE_ENFORCE(attrs_.count(name) != 0,
        //                "%s should be in AttributeMap", name);
        return ((Attribute)attrs_.at(name)).Get<T>();
    }

  private:
    const AttributeMap &attrs_;
};
} // namespace framework
} // namespace paddle_mobile
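For orientation, this is roughly how the two classes cooperate when an op is
deserialized (assuming we are inside paddle_mobile::framework; op_desc and the
"axis" attribute are illustrative):

    AttributeMap attrs;
    for (const auto &attr_desc : op_desc.attrs()) { // proto::OpDesc assumed
        attrs[attr_desc.name()] = Attribute::GetAttrValue(attr_desc);
    }
    AttrReader reader(attrs);
    int axis = reader.Get<int>("axis"); // "axis" is a hypothetical attribute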
@@ -19,32 +19,32 @@ SOFTWARE.
#include "block_desc.h"

namespace paddle_mobile {
namespace framework {

std::vector<std::shared_ptr<VarDesc>> BlockDesc::Vars() const {
    std::vector<std::shared_ptr<VarDesc>> res;
    for (const auto &p : vars_) {
        res.push_back(p.second);
    }
    return res;
}

std::vector<std::shared_ptr<OpDesc>> BlockDesc::Ops() const {
    std::vector<std::shared_ptr<OpDesc>> res;
    for (const auto &op : ops_) {
        res.push_back(op);
    }
    return res;
}

BlockDesc::BlockDesc(const proto::BlockDesc &desc) : desc_(desc) {
    for (const proto::VarDesc &var_desc : desc_.vars()) {
        vars_[var_desc.name()].reset(new VarDesc(var_desc));
    }
    for (const proto::OpDesc &op_desc : desc_.ops()) {
        ops_.emplace_back(new framework::OpDesc(op_desc));
    }
}

} // namespace framework
} // namespace paddle_mobile
@@ -24,50 +24,47 @@ SOFTWARE.
#include "var_desc.h"

namespace paddle_mobile {
namespace framework {

class BlockDesc : PaddleMobileObject {
  public:
    BlockDesc(const proto::BlockDesc &desc);

    const int &ID() const { return desc_.idx(); }

    const int &Parent() const { return desc_.parent_idx(); }

    bool operator==(const paddle_mobile::framework::BlockDesc &in_block) const {
        return this->ID() == in_block.ID() &&
               this->Parent() == in_block.Parent();
    }

    bool operator<(const paddle_mobile::framework::BlockDesc &in_block) const {
        return this->ID() < in_block.ID() && this->Parent() < in_block.Parent();
    }

    std::vector<std::shared_ptr<VarDesc>> Vars() const;
    std::vector<std::shared_ptr<OpDesc>> Ops() const;

  private:
    proto::BlockDesc desc_;
    std::vector<std::shared_ptr<OpDesc>> ops_;
    std::unordered_map<std::string, std::shared_ptr<VarDesc>> vars_;
};

} // namespace framework
} // namespace paddle_mobile

namespace std {
template <> struct hash<paddle_mobile::framework::BlockDesc> {
    typedef paddle_mobile::framework::BlockDesc argument_type;
    typedef std::size_t result_type;
    result_type operator()(argument_type const &s) const noexcept {
        result_type const h1(std::hash<int>{}(s.ID()));
        // hash the parent index as the second component so blocks that differ
        // only in Parent() do not all collide
        result_type const h2(std::hash<int>{}(s.Parent()));
        return h1 ^ (h2 << 1);
    }
};
} // namespace std
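The std::hash specialization exists so BlockDesc can serve as a key in
unordered containers; equality requires both ID() and Parent() to match, and
the hash combines the same two fields:

    std::unordered_set<paddle_mobile::framework::BlockDesc> seen;
    seen.insert(block);           // block: some BlockDesc instance
    bool dup = seen.count(block); // hash and operator== agree on (ID, Parent)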
@@ -19,50 +19,49 @@ limitations under the License. */
#include <string>

namespace paddle_mobile {
namespace framework {

enum class DataLayout {
    kNHWC = 0,
    kNCHW = 1,
    kAnyLayout = 2,
};

inline DataLayout StringToDataLayout(const std::string &str) {
    std::string s(str);
    for (size_t i = 0; i < s.size(); ++i) {
        s[i] = toupper(s[i]);
    }

    if (s == "NHWC") {
        return DataLayout::kNHWC;
    } else if (s == "NCHW") {
        return DataLayout::kNCHW;
    } else if (s == "ANYLAYOUT") {
        return DataLayout::kAnyLayout;
    } else {
        // unknown storage order string: fall back to the permissive layout
        return DataLayout::kAnyLayout;
    }
}

inline std::string DataLayoutToString(const DataLayout &data_layout) {
    switch (data_layout) {
    case DataLayout::kNHWC:
        return "NHWC";
    case DataLayout::kNCHW:
        return "NCHW";
    case DataLayout::kAnyLayout:
        return "ANY_LAYOUT";
    default:
        // unknown DataLayout value
        return "UNKNOWN";
    }
}

inline std::ostream &operator<<(std::ostream &out, const DataLayout &l) {
    out << DataLayoutToString(l);
    return out;
}

} // namespace framework
} // namespace paddle_mobile
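Round-trip behavior for reference (parsing is case-insensitive):

    auto layout = StringToDataLayout("nchw");      // DataLayout::kNCHW
    std::string name = DataLayoutToString(layout); // "NCHW"
    std::cout << layout;                           // streams "NCHW"

Note the asymmetry for the wildcard value: the parser accepts "ANYLAYOUT"
while the printer emits "ANY_LAYOUT", so that one value does not round-trip
exactly.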
@@ -21,72 +21,72 @@ SOFTWARE.
#include "data_transform.h"

namespace paddle_mobile {
namespace framework {

static void PassTensorData(Tensor *from, Tensor *to) {
    to->ShareDataWith(*from);
    *from = Tensor();
}

void DataTransform(const OpKernelType &expected_kernel_type,
                   const OpKernelType &kernel_type_for_var,
                   const Tensor &input_tensor, Tensor *output_tensor) {
    bool transformed = false;
    Tensor in;
    in.ShareDataWith(input_tensor);
    Tensor out;

    // // do layout transform
    // if (NeedTransformLayout(expected_kernel_type.data_layout_,
    //                         kernel_type_for_var.data_layout_)) {
    //   TransDataLayout(kernel_type_for_var, expected_kernel_type, in, &out);
    //   transformed = true;
    //   PassTensorData(&out, &in);
    // }
    //
    // // do data type transform
    // if (expected_kernel_type.data_type_ != kernel_type_for_var.data_type_) {
    //   TransDataType(kernel_type_for_var, expected_kernel_type, in, &out);
    //   transformed = true;
    //   PassTensorData(&out, &in);
    // }
    //
    // // do device transform
    // if (!platform::is_same_place(kernel_type_for_var.place_,
    //                              expected_kernel_type.place_)) {
    //   TransDataDevice(in, expected_kernel_type.place_, &out);
    //   transformed = true;
    //   PassTensorData(&out, &in);
    // }
    //
    // PADDLE_ENFORCE(transformed, "No transform is applied, please check!");

    // get output data
    output_tensor->ShareDataWith(in);
}

void CopyVariableWithTensor(const Variable &in_var, const Tensor &tensor,
                            Variable &out_var) {
    // if (in_var.IsType<LoDTensor>()) {
    //   auto& in_lod_tensor = in_var.Get<LoDTensor>();
    //   auto* tran_lod_tensor = out_var.GetMutable<LoDTensor>();
    //   tran_lod_tensor->set_lod(in_lod_tensor.lod());
    //   tran_lod_tensor->set_layout(in_lod_tensor.layout());
    //   tran_lod_tensor->ShareDataWith(tensor);
    // } else if (in_var.IsType<SelectedRows>()) {
    //   auto& in_selected_rows = in_var.Get<SelectedRows>();
    //   auto* trans_selected_rows = out_var.GetMutable<SelectedRows>();
    //   trans_selected_rows->set_height(in_selected_rows.height());
    //   trans_selected_rows->set_rows(in_selected_rows.rows());
    //   trans_selected_rows->mutable_value()->ShareDataWith(tensor);
    // } else {
    //   PADDLE_THROW("unknown var type");
    // }
}

} // namespace framework
} // namespace paddle_mobile
@@ -28,14 +28,14 @@ SOFTWARE.
#include "variable.h"

namespace paddle_mobile {
namespace framework {

void DataTransform(const OpKernelType &expected_kernel_type,
                   const OpKernelType &kernel_type_for_var,
                   const Tensor &input_tensor, Tensor *out);

void CopyVariableWithTensor(const Variable &in_var, const Tensor &tensor,
                            Variable &out_var);

} // namespace framework
} // namespace paddle_mobile
@@ -21,23 +21,23 @@ SOFTWARE.
#include "framework.pb.h"

namespace paddle_mobile {
namespace framework {

// inline proto::VarType::Type ToDataType(std::type_index type) {
//   using namespace paddle_mobile::framework::proto;
//   if (typeid(float).hash_code() == type.hash_code()) {
//     return proto::VarType::FP32;
//   } else if (typeid(double).hash_code() == type.hash_code()) {
//     return proto::VarType::FP64;
//   } else if (typeid(int).hash_code() == type.hash_code()) {
//     return proto::VarType::INT32;
//   } else if (typeid(int64_t).hash_code() == type.hash_code()) {
//     return proto::VarType::INT64;
//   } else if (typeid(bool).hash_code() == type.hash_code()) {
//     return proto::VarType::BOOL;
//   } else {
//     // PADDLE_THROW("Not supported");
//   }
// }

} // namespace framework
} // namespace paddle_mobile
@@ -15,320 +15,318 @@ limitations under the License. */
#include "ddim.h"

namespace paddle_mobile {
namespace framework {

/// @cond HIDDEN

template <int i> Dim<i> make_dim(const int64_t *d) {
    return Dim<i>(*d, make_dim<i - 1>(d + 1));
}

template <> Dim<0> make_dim<0>(const int64_t *d) { return Dim<0>(*d); }

void make_ddim(DDim &ddim, const int64_t *dims, int n) {
    switch (n) {
    case 0:
        ddim = make_dim<0>(dims);
        break;
    case 1:
        ddim = make_dim<1>(dims);
        break;
    case 2:
        ddim = make_dim<2>(dims);
        break;
    case 3:
        ddim = make_dim<3>(dims);
        break;
    case 4:
        ddim = make_dim<4>(dims);
        break;
    case 5:
        ddim = make_dim<5>(dims);
        break;
    case 6:
        ddim = make_dim<6>(dims);
        break;
    case 7:
        ddim = make_dim<7>(dims);
        break;
    case 8:
        ddim = make_dim<8>(dims);
        break;
    case 9:
        ddim = make_dim<9>(dims);
        break;
    default:
        // std::cout << "Dynamic dimensions must have between [1, 9]
        // dimensions.";
        break;
    }
}

/// @endcond

DDim make_ddim(std::initializer_list<int64_t> dims) {
    DDim result(make_dim(0));
    make_ddim(result, dims.begin(), dims.size());
    return result;
}

DDim make_ddim(const std::vector<int64_t> &dims) {
    DDim result(make_dim(0));
    make_ddim(result, &dims[0], dims.size());
    return result;
}

DDim make_ddim(const std::vector<int> &dims) {
    std::vector<int64_t> res(dims.size());
    std::transform(dims.begin(), dims.end(), res.begin(),
                   [](int d) { return static_cast<int64_t>(d); });
    return make_ddim(res);
}

/// @cond HIDDEN
// XXX For some reason, putting this in an anonymous namespace causes errors
struct DynamicMutableIndexer : Vistor<int64_t &> {
  public:
    explicit DynamicMutableIndexer(int idx) : idx_(idx) {}

    template <int D> int64_t &operator()(Dim<D> &dim) const {
        return dim[idx_];
    }

  private:
    int idx_;
};

struct DynamicConstIndexer : public Vistor<int64_t> {
  public:
    explicit DynamicConstIndexer(int idx) : idx_(idx) {}

    template <int D> int64_t operator()(const Dim<D> &dim) const {
        return dim[idx_];
    }

  private:
    int idx_;
};

/// @endcond

int64_t &DDim::operator[](int idx) {
    return DDim::ApplyVistor(DynamicMutableIndexer(idx), *this);
}

int64_t DDim::operator[](int idx) const {
    return DDim::ApplyVistor(DynamicConstIndexer(idx), *this);
}

int DDim::size() const { return arity(*this); }

bool DDim::operator==(DDim d) const {
    // if (var.which() != d.getVar().which()) {
    //   return false;
    // } else {
    std::vector<int64_t> v1 = vectorize(*this);
    std::vector<int64_t> v2 = vectorize(d);

    // dims of different rank are never equal; this also guards the loop below
    if (v1.size() != v2.size()) {
        return false;
    }

    for (unsigned int i = 0; i < v1.size(); i++) {
        if (v1[i] != v2[i]) {
            return false;
        }
    }

    return true;
    // }
}

bool DDim::operator!=(DDim d) const { return !(*this == d); }

DDim DDim::operator+(DDim d) const {
    std::vector<int64_t> v1 = vectorize(*this);
    std::vector<int64_t> v2 = vectorize(d);

    std::vector<int64_t> v3;

    assert(v1.size() == v2.size());

    for (unsigned int i = 0; i < v1.size(); i++) {
        v3.push_back(v1[i] + v2[i]);
    }

    return make_ddim(v3);
}

DDim DDim::operator*(DDim d) const {
    std::vector<int64_t> v1 = vectorize(*this);
    std::vector<int64_t> v2 = vectorize(d);

    std::vector<int64_t> v3;

    assert(v1.size() == v2.size());

    for (unsigned int i = 0; i < v1.size(); i++) {
        v3.push_back(v1[i] * v2[i]);
    }

    return make_ddim(v3);
}

int64_t get(const DDim &ddim, int idx) { return ddim[idx]; }

void set(DDim &ddim, int idx, int value) { ddim[idx] = value; }

/// @cond HIDDEN
struct VectorizeVisitor : Vistor<void> {
    std::vector<int64_t> &vector;

    explicit VectorizeVisitor(std::vector<int64_t> &v) : vector(v) {}

    template <typename T> void operator()(const T &t) {
        vector.push_back(t.head);
        this->operator()(t.tail);
    }

    void operator()(const Dim<0> &t) {}
};
/// @endcond

std::vector<int64_t> vectorize(const DDim &ddim) {
    std::vector<int64_t> result;
    VectorizeVisitor visitor(result);
    DDim::ApplyVistor(visitor, ddim);
    return result;
}

// NOTE: framework::vectorize converts to type int64_t
// which does not fit cudnn inputs.
std::vector<int> vectorize2int(const DDim &ddim) {
    std::vector<int64_t> temp = vectorize(ddim);
    std::vector<int> result(temp.begin(), temp.end());
    return result;
}

struct ProductVisitor : Vistor<int64_t> {
    template <int D> int64_t operator()(const Dim<D> &dim) {
        return product(dim);
    }
};

int64_t product(const DDim &ddim) {
    ProductVisitor visitor;
    return DDim::ApplyVistor(visitor, ddim);
}

struct SliceVectorizeVisitor : Vistor<void> {
    std::vector<int64_t> &vector;
    int begin;
    int end;

    SliceVectorizeVisitor(std::vector<int64_t> &v, int b, int e)
        : vector(v), begin(b), end(e) {
        // PADDLE_ENFORCE(begin < end,
        //                "Begin index must be less than end index in ddim
        //                slice.");
        // PADDLE_ENFORCE(begin >= 0,
        //                "Begin index can't be less than zero in ddim
        //                slice.");
    }

    template <int S> void operator()(const Dim<S> &dim) {
        if (begin == 0) {
            vector.push_back(dim.head);
        } else {
            --begin;
        }
        --end;
        if (end > 0) {
            this->operator()(dim.tail);
        }
    }

    void operator()(const Dim<0> &dim) {
        // PADDLE_ENFORCE(end == 0, "End index in ddim slice is out of
        // bound.");
    }
};

DDim slice_ddim(const DDim &ddim, int begin, int end) {
    std::vector<int64_t> vec;
    vec.reserve(end - begin);
    SliceVectorizeVisitor visitor(vec, begin, end);
    // boost::apply_visitor(visitor, dim);
    DDim::ApplyVistor(visitor, ddim);
    // visitor(ddim.var.Get<Dim<4>>());
    return make_ddim(vec);
}

/// \cond HIDDEN

struct ArityVisitor : Vistor<int> {
    template <int D> int operator()(Dim<D>) const { return D; }
};

/// \endcond

int arity(const DDim &d) {
    ArityVisitor arityVisitor = ArityVisitor();
    return DDim::ApplyVistor(arityVisitor, d);
    // return arityVisitor(d.var.Get<Dim<4>>());
    // return boost::apply_visitor(ArityVisitor(), d); }
}

/// \cond HIDDEN

/// \endcond

struct OSVistor : Vistor<std::ostream &> {
    OSVistor(std::ostream &os) : os_(os) {}

    template <int D> std::ostream &operator()(Dim<D> dim) const {
        return os_ << dim;
    }

  private:
    std::ostream &os_;
};

std::ostream &operator<<(std::ostream &os, const DDim &ddim) {
    auto vistor = OSVistor(os);
    DDim::ApplyVistor(vistor, ddim);
    return os;
}

DDim::DDim(std::initializer_list<int64_t> init_list) {
    *this = make_ddim(init_list);
}

DDim flatten_to_2d(const DDim &src, int num_col_dims) {
    int rank = src.size();
    return make_ddim({product(slice_ddim(src, 0, num_col_dims)),
                      product(slice_ddim(src, num_col_dims, rank))});
}

DDim flatten_to_1d(const DDim &src) { return make_ddim({product(src)}); }

DDim stride(const DDim &ddim) {
    std::vector<int64_t> strides(ddim.size());
    strides[ddim.size() - 1] = 1;
    for (int i = ddim.size() - 2; i >= 0; --i) {
        strides[i] = strides[i + 1] * ddim[i + 1];
    }
    return framework::make_ddim(strides);
}

DDim stride_numel(const framework::DDim &ddim) {
    std::vector<int64_t> strides(ddim.size());
    strides[ddim.size() - 1] = ddim[ddim.size() - 1];
    for (int i = ddim.size() - 2; i >= 0; --i) {
        strides[i] = strides[i + 1] * ddim[i];
    }
    return framework::make_ddim(strides);
}

} // namespace framework
} // namespace paddle_mobile
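A worked example of the helpers above; every value follows directly from the
definitions:

    DDim d = make_ddim({2, 3, 4});
    product(d);          // 24
    slice_ddim(d, 1, 3); // {3, 4}
    stride(d);           // {12, 4, 1}  (row-major strides)
    stride_numel(d);     // {24, 12, 4} (suffix products including each head)
    vectorize2int(d);    // std::vector<int>{2, 3, 4}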
@@ -22,145 +22,142 @@ limitations under the License. */
#include <vector>

namespace paddle_mobile {
namespace framework {

/**
 * \brief A dynamically sized dimension.
 *
 * The number of dimensions must be between [1, 9].
 */
struct DDim {
    typedef Variant<Dim<0>, Dim<1>, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>,
                    Dim<7>, Dim<8>, Dim<9>>
        DDimVar;
    DDimVar var;

    template <typename Vistor>
    static typename Vistor::type_t ApplyVistor(Vistor vistor, const DDim &d) {
        if (d.var.TypeId() == typeid(Dim<0>).hash_code()) {
            return vistor(d.var.Get<Dim<0>>());
        } else if (d.var.TypeId() == typeid(Dim<1>).hash_code()) {
            return vistor(d.var.Get<Dim<1>>());
        } else if (d.var.TypeId() == typeid(Dim<2>).hash_code()) {
            return vistor(d.var.Get<Dim<2>>());
        } else if (d.var.TypeId() == typeid(Dim<3>).hash_code()) {
            return vistor(d.var.Get<Dim<3>>());
        } else if (d.var.TypeId() == typeid(Dim<4>).hash_code()) {
            return vistor(d.var.Get<Dim<4>>());
        } else if (d.var.TypeId() == typeid(Dim<5>).hash_code()) {
            return vistor(d.var.Get<Dim<5>>());
        } else if (d.var.TypeId() == typeid(Dim<6>).hash_code()) {
            return vistor(d.var.Get<Dim<6>>());
        } else if (d.var.TypeId() == typeid(Dim<7>).hash_code()) {
            return vistor(d.var.Get<Dim<7>>());
        } else if (d.var.TypeId() == typeid(Dim<8>).hash_code()) {
            return vistor(d.var.Get<Dim<8>>());
        } else if (d.var.TypeId() == typeid(Dim<9>).hash_code()) {
            return vistor(d.var.Get<Dim<9>>());
        } else {
            printf(" dim not support \n");
            throw std::bad_exception();
            // return typename Vistor::type_t();
        }
    }

    DDim() { var.Set<Dim<1>>(Dim<1>()); }

    template <int D> explicit DDim(const Dim<D> &in) { var.Set<Dim<D>>(in); }

    /*implicit*/ DDim(std::initializer_list<int64_t> init_list);

    template <int D> DDim &operator=(const Dim<D> &in) {
        var.Set<Dim<D>>(in);
        return *this;
    }

    int64_t &operator[](int idx);

    int64_t operator[](int idx) const;

    // template <typename Visitor>
    // typename Visitor::result_type apply_visitor(Visitor& visitor) {
    //   return var.apply_visitor(visitor);
    // }
    //
    // template <typename Visitor>
    // typename Visitor::result_type apply_visitor(Visitor& visitor) const {
    //   return var.apply_visitor(visitor);
    // }

    DDimVar getVar() { return var; }

    bool operator==(DDim d) const;

    bool operator!=(DDim d) const;

    DDim operator+(DDim d) const;

    DDim operator*(DDim d) const;

    int size() const;
};

/**
 * \brief Make a DDim from std::vector<int64_t>
 *
 * \param dims A vector of ints. Must be sized between [1, 9]
 */
DDim make_ddim(const std::vector<int64_t> &dims);

DDim make_ddim(const std::vector<int> &dims);

/**
 * \brief Make a DDim from an initializer list
 *
 * \param dims An initializer list of ints. Must be sized between [1, 9]
 *
 */
DDim make_ddim(std::initializer_list<int64_t> dims);

int64_t get(const DDim &dim, int idx);

void set(DDim &dim, int idx, int val);

std::vector<int64_t> vectorize(const DDim &ddim);

std::vector<int> vectorize2int(const DDim &ddim);

int64_t product(const DDim &ddim);

/**
 * \brief Slice a ddim
 *
 * Slice dim with [begin, end).
 * e.g.  DDim d = make_ddim({1, 2, 3, 4, 5});
 *       slice_ddim(d, 1, 3); ====> {2, 3}
 */
DDim slice_ddim(const DDim &dim, int begin, int end);

/**
 * \brief What is the length of this dimension?
 *
 * \param Dynamic dimension to inspect
 */
int arity(const DDim &ddim);

std::ostream &operator<<(std::ostream &, const DDim &);

// Reshape a tensor to a matrix. The matrix's first dimension (column length)
// will be the product of the tensor's first `num_col_dims` dimensions.
DDim flatten_to_2d(const DDim &src, int num_col_dims);

DDim flatten_to_1d(const DDim &src);

DDim stride(const DDim &ddim);

DDim stride_numel(const DDim &ddim);

} // namespace framework
} // namespace paddle_mobile
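A short sketch of the flatten helpers, typically used to view a tensor as a
matrix before a matmul; the numbers are just product() over the two sliced
ranges:

    DDim d = make_ddim({2, 3, 4, 5});
    flatten_to_2d(d, 2); // {2 * 3, 4 * 5} == {6, 20}
    flatten_to_1d(d);    // {120}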
@@ -21,410 +21,392 @@
#include "platform/hostdevice.h"

namespace paddle_mobile {
namespace framework {

// Statically sized, statically indexed dimension
template <int i> struct Dim {
    static constexpr int dimensions = i;

    template <typename... Args>
    HOSTDEVICE Dim(int64_t _head, Args... _tail)
        : head(_head), tail(_tail...) {
        static_assert(sizeof...(_tail) == i - 1,
                      "Dim initialized with the wrong number of parameters");
    }

    HOSTDEVICE
    Dim(int64_t _head, const Dim<i - 1> &_tail) : head(_head), tail(_tail) {}

    HOSTDEVICE
    Dim() : head(0), tail() {}

    /** Construct a Dim from a linear index and size. Uses Fortran order
     * indexing. */
    HOSTDEVICE
    Dim(int64_t idx, const Dim<i> &size)
        : head(idx % size.head), tail(idx / size.head, size.tail) {}

    /** Construct a Dim with each dimension set to the given index */
    HOSTDEVICE
    Dim(int64_t idx) : head(idx), tail(idx) {}

    HOSTDEVICE
    bool operator==(const Dim<i> &o) const {
        return (head == o.head) && (tail == o.tail);
    }

    HOSTDEVICE
    bool operator!=(const Dim<i> &o) const { return !(*this == o); }

    HOSTDEVICE
    int64_t &operator[](int idx);
    HOSTDEVICE
    int64_t operator[](int idx) const;

    HOST std::string to_string() const;

    int64_t head;
    Dim<i - 1> tail;
};

// Base case specialization
template <> struct Dim<0> {
    static constexpr int dimensions = 0;

    HOSTDEVICE
    Dim(int64_t _head) {}

    HOSTDEVICE
    Dim() {}

    HOSTDEVICE
    Dim(int idx, const Dim<0> &size) {
#ifndef __CUDA_ARCH__
        if (idx > 0) {
            throw std::invalid_argument("Index out of range.");
        }
#else
        PADDLE_ASSERT(idx == 0);
#endif
    }

    HOSTDEVICE
    bool operator==(const Dim<0> &o) const { return true; }

    HOSTDEVICE
    bool operator!=(const Dim<0> &o) const { return false; }

    HOSTDEVICE
    int64_t &operator[](int idx);
    HOSTDEVICE
    int64_t operator[](int idx) const;
};

namespace {

// Helper for accessing Dim classes
template <int i> struct DimGetter {
    // Return a copy if Dim is const
    template <typename D> HOSTDEVICE static int64_t impl(const D &d) {
        return DimGetter<i - 1>::impl(d.tail);
    }
    // Return a reference if Dim is mutable
    template <typename D> HOSTDEVICE static int64_t &impl(D &d) {
        return DimGetter<i - 1>::impl(d.tail);
    }
};

// Eureka! We found the element!
template <> struct DimGetter<0> {
    // Return a copy if Dim is const
    template <typename D> HOSTDEVICE static int64_t impl(const D &d) {
        return d.head;
    }
    // Return a reference if Dim is mutable
    template <typename D> HOSTDEVICE static int64_t &impl(D &d) {
        return d.head;
    }
};

template <int D> HOSTDEVICE int64_t &indexer(Dim<D> &dim, int idx) {
#ifndef __CUDA_ARCH__
    if (idx < 0) {
        throw std::invalid_argument("Tried to access a negative dimension");
    }
#else
    PADDLE_ASSERT(idx >= 0);
#endif
    if (idx == 0) {
        return dim.head;
    }
    return indexer(dim.tail, idx - 1);
}

template <> HOSTDEVICE int64_t &indexer<0>(Dim<0> &dim, int idx) {
#ifndef __CUDA_ARCH__
    throw std::invalid_argument("Invalid index");
#else
    PADDLE_ASSERT(false);
#if CUDA_VERSION < 8000
    // On CUDA versions previous to 8.0, only __shared__ variables
    // could be declared as static in the device code.
    int64_t head = 0;
#else
    static int64_t head = 0;
#endif
    return head;
#endif
}

template <int D> HOSTDEVICE int64_t indexer(const Dim<D> &dim, int idx) {
#ifndef __CUDA_ARCH__
    if (idx < 0) {
        throw std::invalid_argument("Tried to access a negative dimension");
    }
#else
    PADDLE_ASSERT(idx >= 0);
#endif
    if (idx == 0) {
        return dim.head;
    }
    return indexer(dim.tail, idx - 1);
}

template <> HOSTDEVICE int64_t indexer<0>(const Dim<0> &dim, int idx) {
#ifndef __CUDA_ARCH__
    throw std::invalid_argument("Invalid index");
#else
    PADDLE_ASSERT(false);
#if CUDA_VERSION < 8000
    // On CUDA versions previous to 8.0, only __shared__ variables
    // could be declared as static in the device code.
    int64_t head = 0;
#else
    static int64_t head = 0;
#endif
    return head;
#endif
}

} // namespace

// Static access to constant Dim
template <int i, int l> HOSTDEVICE int64_t get(const Dim<l> &d) {
    return DimGetter<i>::impl(d);
}

// Static access to mutable Dim
template <int i, int l> HOSTDEVICE int64_t &get(Dim<l> &d) {
    return DimGetter<i>::impl(d);
}

// Dynamic access to constant Dim
template <int l> HOSTDEVICE int64_t Dim<l>::operator[](int i) const {
    // std::cout << "l: " << l << std::endl;
    return indexer(*this, i);
}

// Dynamic access to mutable Dim
template <int l> HOSTDEVICE int64_t &Dim<l>::operator[](int i) {
    return indexer(*this, i);
}

// Dynamic access to constant Dim
inline HOSTDEVICE int64_t Dim<0>::operator[](int i) const {
    return indexer(*this, i);
}

// Dynamic access to mutable Dim
inline HOSTDEVICE int64_t &Dim<0>::operator[](int i) {
    return indexer(*this, i);
}

// Dynamic access to constant Dim
// without std::enable_if will try to instantiate this on get<0>(d)
template <int l>
HOSTDEVICE typename std::enable_if<(l > 0), int64_t>::type get(const Dim<l> &d,
                                                               int i) {
    return d[i];
}

// Dynamic access to mutable Dim
template <int l>
HOSTDEVICE typename std::enable_if<(l > 0), int64_t &>::type get(Dim<l> &d,
                                                                 int i) {
    return d[i];
}

// Dot product of two dims
template <int i>
HOSTDEVICE int64_t linearize(const Dim<i> &a, const Dim<i> &b) {
    return a.head * b.head + linearize(a.tail, b.tail);
}

// Base case dot product of two Dims
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline int64_t linearize(const Dim<0> &a, const Dim<0> &b) {
    return 0;
}
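For intuition: linearize is the dot product that turns a multi-index into a
flat offset once strides are known, and it inverts the Fortran-order
Dim(idx, size) constructor above. Using ex_prefix_mul (defined a little
further down) to build the strides:

    Dim<3> size(2, 3, 4);
    Dim<3> stride = ex_prefix_mul(size);  // (1, 2, 6): Fortran-order strides
    Dim<3> idx(1, 2, 3);
    int64_t off = linearize(idx, stride); // 1*1 + 2*2 + 3*6 = 23
    Dim<3> back(off, size);               // recovers (1, 2, 3)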
// Product of a Dim
template <int i> HOSTDEVICE int64_t product(const Dim<i> &a, int prod = 1) {
    return prod * a.head * product(a.tail);
}

// Base case product of a Dim
// Notice it is inline because it is no longer a template
template <> HOSTDEVICE inline int64_t product(const Dim<0> &a, int prod) {
    return prod;
}

// Is 0 <= idx_i < size_i for all i?
template <int i>
HOSTDEVICE bool contained(const Dim<i> &idx, const Dim<i> &size) {
    return ((0 <= idx.head) && (idx.head < size.head) &&
            contained(idx.tail, size.tail));
}

// Base case of is 0 <= idx_i < size_i ?
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
    return true;
}

/**
 * \brief Compute exclusive prefix-multiply of a Dim.
 */
template <int i>
HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) {
    return Dim<i>(mul, ex_prefix_mul(src.tail, mul * src.head));
}

///\cond HIDDEN
// Base case of ex_prefix_mul
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) {
    return Dim<0>();
}
///\endcond

/**
 * Add two dimensions together
 */
template <int i>
HOSTDEVICE Dim<i> dim_plus(const Dim<i> &a, const Dim<i> &b) {
    return Dim<i>(a.head + b.head, dim_plus(a.tail, b.tail));
}

// Base case
template <>
HOSTDEVICE inline Dim<0> dim_plus(const Dim<0> &a, const Dim<0> &b) {
    return Dim<0>();
}

template <int i>
HOSTDEVICE Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
    return dim_plus(lhs, rhs);
}

/**
 * Multiply two dimensions together
 */
template <int i>
HOSTDEVICE Dim<i> dim_mult(const Dim<i> &a, const Dim<i> &b) {
    return Dim<i>(a.head * b.head, dim_mult(a.tail, b.tail));
}

// Base case
template <>
HOSTDEVICE inline Dim<0> dim_mult(const Dim<0> &a, const Dim<0> &b) {
    return Dim<0>();
}

template <int i>
HOSTDEVICE Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
    return dim_mult(lhs, rhs);
}
* \param stride Dim object containing stride of an array
/** * \return Dim object the same size as \p size with normalized strides
* \brief Normalize strides to ensure any dimension with extent 1 *
* has stride 0. */
*
* \param size Dim object containing the size of an array template <int i>
* \param stride Dim object containing stride of an array HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
* \return Dim object the same size as \p size with normalized strides int norm_stride = size.head == 1 ? 0 : stride.head;
* return Dim<i>(norm_stride, normalize_strides(size.tail, stride.tail));
*/ }
template <int i> ///\cond HIDDEN
HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size,
const Dim<i> &stride) { template <>
int norm_stride = size.head == 1 ? 0 : stride.head; HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size,
return Dim<i>(norm_stride, const Dim<0> &stride) {
normalize_strides(size.tail, stride.tail)); return Dim<0>();
} }
///\cond HIDDEN ///\endcond
template <> /**
HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size, * Helper function to create a Dim
const Dim<0> &stride) { *
return Dim<0>(); * \param idxes The type of Dim constructed depends on the number of
} * params
*
///\endcond */
/** template <typename... Args>
* Helper function to create a Dim HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) {
* return Dim<sizeof...(Args)>(idxes...);
* \param idxes The type of Dim constructed depends on the number of }
* params
* // Allows us to output a Dim
*/ // XXX For some reason, overloading fails to resolve this correctly
template <int i>
template <typename... Args> typename std::enable_if<(i > 1), std::ostream &>::type
HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) { operator<<(std::ostream &os, const Dim<i> &d) {
return Dim<sizeof...(Args)>(idxes...); os << d.head << ", " << d.tail;
} return os;
}
// Allows us to output a Dim
// XXX For some reason, overloading fails to resolve this correctly // Base case that allows us to output a Dim
template <int i> // XXX I wish this could be an overload instead of a template
typename std::enable_if<(i > 1), std::ostream &>::type template <int i>
operator<<(std::ostream &os, const Dim<i> &d) { typename std::enable_if<(i == 1), std::ostream &>::type
os << d.head << ", " << d.tail; operator<<(std::ostream &os, const Dim<i> &d) {
return os; os << d.head;
} return os;
}
// Base case that allows us to output a Dim
// XXX I wish this could be an overload instead of a template inline std::ostream &operator<<(std::ostream &os, const Dim<0> &d) {
template <int i> return os;
typename std::enable_if<(i == 1), std::ostream &>::type }
operator<<(std::ostream &os, const Dim<i> &d) {
os << d.head; template <int i> HOST std::string Dim<i>::to_string() const {
return os; std::stringstream stream;
}
stream << *this;
inline std::ostream &operator<<(std::ostream &os, const Dim<0> &d) {
return os; return stream.str();
} }
template <int i> HOST std::string Dim<i>::to_string() const { template <int D>
std::stringstream stream; HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) {
Dim<D> result;
stream << *this;
for (int i = 0; i < D - 1; ++i) {
return stream.str(); result[i] = linear_index % extents[i];
} linear_index /= extents[i];
}
template <int D>
HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, result[D - 1] = linear_index;
Dim<D> extents) {
Dim<D> result; return result;
}
for (int i = 0; i < D - 1; ++i) {
result[i] = linear_index % extents[i]; } // namespace framework
linear_index /= extents[i];
}
result[D - 1] = linear_index;
return result;
}
} // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
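The Dim helpers above all share one shape: an N-dimensional Dim is a head
extent plus an (N - 1)-dimensional tail, and an explicit Dim<0> base case
(specialization or non-template overload) terminates each recursion. The
snippet below is a minimal standalone sketch of that pattern; MiniDim and
the mini_* functions are illustrative stand-ins, not the project's Dim API.

#include <cstdint>
#include <iostream>

template <int N> struct MiniDim {
    int64_t head;
    MiniDim<N - 1> tail;
};
template <> struct MiniDim<0> {};

// Base cases first, so the recursive templates below can find them.
inline int64_t mini_product(const MiniDim<0> &) { return 1; }
inline int64_t mini_linearize(const MiniDim<0> &, const MiniDim<0> &) {
    return 0;
}

// Product of all extents, mirroring product(const Dim<i> &) above.
template <int N> int64_t mini_product(const MiniDim<N> &d) {
    return d.head * mini_product(d.tail);
}

// Dot product of extents and strides, mirroring linearize() above.
template <int N>
int64_t mini_linearize(const MiniDim<N> &a, const MiniDim<N> &b) {
    return a.head * b.head + mini_linearize(a.tail, b.tail);
}

int main() {
    MiniDim<3> extents{2, {3, {4, {}}}};  // a 2 x 3 x 4 shape
    MiniDim<3> strides{12, {4, {1, {}}}}; // row-major strides
    std::cout << mini_product(extents) << "\n";            // 24 elements
    std::cout << mini_linearize(extents, strides) << "\n"; // 40
    return 0;
}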
@@ -20,78 +20,74 @@ SOFTWARE.

#include "executor.h"
#include "lod_tensor.h"
#include "operators/conv_op.h"
#include "variable.h"

namespace paddle_mobile {
namespace framework {

template <typename Dtype>
Executor<Dtype>::Executor(const Program<Dtype> p) : program_(p) {
    if (use_optimize_) {
        to_predict_program_ = program_.optimizeProgram;
    } else {
        to_predict_program_ = program_.originProgram;
    }
    const std::vector<std::shared_ptr<BlockDesc>> blocks =
        to_predict_program_->Blocks();
    for (int i = 0; i < blocks.size(); ++i) {
        std::shared_ptr<BlockDesc> block_desc = blocks[i];
        std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
        for (int j = 0; j < ops.size(); ++j) {
            std::shared_ptr<OpDesc> op = ops[j];
            if (op->Type() == "conv2d" && op->Input("Input")[0] == "pixel") {
                Attribute strides_attr = op->GetAttrMap().at("strides");
                std::vector<int> stride = strides_attr.Get<std::vector<int>>();
                for (int k = 0; k < stride.size(); ++k) {
                }
                std::shared_ptr<operators::ConvOp<Dtype, float>> conv =
                    std::make_shared<operators::ConvOp<Dtype, float>>(
                        op->Type(), op->GetInputs(), op->GetOutputs(),
                        op->GetAttrMap(), program_.scope);
                ops_of_block_[*block_desc.get()].push_back(conv);
            }
        }
    }
}

template <typename Dtype>
std::shared_ptr<Tensor> Executor<Dtype>::predict(Tensor &t) {
    // feed
    auto scope = program_.scope;
    Variable *g_feed_value = scope->Var("pixel");
    auto tensor = g_feed_value->GetMutable<Tensor>();
    tensor->ShareDataWith(t);

    Variable *con_output = scope->Var("conv2d_0.tmp_0");
    Tensor *output_tensor = con_output->GetMutable<Tensor>();
    output_tensor->mutable_data<float>({1, 16, 32, 32});
    // std::cout << typeid(output_tensor).name() << std::endl;
    // std::cout << "output_tensor dims: " << output_tensor->dims() <<
    // std::endl;

    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
    out_tensor.reset(output_tensor);

    predict(t, 0);
    return out_tensor;
}

template <typename Dtype>
void Executor<Dtype>::predict(const Tensor &t, int block_id) {
    std::shared_ptr<BlockDesc> to_predict_block =
        to_predict_program_->Block(block_id);
    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
        auto op = ops_of_block_[*to_predict_block.get()][j];
        // std::cout << "start run" << std::endl;
        op->Run();
    }
}

template class Executor<CPU>;

} // namespace framework
} // namespace paddle_mobile
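Stripped of the framework types, Executor::predict above is just a lookup
of the op list that the constructor pre-built for a block, followed by an
in-order Run() of each op. The sketch below shows that dispatch shape in
isolation; the map and the lambdas are stand-ins for ops_of_block_ and
OperatorBase::Run, not the real classes.

#include <functional>
#include <iostream>
#include <map>
#include <vector>

int main() {
    // Stand-in for ops_of_block_: block id -> runnable ops, filled once
    // up front (the real key is a BlockDesc, not an int).
    std::map<int, std::vector<std::function<void()>>> ops_of_block;
    ops_of_block[0].push_back([] { std::cout << "conv2d\n"; });
    ops_of_block[0].push_back([] { std::cout << "relu\n"; });

    int block_id = 0;
    for (auto &op : ops_of_block[block_id]) {
        op(); // same role as op->Run() in Executor::predict
    }
    return 0;
}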
@@ -32,22 +32,22 @@ SOFTWARE.

#include "variable.h"

namespace paddle_mobile {
namespace framework {

template <typename Dtype> class Executor {
  public:
    Executor(const Program<Dtype> p);
    std::shared_ptr<Tensor> predict(Tensor &t);

  private:
    const framework::Program<Dtype> program_;
    std::shared_ptr<ProgramDesc> to_predict_program_;
    void predict(const Tensor &t, int block_id);
    std::map<framework::BlockDesc,
             std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
        ops_of_block_;
    bool use_optimize_ = false;
};

} // namespace framework
} // namespace paddle_mobile
(source diff too large to display; view the blob instead)
@@ -19,304 +19,295 @@ limitations under the License. */

#include <string.h>

namespace paddle_mobile {
namespace framework {

std::ostream &operator<<(std::ostream &os, const LoD &lod) {
    os << "{";
    for (auto &v : lod) {
        os << "{";
        bool is_first = true;
        for (auto &i : v) {
            if (is_first) {
                os << i;
                is_first = false;
            } else {
                os << ", " << i;
            }
        }
        os << "}";
    }
    os << "}";

    return os;
}

std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
    // PADDLE_ENFORCE(t.type().hash_code() ==
    // typeid(float).hash_code());

    // if (!platform::is_cpu_place(t.place())) {
    //   LoDTensor tt;
    //   framework::TensorCopy(t, platform::CPUPlace(), &tt);
    //   platform::DeviceContextPool &pool =
    //   platform::DeviceContextPool::Instance(); auto &dev_ctx =
    //   *pool.Get(t.place()); dev_ctx.Wait();
    //
    //   os << tt;
    //   return os;
    // }

    os << "dim: " << t.dims() << "\n";
    os << "lod: " << t.lod() << "\n";

    // only print first ten elements
    int64_t size = t.numel() < 10 ? t.numel() : 10;
    for (int64_t i = 0; i < size; ++i) {
        os << t.data<float>()[i] << " ";
    }

    return os;
}

std::string LoDToString(const LoD &lod) {
    std::ostringstream stream;
    stream << lod;
    return stream.str();
}

LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin,
                 size_t elem_end) {
    // PADDLE_ENFORCE_LT(level, in.size());
    // PADDLE_ENFORCE_LT(elem_end, in[level].size());

    LoD res;
    res.resize(in.size() - level);
    // copy the first level
    res[0].assign(in[level].begin() + elem_begin,
                  in[level].begin() + elem_end + 1);
    for (size_t lvl = 1; lvl < res.size(); lvl++) {
        const auto &in_level = in[level + lvl];
        const auto &above_level = res[lvl - 1];
        auto &out_level = res[lvl];
        out_level.assign(in_level.begin() + above_level.front(),
                         in_level.begin() + above_level.back() + 1);
    }
    for (size_t lvl = 0; lvl < res.size(); lvl++) {
        // to make the first offset equal 0, subtract the first element
        // from every element in the level
        size_t front = res[lvl].front();
        for (auto &ele : res[lvl]) {
            ele -= front;
        }
    }
    return res;
}

LoD ToAbsOffset(const LoD &in) {
    // the lowest level stores relative offsets
    if (in.empty() || in.size() == 1)
        return in;
    LoD result = in;
    for (auto level = static_cast<int>(in.size() - 2); level >= 0; level--) {
        for (size_t i = 0; i < in[level].size(); ++i) {
            size_t index = in[level][i];
            result[level][i] = result[level + 1][index];
        }
    }
    return result;
}

bool operator==(const LoD &a, const LoD &b) {
    if (a.size() != b.size()) {
        return false;
    }

    for (size_t i = 0; i < a.size(); i++) {
        const auto &a_level = a[i];
        const auto &b_level = b[i];
        if (a_level.size() != b_level.size()) {
            return false;
        }
        for (size_t j = 0; j < a_level.size(); j++) {
            if (a_level[j] != b_level[j]) {
                return false;
            }
        }
    }

    return true;
}

bool CheckLoD(const LoD &in, int tensor_height) {
    if (in.empty())
        return true;
    for (const auto &level : in) {
        // check: there should be more than 2 offsets existing in each
        // level.
        if (level.size() < 2)
            return false;
        // check: the first offset (the begin offset) of each level
        // should be 0.
        if (level.front() != 0)
            return false;
        // check: all the offsets in a level should be ascending (no
        // same items allowed).
        if (!std::is_sorted(level.begin(), level.end(),
                            [](size_t a, size_t b) { return a < b; })) {
            std::cout << "ascending error";
            return false;
        }
    }
    // check: the lowest level's last offset should equal
    // `tensor_height` if tensor_height > 0.
    if (tensor_height > 0 && (size_t)tensor_height != in.back().back())
        return false;

    // check: the higher level's last offset should equal the lower
    // level's size - 1.
    // NOTE LoD stores the levels from top to bottom, so the higher
    // level goes first.
    for (size_t level = 0; level < in.size() - 1; level++) {
        if (in[level].back() != in[level + 1].size() - 1)
            return false;
    }
    return true;
}

bool CheckAbsLoD(const LoD &in, int tensor_height) {
    if (in.empty())
        return true;
    for (const auto &level : in) {
        // check: all the offsets in a level should be ascending (no
        // same items allowed).
        if (!std::is_sorted(level.begin(), level.end(),
                            [](size_t a, size_t b) { return a < b; })) {
            return false;
        }

        // check: there should be more than 2 offsets existing in each
        // level.
        if (level.size() < 2)
            return false;

        // check: the first offset of each level should be 0, and the
        // last should be the same (the height of the underlying
        // tensor).
        if (level.front() != 0)
            return false;
        if (tensor_height < 0) {
            tensor_height = level.back();
        } else if ((size_t)tensor_height != level.back()) {
            return false;
        }
    }
    return true;
}

using LoDAndOffset = std::pair<LoD, std::pair<size_t, size_t>>;

LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
                                        size_t end_idx, size_t start_level) {
    LoD sub_lod;

    for (size_t level_idx = start_level; level_idx < lod.size(); ++level_idx) {
        // PADDLE_ENFORCE_LE(start_idx, end_idx);
        // PADDLE_ENFORCE_LT(end_idx, lod[level_idx].size());
        std::vector<size_t> level_lens;
        for (size_t i = start_idx; i < end_idx; ++i) {
            level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]);
        }
        sub_lod.emplace_back(level_lens);
        start_idx = lod[level_idx][start_idx];
        end_idx = lod[level_idx][end_idx];
    }

    return LoDAndOffset{sub_lod, {start_idx, end_idx}};
}

void AppendLoD(LoD *lod, const LoD &lod_length) {
    // PADDLE_ENFORCE(
    //     lod->empty() || lod->size() == lod_length.size(),
    //     "The lod_length should have the same size as the appended
    //     lod.");
    if (lod->empty()) {
        for (size_t i = 0; i < lod_length.size(); ++i) {
            lod->emplace_back(1, 0); // size = 1, value = 0;
        }
        *lod = LoD(lod_length.size(), std::vector<size_t>({0}));
    }
    for (size_t i = 0; i < lod->size(); ++i) {
        auto &level = (*lod)[i];
        for (size_t len : lod_length[i]) {
            level.push_back(level.back() + len);
        }
    }
}

void SerializeToStream(std::ostream &os, const LoDTensor &tensor) {
    { // the 1st field, uint32_t version for LoDTensor
        constexpr uint32_t version = 0;
        os.write(reinterpret_cast<const char *>(&version), sizeof(version));
    }
    {
        // the 2nd field, LoD information
        // uint64_t lod_level
        // uint64_t lod_level_1 size in byte.
        // int* lod_level_1 data
        // ...
        auto lod = tensor.lod();
        uint64_t size = lod.size();
        os.write(reinterpret_cast<const char *>(&size), sizeof(size));

        for (auto &each : lod) {
            size = each.size() * sizeof(framework::LoD::value_type::value_type);
            os.write(reinterpret_cast<const char *>(&size), sizeof(size));
            os.write(reinterpret_cast<const char *>(each.data()),
                     static_cast<std::streamsize>(size));
        }
    }
    // the 3rd field, Tensor
    TensorToStream(os, static_cast<Tensor>(tensor));
}

void DeserializeFromStream(std::istream &is, LoDTensor *tensor) {
    {
        // the 1st field, uint32_t version for LoDTensor
        uint32_t version;
        is.read(reinterpret_cast<char *>(&version), sizeof(version));
        // PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is
        // supported");
    }
    {
        // the 2nd field, LoD information
        uint64_t lod_level;
        is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
        auto &lod = *tensor->mutable_lod();
        lod.resize(lod_level);
        for (uint64_t i = 0; i < lod_level; ++i) {
            uint64_t size;
            is.read(reinterpret_cast<char *>(&size), sizeof(size));
            std::vector<size_t> tmp(size / sizeof(size_t));
            is.read(reinterpret_cast<char *>(tmp.data()),
                    static_cast<std::streamsize>(size));
            lod[i] = tmp;
        }
    }
    // the 3rd field, Tensor
    TensorFromStream(is, static_cast<Tensor *>(tensor));
}

} // namespace framework
} // namespace paddle_mobile
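ToAbsOffset above rewrites a multi-level LoD from relative to absolute
offsets by walking from the second-lowest level upward: each entry is
replaced by the offset it selects in the (already absolute) level below.
The standalone sketch below replays that on plain vectors, using the
3-level example documented in lod_tensor.h; mini_to_abs_offset is an
illustrative stand-in, not the project's function.

#include <cstddef>
#include <iostream>
#include <vector>

using MiniLoD = std::vector<std::vector<size_t>>;

// Same algorithm as ToAbsOffset() above.
MiniLoD mini_to_abs_offset(MiniLoD in) {
    if (in.size() <= 1)
        return in;
    for (int level = static_cast<int>(in.size()) - 2; level >= 0; --level) {
        for (size_t i = 0; i < in[level].size(); ++i) {
            in[level][i] = in[level + 1][in[level][i]];
        }
    }
    return in;
}

int main() {
    MiniLoD lod = {{0, 2, 3}, {0, 2, 4, 7}, {0, 2, 5, 7, 10, 12, 15, 20}};
    for (const auto &level : mini_to_abs_offset(lod)) {
        for (size_t v : level)
            std::cout << v << " ";
        std::cout << "\n";
    }
    // Prints:
    //   0 10 20
    //   0 5 10 20
    //   0 2 5 7 10 12 15 20
    // i.e. every level now measures offsets directly in tensor rows.
    return 0;
}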
@@ -23,190 +23,186 @@ limitations under the License. */

namespace paddle_mobile {
namespace framework {

/*
 * LoD is short for Level of Details.
 *
 * - in a level, each element indicates the relative offset of the lower
 *   level
 * - the first element should be 0, which indicates that this sequence
 *   starts from 0
 * - each sequence's begin and end (non-inclusive) is level[id, id+1]
 *
 * For example:
 *    3-level LoD stores
 *
 *    0 2 3
 *    0 2 4 7
 *    0 2 5 7 10 12 15 20
 */
using LoD = std::vector<std::vector<size_t>>;

std::ostream &operator<<(std::ostream &os, const LoD &lod);
std::ostream &operator<<(std::ostream &os, const LoDTensor &t);

std::string LoDToString(const LoD &lod);

LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin,
                 size_t elem_end);
/*
 * Transform an LoD from relative offsets to absolute offsets.
 */
LoD ToAbsOffset(const LoD &in);

bool operator==(const LoD &a, const LoD &b);

/*
 * Check whether this lod's format is valid.
 *
 * ATTENTION:
 *   - Empty lod is treated as valid.
 *
 * It will check these things:
 *
 *  1. all the offsets in a level should be ascending (no same items
 *     allowed).
 *  2. there should be more than 2 offsets existing in each level.
 *  3. the higher level's last offset should equal the lower level's
 *     size - 1.
 *  4. the first offset (the begin offset) of each level should be 0.
 *  5. the lowest level's last offset should equal `tensor_height` if
 *     tensor_height > 0.
 */
bool CheckLoD(const LoD &in, int tensor_height = -1);
/*
 * Check whether this absolute lod's format is valid.
 *
 * ATTENTION:
 *   - Empty lod is treated as valid.
 *
 * It will check these things:
 *  1. all the offsets in a level should be ascending (no same items
 *     allowed)
 *  2. there should be more than 2 offsets existing in each level.
 *  3. the first offset of each level should be 0, and the last should
 *     be the same (the height of the underlying tensor) or
 *     `tensor_height` if tensor_height > 0.
 */
bool CheckAbsLoD(const LoD &in, int tensor_height = -1);

/*
 * LoDTensor (Level of details Tensor)
 * see https://en.wikipedia.org/wiki/Level_of_details for reference.
 */
class LoDTensor : public Tensor {
  public:
    LoDTensor() : Tensor() {}

    explicit LoDTensor(const LoD &lod) : lod_(lod) {}

    void set_lod(const LoD &lod) { lod_ = lod; }

    const LoD &lod() const { return lod_; }

    LoD *mutable_lod() { return &lod_; }

    /*
     * Get the start offset and end offset of an element from LoD.
     */
    std::pair<size_t, size_t> lod_element(size_t level, size_t elem) const {
        // PADDLE_ENFORCE_LT(level, NumLevels());
        // PADDLE_ENFORCE_LT(elem, NumElements(level));
        return std::make_pair((lod_)[level][elem], (lod_)[level][elem + 1]);
    }

    /*
     * Number of LoDTensor's levels; each level has units of data. For
     * example, in the sentence view, article, paragraph, and sentence
     * are 3 levels.
     */
    size_t NumLevels() const { return lod_.size(); }

    /*
     * Number of elements in a level.
     */
    size_t NumElements(size_t level = 0) const {
        // PADDLE_ENFORCE_LT(level, NumLevels());
        // the last offset is the end of last element
        return (lod_)[level].size() - 1;
    }

  private:
    LoD lod_;
};

/*
 * Expand the `source` to fit the LoD of `lod`. For example, a `source`
 * LoDTensor is
 *  - LoD: [0, 2]
 *  - tensor: [a0, a1]
 * a `lod` is
 *  - LoD: [0 3 5]
 * returns a new LoDTensor
 *  - [a0 a0 a0 a1 a1]
 */
template <typename T>
LoDTensor LodExpand(const LoDTensor &source, const LoD &lod, size_t level) {
    LoD abs_lod = ToAbsOffset(lod);
    const auto &lod_level = lod[level];
    size_t num_instances = source.dims()[0];

    // new tensor
    LoDTensor tensor;
    tensor.set_lod(lod);
    auto dims = source.dims();
    dims[0] = lod_level.back();
    tensor.Resize(dims);
    tensor.mutable_data<T>();

    // PADDLE_ENFORCE_EQ(num_instances, lod_level.size() - 1);
    for (size_t ins = 0; ins < num_instances; ins++) {
        for (size_t elem = lod_level[ins]; elem < lod_level[ins + 1]; elem++) {
            auto slice = tensor.Slice(elem, elem + 1);
            TensorCopy(source.Slice(ins, ins + 1), &slice);
        }
    }
    return tensor;
}

// Get the absolute offset of a lod[start_level][start_idx:end_idx] and
// the relative length of details for every level (i.e., [start_level:]).
//
// For example,
//   lod = [[0, 3, 4, 8], [0, 9, 10, 11, 13, 17, 19, 22, 24]]
//   start_level = 0
//   start_idx = 1
//   end_idx = 3
//
// Returns:
//  LoD = [[1, 4], [2, 4, 2, 3, 2]]
//  pair<size_t, size_t> = {11, 24}
std::pair<LoD, std::pair<size_t, size_t>>
GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx, size_t end_idx,
                           size_t start_level);

void AppendLoD(LoD *lod, const LoD &lod_length);

/*
 * Serialize/Deserialize LoDTensor to std::ostream
 * You can pass ofstream or ostringstream to serialize to a file or to
 * an in-memory string. A GPU tensor will be copied to CPU.
 */
void SerializeToStream(std::ostream &os, const LoDTensor &tensor);

void DeserializeFromStream(std::istream &is, LoDTensor *tensor);

} // namespace framework
} // namespace paddle_mobile
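The stream layout behind SerializeToStream/DeserializeFromStream is small
enough to sketch end to end: a uint32_t version, a uint64_t level count,
then for each level a byte count followed by the raw size_t offsets (the
tensor payload written by TensorToStream is omitted here). The round trip
below is a simplified standalone sketch under those assumptions, not the
project's implementation.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <sstream>
#include <vector>

using MiniLoD = std::vector<std::vector<size_t>>;

void write_lod(std::ostream &os, const MiniLoD &lod) {
    constexpr uint32_t version = 0;
    os.write(reinterpret_cast<const char *>(&version), sizeof(version));
    uint64_t levels = lod.size();
    os.write(reinterpret_cast<const char *>(&levels), sizeof(levels));
    for (const auto &level : lod) {
        uint64_t bytes = level.size() * sizeof(size_t);
        os.write(reinterpret_cast<const char *>(&bytes), sizeof(bytes));
        os.write(reinterpret_cast<const char *>(level.data()),
                 static_cast<std::streamsize>(bytes));
    }
}

MiniLoD read_lod(std::istream &is) {
    uint32_t version = 0;
    is.read(reinterpret_cast<char *>(&version), sizeof(version));
    uint64_t levels = 0;
    is.read(reinterpret_cast<char *>(&levels), sizeof(levels));
    MiniLoD lod(levels);
    for (uint64_t i = 0; i < levels; ++i) {
        uint64_t bytes = 0;
        is.read(reinterpret_cast<char *>(&bytes), sizeof(bytes));
        lod[i].resize(bytes / sizeof(size_t));
        is.read(reinterpret_cast<char *>(lod[i].data()),
                static_cast<std::streamsize>(bytes));
    }
    return lod;
}

int main() {
    std::stringstream ss;
    write_lod(ss, {{0, 2, 3}, {0, 2, 4, 7}});
    MiniLoD back = read_lod(ss);
    std::cout << back.size() << " levels, last offset "
              << back.back().back() << "\n"; // 2 levels, last offset 7
    return 0;
}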
@@ -5,58 +5,55 @@

#include "op_desc.h"

namespace paddle_mobile {
namespace framework {

OpDesc::OpDesc(const proto::OpDesc &desc) : desc_(desc) {
    for (int i = 0; i < desc_.inputs_size(); ++i) {
        const proto::OpDesc::Var &var = desc_.inputs(i);
        std::vector<std::string> &args = inputs_[var.parameter()];
        int arg_size = var.arguments_size();
        for (int j = 0; j < arg_size; ++j) {
            args.push_back(var.arguments(j));
        }
    }

    for (int i = 0; i < desc_.outputs_size(); ++i) {
        const proto::OpDesc::Var &var = desc_.outputs(i);
        std::vector<std::string> &args = outputs_[var.parameter()];
        int arg_size = var.arguments_size();
        for (int j = 0; j < arg_size; ++j) {
            args.push_back(var.arguments(j));
        }
    }

    for (const proto::OpDesc::Attr &attr : desc_.attrs()) {
        std::string attr_name = attr.name();
        if (attr.type() != proto::AttrType::BLOCK) {
            attrs_[attr_name] = Attribute::GetAttrValue(attr);
            // if (attr.type() == proto::AttrType::INT) {
            //   std::cout << " attrName " << attr_name << " " <<
            //   attrs_[attr_name].Get<int>() << std::endl;
            // }
        }
    }
}

const std::vector<std::string> &OpDesc::Input(const std::string &name) const {
    return inputs_.find(name)->second;
}

const std::vector<std::string> &OpDesc::Output(const std::string &name) const {
    return outputs_.find(name)->second;
}

Attribute OpDesc::GetAttr(const std::string &name) const {
    auto it = attrs_.find(name);
    return it->second;
}

const std::unordered_map<std::string, Attribute> &OpDesc::GetAttrMap() const {
    return attrs_;
}

} // namespace framework
} // namespace paddle_mobile
@@ -23,31 +23,29 @@ SOFTWARE.

#include "paddle_mobile_object.h"

namespace paddle_mobile {
namespace framework {

class OpDesc : PaddleMobileObject {
  public:
    OpDesc(const proto::OpDesc &desc);
    const std::vector<std::string> &Input(const std::string &name) const;
    const std::vector<std::string> &Output(const std::string &name) const;
    Attribute GetAttr(const std::string &name) const;

    const VariableNameMap &GetInputs() { return inputs_; }

    const VariableNameMap &GetOutputs() { return outputs_; }

    const AttributeMap &GetAttrMap() const;

    const std::string &Type() { return desc_.type(); }

  private:
    proto::OpDesc desc_;
    VariableNameMap inputs_;
    VariableNameMap outputs_;
    AttributeMap attrs_;
};

} // namespace framework
} // namespace paddle_mobile
@@ -22,74 +22,73 @@ SOFTWARE.

#include "framework.pb.h"

namespace paddle_mobile {
namespace framework {

template <typename Dtype> struct OpInfo {
    OpCreator<Dtype> creator_;
    const OpCreator<Dtype> &Creator() const {
        // PADDLE_ENFORCE_NOT_NULL(creator_,
        //                         "Operator Creator has not been
        //                         registered");
        return creator_;
    }
};

template <typename Dtype> class OpInfoMap;

template <typename Dtype> static OpInfoMap<Dtype> *g_op_info_map = nullptr;

template <typename Dtype> class OpInfoMap {
  public:
    static OpInfoMap &Instance() {
        if (g_op_info_map<Dtype> == nullptr) {
            g_op_info_map<Dtype> = new OpInfoMap();
        }
        return *g_op_info_map<Dtype>;
    }

    bool Has(const std::string &op_type) const {
        return map_.find(op_type) != map_.end();
    }

    void Insert(const std::string &type, const OpInfo<Dtype> &info) {
        // PADDLE_ENFORCE(!Has(type), "Operator %s has been
        // registered", type);
        map_.insert({type, info});
    }

    const OpInfo<Dtype> &Get(const std::string &type) const {
        auto op_info_ptr = GetNullable(type);
        // PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not
        // been registered", type);
        return *op_info_ptr;
    }

    const OpInfo<Dtype> *GetNullable(const std::string &type) const {
        auto it = map_.find(type);
        if (it == map_.end()) {
            return nullptr;
        } else {
            return &it->second;
        }
    }

    const std::unordered_map<std::string, OpInfo<Dtype>> &map() const {
        return map_;
    }

    std::unordered_map<std::string, OpInfo<Dtype>> *mutable_map() {
        return &map_;
    }

  private:
    OpInfoMap() = default;
    std::unordered_map<std::string, OpInfo<Dtype>> map_;

    // DISABLE_COPY_AND_ASSIGN(OpInfoMap);
};

} // namespace framework
} // namespace paddle_mobile
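OpInfoMap above is a per-Dtype singleton: the g_op_info_map variable
template gives every device type its own lazily allocated registry keyed
by op type. The sketch below reproduces the pattern standalone (C++14 for
the variable template); MiniRegistry and the tag structs are illustrative
stand-ins, and, like the original, creation is not thread-safe.

#include <iostream>
#include <string>
#include <unordered_map>

template <typename Tag> class MiniRegistry;

// One lazily created registry pointer per tag type, mirroring
// g_op_info_map above.
template <typename Tag> MiniRegistry<Tag> *g_mini_registry = nullptr;

template <typename Tag> class MiniRegistry {
  public:
    static MiniRegistry &Instance() {
        if (g_mini_registry<Tag> == nullptr) {
            g_mini_registry<Tag> = new MiniRegistry();
        }
        return *g_mini_registry<Tag>;
    }
    void Insert(const std::string &type, int info) { map_[type] = info; }
    bool Has(const std::string &type) const {
        return map_.find(type) != map_.end();
    }

  private:
    MiniRegistry() = default;
    std::unordered_map<std::string, int> map_;
};

struct CPUTag {};
struct GPUTag {};

int main() {
    MiniRegistry<CPUTag>::Instance().Insert("conv2d", 1);
    // Each tag gets its own map: the CPU registry knows conv2d, the GPU
    // registry does not.
    std::cout << MiniRegistry<CPUTag>::Instance().Has("conv2d") << "\n"; // 1
    std::cout << MiniRegistry<GPUTag>::Instance().Has("conv2d") << "\n"; // 0
    return 0;
}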
@@ -22,51 +22,44 @@ SOFTWARE.

#include "framework.pb.h"

namespace paddle_mobile {
namespace framework {

struct OpKernelType {
    struct Hash {
        size_t operator()(const OpKernelType &key) const {
            int data_type = static_cast<int>(key.data_type_) << LEFT_SHIFT;
            int data_layout = static_cast<int>(key.data_layout_)
                              << (LEFT_SHIFT * 2);

            std::hash<int> hasher;
            return hasher(data_type + data_layout);
        }
    };

    // place, data_type, library_type kinds less than 2^8
    constexpr static int LEFT_SHIFT = 8;

    proto::VarType::Type data_type_;
    DataLayout data_layout_;

    OpKernelType(proto::VarType::Type data_type,
                 DataLayout data_layout = DataLayout::kAnyLayout)
        : data_type_(data_type), data_layout_(data_layout) {}

    bool operator==(const OpKernelType &o) const {
        return data_type_ == o.data_type_ && data_layout_ == o.data_layout_;
    }

    bool operator!=(const OpKernelType &o) const { return !(*this == o); }
};

inline bool NeedTransformLayout(const DataLayout &l, const DataLayout &r) {
    return l != DataLayout::kAnyLayout && r != DataLayout::kAnyLayout && l != r;
}

inline bool TransFromNeeded(const OpKernelType &l, const OpKernelType &r) {
    return (l.data_type_ != r.data_type_) ||
           NeedTransformLayout(l.data_layout_, r.data_layout_);
}

} // namespace framework
} // namespace paddle_mobile
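The Hash functor above packs the two enums into disjoint bit ranges, 8
bits apart, and hashes the sum, so distinct (data_type, data_layout)
pairs stay distinct as long as each enum value is below 2^8. Below is a
standalone sketch of that packing; the enum values passed in main are
made-up placeholders, not the real proto values.

#include <functional>
#include <iostream>

constexpr int kLeftShift = 8; // same role as OpKernelType::LEFT_SHIFT

size_t hash_kernel_key(int data_type, int data_layout) {
    // data_type occupies bits 8..15, data_layout bits 16..23.
    int packed = (data_type << kLeftShift) + (data_layout << (kLeftShift * 2));
    return std::hash<int>()(packed);
}

int main() {
    std::cout << hash_kernel_key(/*data_type=*/5, /*layout=*/0) << "\n";
    std::cout << hash_kernel_key(/*data_type=*/5, /*layout=*/1) << "\n";
    return 0;
}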
@@ -19,8 +19,8 @@ SOFTWARE.

#pragma once

namespace paddle_mobile {
namespace framework {

// this class not only makes the proto but also initializes the attribute
// checkers.
class OpProtoAndCheckerMaker {};

} // namespace framework
} // namespace paddle_mobile
@@ -20,23 +20,23 @@ SOFTWARE.

#include "op_info.h"

namespace paddle_mobile {
namespace framework {

template <typename Dtype>
OperatorBase<Dtype>::OperatorBase(const std::string &type,
                                  const VariableNameMap &inputs,
                                  const VariableNameMap &outputs,
                                  const AttributeMap &attrs,
                                  std::shared_ptr<Scope> scope)
    : type_(type), inputs_(inputs), outputs_(outputs), attrs_(attrs),
      scope_(scope) {
    CheckAllInputOutputSet();
}

template <typename Dtype>
void OperatorBase<Dtype>::CheckAllInputOutputSet() const {}

template class OperatorBase<CPU>;
template class OperatorWithKernel<CPU>;

} // namespace framework
} // namespace paddle_mobile
@@ -33,68 +33,64 @@ SOFTWARE.

#include "variable.h"

namespace paddle_mobile {
namespace framework {

static std::unordered_map<std::string, std::vector<std::string>>
    op_input_output_key = {
        {"conv2d", {"Input", "Output"}},   {"relu", {"X", "Out"}},
        {"softmax", {"X", "Out"}},         {"mul", {"X", "Out"}},
        {"elementwise_add", {"X", "Out"}}, {"pool2d", {"X", "Out"}},
        {"batch_norm", {"X", "Y"}},        {"lrn", {"X", "Out"}},
        {"concat", {"X", "Out"}},
};

template <typename Dtype> class OperatorBase : PaddleMobileObject {
  public:
    OperatorBase(const std::string &type, const VariableNameMap &inputs,
                 const VariableNameMap &outputs, const AttributeMap &attrs,
                 std::shared_ptr<Scope> scope);
    virtual ~OperatorBase() {}
    virtual void Run() const = 0;

    const VariableNameMap &Inputs() const { return inputs_; }
    const VariableNameMap &Outputs() const { return outputs_; }
    const std::string &Type() const { return type_; }
    const AttributeMap &Attrs() const { return attrs_; }

    void ClearVariables() const {
        if (this->scope_) {
            this->scope_->EraseVars(this->inputs_.at("Filter"));
            this->scope_->EraseVars(this->inputs_.at("Input"));
        }
    }

  protected:
    std::shared_ptr<Scope> scope_;
    std::string type_;
    VariableNameMap inputs_;
    VariableNameMap outputs_;
    AttributeMap attrs_;

  private:
    void CheckAllInputOutputSet() const;
};

template <typename Dtype>
class OperatorWithKernel : public OperatorBase<Dtype> {
  public:
    OperatorWithKernel(const std::string &type, const VariableNameMap &inputs,
                       const VariableNameMap &outputs,
                       const AttributeMap &attrs, std::shared_ptr<Scope> scope)
        : OperatorBase<Dtype>(type, inputs, outputs, attrs, scope) {}
    virtual void InferShape() const = 0;
    virtual void Run() const = 0;
};

template <typename Dtype, typename P> class OpKernelBase : PaddleMobileObject {
  public:
    virtual void Compute(const P &para) const = 0;

    virtual ~OpKernelBase() = default;
};

} // namespace framework
} // namespace paddle_mobile
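The three classes above split the work three ways: OperatorBase only
knows how to Run(), OperatorWithKernel adds shape inference, and
OpKernelBase does the arithmetic on a parameter bundle. Below is a
minimal standalone sketch of that layering; MiniParam, ReluKernel, and
MiniReluOp are illustrative stand-ins, not framework classes.

#include <iostream>

struct MiniParam { // stand-in for an op's prepared inputs/outputs
    float x;
};

class MiniOperatorBase {
  public:
    virtual ~MiniOperatorBase() = default;
    virtual void Run() const = 0;
};

template <typename P> class MiniOpKernelBase {
  public:
    virtual ~MiniOpKernelBase() = default;
    virtual void Compute(const P &param) const = 0;
};

class ReluKernel : public MiniOpKernelBase<MiniParam> {
  public:
    void Compute(const MiniParam &param) const override {
        std::cout << (param.x > 0 ? param.x : 0.0f) << "\n";
    }
};

class MiniReluOp : public MiniOperatorBase {
  public:
    explicit MiniReluOp(MiniParam param) : param_(param) {}
    void InferShape() const { /* shape checks would go here */ }
    void Run() const override { // forwards to the kernel
        ReluKernel kernel;
        kernel.Compute(param_);
    }

  private:
    MiniParam param_;
};

int main() {
    MiniReluOp op(MiniParam{-1.5f});
    op.Run(); // prints 0
    return 0;
}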
@@ -23,14 +23,14 @@ SOFTWARE.

namespace paddle_mobile {

class PaddleMobileObject {
  public:
    virtual std::string ToString() {
        char address[128] = {0};
        sprintf(address, "%p", this);
        return std::string(address);
    }

  private:
};

} // namespace paddle_mobile
@@ -21,44 +21,44 @@ SOFTWARE.

#include "node.h"

namespace paddle_mobile {
namespace framework {

Node &Node::operator>(const Node &out) {
    std::shared_ptr<Node> node = std::make_shared<Node>(Node(out));
    outputs_.push_back(node);
    return *node;
}

bool Node::operator==(const Node &in) {
    if (in.type_ == this->type_) {
        if (this->outputs_.size() == in.outputs_.size()) {
            for (int i = 0; i < outputs_.size(); ++i) {
                if (!(*outputs_[i] == *in.outputs_[i])) {
                    return false;
                }
            }
        } else {
            return false;
        }
    } else {
        return false;
    }
    return true;
}

std::string Node::ToString(std::string blank) const {
    std::stringstream ss;
    ss << type_ << ": \n";
    for (int i = 0; i < outputs_.size(); ++i) {
        ss << blank << outputs_[i]->ToString(blank + " ") << "";
    }
    return ss.str();
}

std::string Node::ToString() const { return this->ToString(" "); }

Print &operator<<(Print &printer, const Node &node) {
    printer << node.ToString();
    return printer;
}

} // namespace framework
} // namespace paddle_mobile
@@ -25,22 +25,22 @@ SOFTWARE.

#include "framework/paddle_mobile_object.h"

namespace paddle_mobile {
namespace framework {

class Node : PaddleMobileObject {
  public:
    Node(const std::string &type) : type_(type) {}
    Node &operator>(const Node &out);
    bool operator==(const Node &in);
    std::string ToString() const;

  private:
    std::string ToString(std::string blank) const;
    std::vector<std::shared_ptr<Node>> outputs_;
    std::string type_;
};

Print &operator<<(Print &printer, const Node &node);

} // namespace framework
} // namespace paddle_mobile
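Node::operator> above is what makes chained graph patterns such as
a > b > c possible: each application copies its right-hand side into a
shared_ptr child and returns a reference to that child, so the next `>`
attaches to the node just added. Below is a standalone sketch of the
idiom; MiniNode is an illustrative stand-in for Node.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

class MiniNode {
  public:
    explicit MiniNode(std::string type) : type_(std::move(type)) {}
    MiniNode &operator>(const MiniNode &out) {
        outputs_.push_back(std::make_shared<MiniNode>(out));
        return *outputs_.back(); // chain continues from the new child
    }
    void Print(const std::string &indent = "") const {
        std::cout << indent << type_ << "\n";
        for (const auto &n : outputs_)
            n->Print(indent + "  ");
    }

  private:
    std::vector<std::shared_ptr<MiniNode>> outputs_;
    std::string type_;
};

int main() {
    MiniNode root("conv2d");
    root > MiniNode("batch_norm") > MiniNode("relu");
    root.Print(); // conv2d / batch_norm / relu, one level deeper each
    return 0;
}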
@@ -19,7 +19,7 @@ SOFTWARE.

#include "program_optimize.h"

namespace paddle_mobile {
namespace framework {

std::shared_ptr<ProgramDesc> ProgramOptimize::Optimize() {}

} // namespace framework
} // namespace paddle_mobile
@@ -21,15 +21,15 @@ SOFTWARE.

#include "framework/program_desc.h"

namespace paddle_mobile {
namespace framework {

class ProgramOptimize {
  public:
    ProgramOptimize(std::shared_ptr<ProgramDesc> ori_desc)
        : ori_desc_(ori_desc) {}
    std::shared_ptr<ProgramDesc> Optimize();

  private:
    std::shared_ptr<ProgramDesc> ori_desc_;
};

} // namespace framework
} // namespace paddle_mobile
...@@ -17,5 +17,5 @@ SOFTWARE.
==============================================================================*/

namespace paddle_mobile {
namespace framework {}
} // namespace paddle_mobile
...@@ -24,17 +24,17 @@ SOFTWARE.
#include "scope.h"

namespace paddle_mobile {
namespace framework {

template <typename Dtype, Precision P = Precision::FP32>
class Program : PaddleMobileObject {
public:
  std::shared_ptr<ProgramDesc> originProgram;
  std::shared_ptr<ProgramDesc> optimizeProgram;
  std::shared_ptr<Scope> scope;

private:
};
} // namespace framework
} // namespace paddle_mobile
...@@ -5,18 +5,18 @@
#include "program_desc.h"

namespace paddle_mobile {
namespace framework {

ProgramDesc::ProgramDesc(const proto::ProgramDesc &desc) : desc_(desc) {
  for (auto &block_desc : *desc_.mutable_blocks()) {
    // new framework::BlockDesc(block_desc)
    blocks_.emplace_back(std::make_shared<BlockDesc>(block_desc));
  }
}

std::shared_ptr<BlockDesc> ProgramDesc::Block(size_t idx) {
  return blocks_[idx];
}
} // namespace framework
} // namespace paddle_mobile
...@@ -25,20 +25,18 @@ SOFTWARE.
#include "paddle_mobile_object.h"

namespace paddle_mobile {
namespace framework {

class ProgramDesc : PaddleMobileObject {
public:
  ProgramDesc(const proto::ProgramDesc &desc);
  std::shared_ptr<BlockDesc> Block(size_t idx);
  const std::vector<std::shared_ptr<BlockDesc>> &Blocks() { return blocks_; }

private:
  std::vector<std::shared_ptr<BlockDesc>> blocks_;
  proto::ProgramDesc desc_;
};
} // namespace framework
} // namespace paddle_mobile
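A sketch of how the two accessors above fit together, assuming a proto::ProgramDesc named proto_desc has been parsed elsewhere:

  framework::ProgramDesc desc(proto_desc);
  std::shared_ptr<framework::BlockDesc> main_block = desc.Block(0);
  for (const auto &block : desc.Blocks()) {
    // each BlockDesc wraps one proto block of the program
  }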
...@@ -4,116 +4,116 @@
#include <vector>

namespace paddle_mobile {
namespace framework {

Scope &Scope::NewScope() const {
  std::unique_lock<std::mutex> lock(mutex_);
  kids_.push_back(new Scope(this));
  return *kids_.back();
}

Variable *Scope::Var(const std::string &name) {
  auto *pvar = FindVarLocally(name);
  if (pvar != nullptr) {
    return pvar;
  }
  pvar = new Variable;
  vars_[name] = pvar;
  pvar->name_ = &(vars_.find(name)->first);
  return pvar;
}

// Variable* Scope::Var(std::string* name) {
//   auto var_name = string::Sprintf("%p.%d", this, vars_.size());
//   if (name != nullptr) {
//     *name = var_name;
//   }
//   return Var(var_name);
// }

Variable *Scope::FindVar(const std::string &name) const {
  auto *pvar = FindVarLocally(name);
  if (pvar != nullptr) {
    return pvar;
  }
  return (parent_ == nullptr) ? nullptr : parent_->FindVar(name);
}

const Scope *Scope::FindScope(const Variable *var) const {
  for (auto &name_var : vars_) {
    if (name_var.second == var) {
      return this;
    }
  }
  return (parent_ == nullptr) ? nullptr : parent_->FindScope(var);
}

void Scope::DropKids() {
  for (Scope *s : kids_) {
    delete s;
  }
  kids_.clear();
}

std::vector<std::string> Scope::LocalVarNames() const {
  std::vector<std::string> known_vars;
  known_vars.reserve(vars_.size());
  for (auto &name_var : vars_) {
    known_vars.emplace_back(name_var.first);
  }
  return known_vars;
}

void Scope::DeleteScope(Scope *scope) const {
  std::unique_lock<std::mutex> lock(mutex_);
  auto it = std::find(kids_.begin(), kids_.end(), scope);
  kids_.erase(it);
  delete scope;
  // different
}

void Scope::EraseVars(const std::vector<std::string> &var_names) {
  std::set<std::string> var_set(var_names.begin(), var_names.end());
  for (auto it = vars_.begin(); it != vars_.end();) {
    if (var_set.find(it->first) != var_set.end()) {
      delete it->second;
      it = vars_.erase(it);
    } else {
      ++it;
    }
  }
}

void Scope::Rename(const std::string &origin_name,
                   const std::string &new_name) const {
  auto origin_it = vars_.find(origin_name);
  if (origin_it == vars_.end()) {
    return;
  }
  auto new_it = vars_.find(new_name);
  if (new_it != vars_.end()) {
    return;
  }
  vars_[new_name] = origin_it->second;
  vars_.erase(origin_it);
}

// std::string Scope::Rename(const std::string& origin_name) const {
//   auto var_name = string::Sprintf("%p.%d", this, vars_.size());
//   Rename(origin_name, var_name);
//   return var_name;
// }

Variable *Scope::FindVarLocally(const std::string &name) const {
  auto it = vars_.find(name);
  if (it != vars_.end()) {
    return it->second;
  }
  return nullptr;
}
} // namespace framework
} // namespace paddle_mobile
...@@ -24,58 +24,58 @@ SOFTWARE.
#include <unordered_map> // std::unordered_map

namespace paddle_mobile {
namespace framework {

class Scope {
public:
  Scope() {}
  ~Scope() {}

  Scope &NewScope() const;

  /// Create a variable with the given name if it doesn't exist.
  Variable *Var(const std::string &name);

  /// Create a variable with a scope-unique name.
  Variable *Var(std::string *name = nullptr);

  void EraseVars(const std::vector<std::string> &var_names);

  /// Find a variable in the scope or any of its ancestors. Returns
  /// nullptr if it cannot be found.
  Variable *FindVar(const std::string &name) const;

  const Scope *parent() const { return parent_; }

  /// Find the scope or an ancestor scope that contains the given
  /// variable.
  const Scope *FindScope(const Variable *var) const;

  void DeleteScope(Scope *scope) const;

  /// Drop all child scopes belonging to this scope.
  void DropKids();

  // Enumerate all the variables the current scope contains.
  std::vector<std::string> LocalVarNames() const;

  // Rename a variable to a new name.
  void Rename(const std::string &origin_name,
              const std::string &new_name) const;

  // Rename a variable to a new name and return the new name.
  std::string Rename(const std::string &origin_name) const;

  Variable *FindVarLocally(const std::string &name) const;

private:
  // Call Scope::NewScope for a sub-scope.
  explicit Scope(Scope const *parent) : parent_(parent) {}

  mutable std::unordered_map<std::string, Variable *> vars_;
  mutable std::list<Scope *> kids_;
  Scope const *parent_{nullptr};
  mutable std::mutex mutex_;
};
} // namespace framework
} // namespace paddle_mobile
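A minimal sketch of the parent/child lookup rules above, assuming variable.h is included:

  framework::Scope root;
  framework::Variable *w = root.Var("weights"); // created in root
  framework::Scope &child = root.NewScope();
  // FindVar walks up the parent chain; FindVarLocally does not:
  bool found = (child.FindVar("weights") == w);              // true
  bool local = (child.FindVarLocally("weights") != nullptr); // false
  root.DropKids(); // frees `child` and any other sub-scopes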
...@@ -24,59 +24,58 @@ SOFTWARE.
#include "tensor.h"

namespace paddle_mobile {
namespace framework {

class SelectedRows {
public:
  SelectedRows(const std::vector<int64_t> &rows, const int64_t &height)
      : rows_(rows), height_(height) {
    value_.reset(new Tensor());
  }

  SelectedRows() {
    height_ = 0;
    value_.reset(new Tensor());
  }

  const Tensor &value() const { return *value_; }

  Tensor *mutable_value() { return value_.get(); }

  int64_t height() const { return height_; }

  void set_height(int64_t height) { height_ = height; }

  const std::vector<int64_t> &rows() const { return rows_; }

  std::vector<int64_t> *mutable_rows() { return &rows_; }

  void set_rows(const std::vector<int64_t> &rows) { rows_ = rows; }

  /**
   * Get the index of id in rows.
   */
  int64_t index(int64_t id) const {
    auto it = std::find(rows_.begin(), rows_.end(), id);
    // PADDLE_ENFORCE(it != rows_.end(), "id should be in rows");
    return static_cast<int64_t>(std::distance(rows_.begin(), it));
  }

  DDim GetCompleteDims() const {
    std::vector<int64_t> dims = vectorize(value_->dims());
    dims[0] = height_;
    return make_ddim(dims);
  }

private:
  // Notice: rows can contain duplicates, e.g. {0, 4, 7, 0, 5, 7, 9}.
  // SelectedRows are simply concatenated when added together; the
  // duplicate rows are only resolved when a SelectedRows is added to a
  // Tensor.
  std::vector<int64_t> rows_;
  std::unique_ptr<Tensor> value_{nullptr};
  int64_t height_;
};
} // namespace framework
} // namespace paddle_mobile
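A sketch of the row-index bookkeeping, for a sparse matrix with dense height 10 that materializes only rows 4 and 7:

  framework::SelectedRows sr({4, 7}, 10);
  sr.mutable_value()->Resize(framework::make_ddim({2, 8}));
  int64_t pos = sr.index(7);                   // 1: row id 7 is the 2nd stored row
  framework::DDim full = sr.GetCompleteDims(); // {10, 8}, the dense shape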
...@@ -25,316 +25,310 @@ limitations under the License. */
#include "memory/t_malloc.h"

namespace paddle_mobile {
namespace framework {

template <typename... T> struct SizeOfTypeFunctor;

template <typename T> struct SizeOfTypeFunctor<T> {
  size_t operator()(std::type_index type) const {
    if (typeid(T).hash_code() == type.hash_code()) {
      return sizeof(T);
    } else {
      return 0UL;
    }
  }
};

template <> struct SizeOfTypeFunctor<> {
  size_t operator()(std::type_index type) const { return 0UL; }
};

template <typename HEAD, typename... TAIL>
struct SizeOfTypeFunctor<HEAD, TAIL...> {
  size_t operator()(std::type_index type) const {
    SizeOfTypeFunctor<HEAD> head;
    size_t head_size = head(type);
    if (head_size != 0) {
      return head_size;
    }
    SizeOfTypeFunctor<TAIL...> tail;
    return tail(type);
  }
};

static inline size_t SizeOfType(std::type_index type) {
  SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool, size_t>
      functor;
  size_t size = functor(type);
  // PADDLE_ENFORCE(size != 0UL, "Cannot get size of type %s",
  //                type.name());
  return size;
}

class LoDTensor;

class Tensor {
public:
  Tensor() : offset_(0) {}

  /*! Return a pointer to mutable memory block. */
  template <typename T> inline T *data() {
    check_memory_size();
    // PADDLE_ENFORCE(std::is_same<T, void>::value ||
    //                    holder_->type().hash_code() == typeid(T).hash_code(),
    //                "Tensor holds the wrong type, it holds %s",
    //                this->holder_->type().name());
    return reinterpret_cast<T *>(
        reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
  }

  /*! Return a pointer to constant memory block. */
  template <typename T> inline const T *data() const {
    check_memory_size();
    // PADDLE_ENFORCE(std::is_same<T, void>::value ||
    //                    holder_->type().hash_code() == typeid(T).hash_code(),
    //                "Tensor holds the wrong type, it holds %s",
    //                this->holder_->type().name());
    return reinterpret_cast<const T *>(
        reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
  }

  inline bool IsInitialized() const { return holder_ != nullptr; }

  /**
   * @brief Return a pointer to mutable memory block.
   * @note If the memory does not exist yet, it is allocated.
   */
  template <typename T> inline T *mutable_data() {
    static_assert(std::is_pod<T>::value, "T must be POD");
    return reinterpret_cast<T *>(mutable_data(typeid(T)));
  }

  inline void *mutable_data(std::type_index type) {
    if (holder_ != nullptr) {
      holder_->set_type(type);
    }
    // PADDLE_ENFORCE_GE(numel(), 0,
    //                   "When calling this method, the Tensor's numel must be
    //                   equal or larger than zero. Please check
    //                   Tensor::Resize has been called first.");
    int64_t size = numel() * SizeOfType(type);
    /* some versions of boost::variant don't have operator!= */
    if (holder_ == nullptr || holder_->size() < size + offset_) {
      holder_.reset(new PlaceholderImpl(size, type));
      offset_ = 0;
    }
    return reinterpret_cast<void *>(
        reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
  }

  inline void *mutable_data() {
    // PADDLE_ENFORCE(this->holder_ != nullptr,
    //                "Cannot invoke mutable data if current holds nothing.");
    return mutable_data(holder_->type());
  }

  /**
   * @brief Return a pointer to mutable memory block.
   *
   * @param[in] dims The dimensions of the memory block.
   *
   * @note If the memory does not exist yet, it is allocated.
   */
  template <typename T> inline T *mutable_data(DDim dims) {
    static_assert(std::is_pod<T>::value, "T must be POD");
    Resize(dims);
    return mutable_data<T>();
  }

  /*! Return the dimensions of the memory block. */
  inline const DDim &dims() const { return dims_; }

  /*! Return the numel of the memory block. */
  inline int64_t numel() const { return product(dims_); }

  /*! Resize the dimensions of the memory block. */
  inline Tensor &Resize(const DDim &dims) {
    dims_ = dims;
    return *this;
  }

  /*! The internal of two tensors share the same memory block. */
  inline Tensor &ShareDataWith(const Tensor &src) {
    src.check_memory_size();
    *this = src;
    return *this;
  }

  /**
   * @brief Return a sub-tensor of the given tensor.
   *
   * @param[in] begin_idx The index of the start row (inclusive) to slice.
   *                      The index number begins from 0.
   * @param[in] end_idx   The index of the end row (exclusive) to slice.
   *                      The index number begins from 0.
   */
  inline Tensor Slice(int begin_idx, int end_idx) const {
    check_memory_size();
    // PADDLE_ENFORCE_GE(begin_idx, 0,
    //                   "The start row index must be greater than 0.");
    // PADDLE_ENFORCE_LE(end_idx, dims_[0],
    //                   "The end row index is out of bound.");
    // PADDLE_ENFORCE_LT(begin_idx, end_idx,
    //                   "The start row index must be lesser than the end row
    //                   index.");
    if (dims_[0] == 1) {
      return *this;
    } else {
      size_t base = numel() / dims_[0];
      Tensor dst;
      dst.holder_ = holder_;
      dst.set_layout(layout_);
      DDim dst_dims = dims_;
      dst_dims[0] = end_idx - begin_idx;
      dst.Resize(dst_dims);
      dst.offset_ = offset_ + begin_idx * base * SizeOfType(type());
      return dst;
    }
  }

  std::type_index type() const {
    // PADDLE_ENFORCE_NOT_NULL(
    //     holder_, "Tensor not initialized yet when Tensor::type() is
    //     called.");
    return holder_->type();
  }

  // memory_size returns the holding memory size in bytes.
  size_t memory_size() const {
    return holder_ == nullptr ? 0UL : holder_->size() - offset_;
  }

  inline void check_memory_size() const {
    // PADDLE_ENFORCE_NOT_NULL(
    //     holder_, "Tensor holds no memory. Call Tensor::mutable_data
    //     first.");
    // PADDLE_ENFORCE_LE(
    //     numel() * SizeOfType(type()), memory_size(),
    //     "Tensor's dims_ is out of bound. Call Tensor::mutable_data first
    //     to re-allocate memory,\n"
    //     "or maybe the required data-type mismatches the data already
    //     stored.");
  }

  inline DataLayout layout() const { return layout_; }

  inline void set_layout(const DataLayout layout) { layout_ = layout; }

private:
  /**
   * @note Placeholder hides type T, so it doesn't appear as a template
   *       parameter of Variable.
   */
  struct Placeholder {
    virtual ~Placeholder() = default;

    virtual void *ptr() const = 0;

    virtual size_t size() const = 0;

    virtual std::type_index type() const = 0;

    virtual void set_type(std::type_index type) = 0;
  };

  struct PlaceholderImpl : public Placeholder {
    PlaceholderImpl(size_t size, std::type_index type)
        : ptr_(static_cast<uint8_t *>(memory::Alloc(size)),
               memory::PODDeleter<uint8_t>()),
          size_(size), type_(type) {
      // PADDLE_ENFORCE_NOT_NULL(ptr_, "Insufficient %s memory to
      // allocation.", (is_cpu_place(place_) ? "CPU" : "GPU"));
    }

    virtual size_t size() const { return size_; }
    virtual void *ptr() const { return static_cast<void *>(ptr_.get()); }
    virtual std::type_index type() const { return type_; }
    virtual void set_type(std::type_index type) { type_ = type; }

    /*! the pointer to the memory block. */
    std::unique_ptr<uint8_t, memory::PODDeleter<uint8_t>> ptr_;

    /*! the size of the memory block. */
    size_t size_;

    /* the current type of memory */
    std::type_index type_;
  };

  /*! holds the memory block if allocated. */
  std::shared_ptr<Placeholder> holder_;

  /**
   * @brief points to elements dimensions.
   *
   * @note dims_ does not indicate the memory block size.
   */
  DDim dims_;

  /**
   * @brief the layout of the memory block, default is NHWC.
   *
   * @note the memory allocation order describes how weight/data is stored.
   *       For example, in a 4-D Tensor (rank = 4), there are three commonly
   *       used layouts: NCHW, NHWC, CHWN. N, C, H, W stand respectively for
   *       the batch size, the number of feature maps, the height, and the
   *       width.
   */
  DataLayout layout_ = DataLayout::kNHWC;

  /**
   * @brief A PlaceHolder may be shared by more than one tensor.
   *
   * @note Some of them may be slices of the others. So the offset_ is
   *       introduced here to indicate the byte offset between
   *       PlaceHolder::ptr_ and where the tensor data really begins.
   */
  size_t offset_;
};

inline Tensor ReshapeToMatrix(const Tensor &src, int num_col_dims) {
  Tensor res;
  res.ShareDataWith(src);
  res.Resize(flatten_to_2d(src.dims(), num_col_dims));
  return res;
}
} // namespace framework
} // namespace paddle_mobile
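A sketch of the allocate/slice flow, showing how Slice shares the Placeholder and only adjusts offset_ and dims_:

  framework::Tensor t;
  t.Resize(framework::make_ddim({4, 3}));
  float *data = t.mutable_data<float>();  // allocates 4 * 3 * sizeof(float)
  framework::Tensor rows = t.Slice(1, 3); // rows [1, 3): same holder_,
                                          // dims {2, 3}, nonzero offset_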
...@@ -18,189 +18,187 @@
#include <vector>

namespace paddle_mobile {
namespace framework {

void TensorCopy(const Tensor &src, Tensor *dst) {
  // VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place()
  //         << " to " << dst_place;
  src.check_memory_size();
  dst->Resize(src.dims());
  dst->set_layout(src.layout());
  auto src_ptr = src.data<void>();
  auto dst_ptr = dst->mutable_data(src.type());
  auto size = src.numel() * SizeOfType(src.type());
  memory::Copy(dst_ptr, src_ptr, size);
}

void TensorCopySync(const Tensor &src, Tensor *dst) {
  // VLOG(3) << "TensorCopySync " << src.dims() << " from " << src.place()
  //         << " to " << dst_place;
  src.check_memory_size();
  dst->Resize(src.dims());
  dst->set_layout(src.layout());
  auto src_ptr = src.data<void>();
  auto dst_ptr = dst->mutable_data(src.type());
  auto size = src.numel() * SizeOfType(src.type());
  memory::Copy(dst_ptr, src_ptr, size);
}

template <typename Predicate> struct AnyDTypeVisitor {
  Predicate predicate_;
  const Tensor &tensor_;
  Tensor *out_;

  AnyDTypeVisitor(Predicate predicate, const Tensor &tensor, Tensor *out)
      : predicate_(predicate), tensor_(tensor), out_(out) {}

  template <typename T> void operator()() const {
    // auto t = EigenVector<T>::Flatten(tensor_);
    // auto o = EigenScalar<bool>::From(*out_);
    // return any of predicate_(t) is true.
    // o.device(*ctx_.eigen_device()) = predicate_(t).any();
  }
};

template <typename Predicate>
inline void AnyImpl(Predicate predicate, const Tensor &tensor,
                    framework::Tensor *out) {
  VisitDataType(ToDataType(tensor.type()),
                AnyDTypeVisitor<Predicate>(predicate, tensor, out));
}

template <typename Predicate> struct AnyVisitor {
  const framework::Tensor &tensor_;
  Predicate predicate_;

  AnyVisitor(const framework::Tensor &tensor, Predicate predicate)
      : tensor_(tensor), predicate_(std::move(predicate)) {}

  bool operator()(void) const {
    framework::Tensor out;
    out.Resize({1});
    out.mutable_data<bool>();
    AnyImpl(predicate_, tensor_, &out);
    return this->GetResult(out);
  }

  bool GetResult(const framework::Tensor &out) const {
    return *out.data<bool>();
  }
};

template <typename Predicate>
inline bool Any(const framework::Tensor &tensor, Predicate predicate) {
  AnyVisitor<Predicate> visitor(tensor, predicate);
  // return platform::VisitPlace(visitor);
  return visitor();
}

struct ContainsNANPredicate {
  template <typename T>
  auto operator()(const T &eigen_vec) const
      -> decltype(std::declval<T>().isnan()) {
    // Cast eigen_vector to vector of bool. true if is NaN.
    return eigen_vec.isnan();
  }
};

bool TensorContainsNAN(const framework::Tensor &tensor) {
  ContainsNANPredicate predicate;
  return Any(tensor, predicate);
}

struct ContainsInfPredicate {
  template <typename T>
  auto operator()(const T &eigen_vec) const
      -> decltype(std::declval<T>().isinf()) {
    // Cast eigen_vector to vector of bool. true if is inf.
    return eigen_vec.isinf();
  }
};

bool TensorContainsInf(const framework::Tensor &tensor) {
  ContainsInfPredicate predicate;
  return Any(tensor, predicate);
}

void TensorToStream(std::ostream &os, const Tensor &tensor) {
  { // the 1st field, uint32_t version
    constexpr uint32_t version = 0;
    os.write(reinterpret_cast<const char *>(&version), sizeof(version));
  }
  { // the 2nd field, tensor description
    // int32_t size
    // void* protobuf message
    proto::VarType::TensorDesc desc;
    desc.set_data_type(framework::ToDataType(tensor.type()));
    auto dims = framework::vectorize(tensor.dims());
    auto *pb_dims = desc.mutable_dims();
    pb_dims->Resize(static_cast<int>(dims.size()), 0);
    std::copy(dims.begin(), dims.end(), pb_dims->begin());
    int32_t size = desc.ByteSize();
    os.write(reinterpret_cast<const char *>(&size), sizeof(size));
    auto out = desc.SerializeAsString();
    os.write(out.data(), size);
  }
  { // the 3rd field, tensor data
    uint64_t size = tensor.memory_size();
    auto *data_ptr = tensor.data<void>();
    // PADDLE_ENFORCE(size < std::numeric_limits<std::streamsize>::max(),
    //                "Index overflow when writing tensor");
    os.write(static_cast<const char *>(data_ptr),
             static_cast<std::streamsize>(size));
  }
}

struct DeserializedDataFunctor {
  DeserializedDataFunctor(void **buf, Tensor *tensor)
      : buf_(buf), tensor_(tensor) {}

  template <typename T> void operator()() {
    *buf_ = tensor_->mutable_data<T>();
  }

  void **buf_;
  Tensor *tensor_;
};

void TensorFromStream(std::istream &is, framework::Tensor *tensor) {
  uint32_t version;
  is.read(reinterpret_cast<char *>(&version), sizeof(version));
  // PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
  proto::VarType::TensorDesc desc;
  { // int32_t size
    // proto buffer
    int32_t size;
    is.read(reinterpret_cast<char *>(&size), sizeof(size));
    std::unique_ptr<char[]> buf(new char[size]);
    is.read(reinterpret_cast<char *>(buf.get()), size);
    // PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size),
    //                "Cannot parse tensor desc");
  }
  { // read tensor
    std::vector<int64_t> dims;
    dims.reserve(static_cast<size_t>(desc.dims().size()));
    std::copy(desc.dims().begin(), desc.dims().end(),
              std::back_inserter(dims));
    tensor->Resize(framework::make_ddim(dims));
    void *buf;
    framework::VisitDataType(desc.data_type(),
                             DeserializedDataFunctor(&buf, tensor));
    is.read(static_cast<char *>(buf), tensor->memory_size());
  }
}
} // namespace framework
} // namespace paddle_mobile
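A sketch of the serialization round trip; the three fields written by TensorToStream (version, TensorDesc, raw data) are read back in the same order by TensorFromStream:

  std::stringstream ss;
  framework::Tensor src, dst;
  src.Resize(framework::make_ddim({2, 2}));
  src.mutable_data<float>();
  framework::TensorToStream(ss, src);
  framework::TensorFromStream(ss, &dst); // dst gets src's dims, type, bytes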
...@@ -20,47 +20,47 @@ limitations under the License. */
#include <vector>

namespace paddle_mobile {
namespace framework {

void TensorCopy(const Tensor &src, Tensor *dst);
void TensorCopySync(const Tensor &src, Tensor *dst);

template <typename T>
void TensorFromVector(const std::vector<T> &src, Tensor *dst);

template <typename T>
void TensorToVector(const Tensor &src, std::vector<T> *dst);

bool TensorContainsNAN(const framework::Tensor &tensor);
bool TensorContainsInf(const framework::Tensor &tensor);

void TensorToStream(std::ostream &os, const Tensor &tensor);
void TensorFromStream(std::istream &is, Tensor *tensor);

//
// The implementation of template functions.
//

template <typename T>
void TensorFromVector(const std::vector<T> &src, Tensor *dst) {
  auto src_ptr = static_cast<const void *>(src.data());
  dst->Resize({static_cast<int64_t>(src.size())});
  auto dst_ptr = static_cast<void *>(dst->mutable_data<T>());
  auto size = src.size() * sizeof(T);
  memory::Copy(dst_ptr, src_ptr, size);
}

template <typename T>
void TensorToVector(const Tensor &src, std::vector<T> *dst) {
  auto src_ptr = static_cast<const void *>(src.data<T>());
  auto size = src.numel() * sizeof(T);
  dst->resize(src.numel());
  auto dst_ptr = static_cast<void *>(dst->data());
  memory::Copy(dst_ptr, src_ptr, size);
}
} // namespace framework
} // namespace paddle_mobile
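A sketch of the host-vector round trip defined above:

  std::vector<float> in = {1.f, 2.f, 3.f};
  std::vector<float> out;
  framework::Tensor t;
  framework::TensorFromVector(in, &t); // t is resized to a 1-D tensor of 3
  framework::TensorToVector(t, &out);  // out now equals in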
...@@ -20,9 +20,9 @@ SOFTWARE.
namespace paddle_mobile {
namespace framework {

VarDesc::VarDesc(const proto::VarDesc &desc) : desc_(desc) {}
} // namespace framework
} // namespace paddle_mobile
...@@ -22,68 +22,67 @@ SOFTWARE.
#include "paddle_mobile_object.h"

namespace paddle_mobile {
namespace framework {

class VarDesc {
public:
  VarDesc(const proto::VarDesc &desc);

  std::string Name() const { return desc_.name(); }

  proto::VarType::Type GetType() const { return desc_.type().type(); }

  bool Persistable() const { return desc_.persistable(); }

  const proto::VarType::ChannelDesc &channel_desc() const {
    switch (desc_.type().type()) {
    case proto::VarType::CHANNEL:
      return desc_.type().channel();
    default:
      break;
    }
  }

  const proto::VarType::TensorDesc &tensor_desc() const {
    switch (desc_.type().type()) {
    case proto::VarType::SELECTED_ROWS:
      return desc_.type().selected_rows();
    case proto::VarType::LOD_TENSOR:
      return desc_.type().lod_tensor().tensor();
    case proto::VarType::LOD_TENSOR_ARRAY:
      return desc_.type().tensor_array().tensor();
    default:
      break;
    }
  }

  proto::VarType::Type GetDataType() const {
    switch (desc_.type().type()) {
    case proto::VarType::CHANNEL:
      return channel_desc().data_type();
    default:
      return tensor_desc().data_type();
    }
  }

  template <typename T>
  std::vector<T> RepeatedToVector(
      const google::protobuf::RepeatedField<T> &repeated_field) const {
    std::vector<T> ret;
    ret.reserve(repeated_field.size());
    std::copy(repeated_field.begin(), repeated_field.end(),
              std::back_inserter(ret));
    return ret;
  }

  std::vector<int64_t> GetShape() const {
    return this->RepeatedToVector(tensor_desc().dims());
  }

private:
  proto::VarDesc desc_;
};
} // namespace framework
} // namespace paddle_mobile
...@@ -23,17 +23,17 @@ SOFTWARE.
#include "variable.h"

namespace paddle_mobile {
namespace framework {
inline proto::VarType::Type ToVarType(std::type_index type) {
  if (type.hash_code() == typeid(LoDTensor).hash_code()) {
    return proto::VarType_Type_LOD_TENSOR;
  } else if (type.hash_code() == typeid(SelectedRows).hash_code()) {
    return proto::VarType_Type_SELECTED_ROWS;
  } else {
    // PADDLE_THROW("ToVarType:Unsupported type %s", type.name());
  }
}
} // namespace framework
} // namespace paddle_mobile
...@@ -26,71 +26,71 @@ SOFTWARE.
#include <typeinfo>

namespace paddle_mobile {
namespace framework {
class Variable : public PaddleMobileObject {
public:
  template <typename T> const T *Get() const {
    return static_cast<const T *>(holder_->Ptr());
  }

  bool IsInitialized() const { return holder_ != nullptr; }

  const std::string *Name() { return name_; }

  template <typename T> T *GetMutable() {
    if (!IsType<T>()) {
      if (*Name() == "pixel") {
        // std::cout << " reset " << *Name() << std::endl;
      }
      holder_.reset(new PlaceholderImp<T>(new T()));
    }
    return static_cast<T *>(holder_->Ptr());
  }

  template <typename T> bool IsType() const {
    if (holder_) {
      // printf("not null \n");
      printf(" holder type : %s, this type %s \n", holder_->Type().name(),
             typeid(T).name());
    }

    // std::cout << " " << holder_->Type() << " " << typeid(T) << std::endl;
    return holder_ != nullptr && holder_->Type() == typeid(T);
  }

  void Clear() { holder_.reset(); }

  std::type_index Type() const { return holder_->Type(); }

  void SetName(const std::string *name) { name_ = name; }

private:
  struct Placeholder {
    Placeholder() = default;
    virtual ~Placeholder() = default;

    virtual const std::type_info &Type() const = 0;
    virtual void *Ptr() const = 0;
  };

  template <typename T> struct PlaceholderImp : public Placeholder {
    explicit PlaceholderImp(T *ptr) : ptr_(ptr), type_(typeid(T)) {}

    virtual const std::type_info &Type() const { return type_; }
    virtual void *Ptr() const override {
      return static_cast<void *>(ptr_.get());
    }

    std::unique_ptr<T> ptr_;
    const std::type_info &type_;
  };

  std::unique_ptr<Placeholder> holder_;
  friend class Scope;
  const std::string *name_;
};
} // namespace framework
} // namespace paddle_mobile
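A sketch of the type-erased container; variables are normally created through Scope::Var, which sets name_ before GetMutable dereferences it:

  framework::Scope scope;
  framework::Variable *var = scope.Var("weights");
  auto *t = var->GetMutable<framework::LoDTensor>(); // allocated on first use
  bool is_lod = var->IsType<framework::LoDTensor>(); // true; also printf-logs the held type
  const auto *ro = var->Get<framework::LoDTensor>(); // read-only access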
...@@ -28,398 +28,386 @@ SOFTWARE.
namespace paddle_mobile {

void ReadBinaryFile(const std::string &filename, std::string *contents) {
  std::ifstream fin(filename, std::ios::in | std::ios::binary);
  fin.seekg(0, std::ios::end);
  contents->clear();
  contents->resize(fin.tellg());
  fin.seekg(0, std::ios::beg);
  fin.read(&(contents->at(0)), contents->size());
  fin.close();
}

template <typename Dtype, Precision P>
void Loader<Dtype, P>::LoadVar(framework::LoDTensor *tensor,
                               const std::string &file_path) {
  // LOG(kLOG_DEBUG) << " to load " << file_path;
  std::ifstream is(file_path, std::ios::in | std::ios::binary);
  std::streampos pos = is.tellg(); // save current position
  is.seekg(0, std::ios::end);
  // LOG(kLOG_DEBUG) << " file length = " << is.tellg();
  is.seekg(pos); // restore saved position

  // 1. version
  uint32_t version;
  is.read(reinterpret_cast<char *>(&version), sizeof(version));
  // LOG(kLOG_INFO) << " version: " << version;

  // 2. LoD information
  uint64_t lod_level;
  is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
  // LOG(kLOG_DEBUG) << " lod level: " << lod_level;
  // LOG(kLOG_DEBUG) << " lod info: ";
  auto &lod = *tensor->mutable_lod();
  lod.resize(lod_level);
  for (uint64_t i = 0; i < lod_level; ++i) {
    uint64_t size;
    is.read(reinterpret_cast<char *>(&size), sizeof(size));
    std::vector<size_t> tmp(size / sizeof(size_t));
    is.read(reinterpret_cast<char *>(tmp.data()),
            static_cast<std::streamsize>(size));
    for (int j = 0; j < tmp.size(); ++j) {
      LOG(kLOG_DEBUG1) << " lod - " << tmp[j];
    }
    lod[i] = tmp;
  }

  // 3. tensor version
  uint32_t tensor_version;
  is.read(reinterpret_cast<char *>(&tensor_version), sizeof(tensor_version));
  // std::cout << " tensor_version: " << tensor_version << std::endl;

  // 4. tensor desc
  int32_t size;
  is.read(reinterpret_cast<char *>(&size), sizeof(size));
  // std::cout << " tensor desc size: " << size << std::endl;
  std::unique_ptr<char[]> buf(new char[size]);
  is.read(reinterpret_cast<char *>(buf.get()), size);

  framework::proto::VarType::TensorDesc desc;
  desc.ParseFromArray(buf.get(), size);

  // std::cout << " desc dims size " << desc.dims().size() << std::endl;
  int memory_size = 1;
  for (int l = 0; l < desc.dims().size(); ++l) {
    // std::cout << " dim " << l << " value: " << desc.dims()[l] << std::endl;
    memory_size *= desc.dims()[l];
  }

  std::vector<int64_t> dims;
  dims.reserve(static_cast<size_t>(desc.dims().size()));
  std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
  tensor->Resize(framework::make_ddim(dims));

  // note: only the FP32 branch allocates `memory` below; the other
  // branches currently leave it unset before the final read.
  void *memory;
  int type_size = 0;
  // std::cout << " desc pre type: ";
  switch (desc.data_type()) {
  case framework::proto::VarType::FP16:
    // std::cout << "FP16" << std::endl;
    type_size = 2;
    break;
  case framework::proto::VarType::FP32:
    type_size = 4;
    memory = tensor->mutable_data<float>();
    // std::cout << "FP32" << std::endl;
    break;
  case framework::proto::VarType::FP64:
    type_size = 8;
    // std::cout << "FP64" << std::endl;
    break;
  case framework::proto::VarType::INT32:
    type_size = 4;
    // std::cout << "INT32" << std::endl;
    break;
  case framework::proto::VarType::INT64:
    type_size = 8;
    // std::cout << "INT64" << std::endl;
    break;
  case framework::proto::VarType::BOOL:
    type_size = 1;
    // std::cout << "BOOL" << std::endl;
    break;
  default:
    // std::cout << " not support" << std::endl;
    break;
  }

  // std::cout << " malloc size: " << memory_size * type_size << std::endl;
  is.read(static_cast<char *>(memory), memory_size * type_size);
  // std::cout << " memory: " << memory << std::endl;
  is.close();
}

template <typename Dtype, Precision P>
const framework::Program<Dtype, P>
Loader<Dtype, P>::Load(const std::string &dirname) {
  std::string model_filename = dirname + "/__model__";
  std::string program_desc_str;
  ReadBinaryFile(model_filename, &program_desc_str);
  framework::proto::ProgramDesc program_desc_proto;
  program_desc_proto.ParseFromString(program_desc_str);

  std::shared_ptr<framework::ProgramDesc> originProgramDesc =
      std::make_shared<framework::ProgramDesc>(program_desc_proto);

  framework::Program<Dtype, P> program;
  program.originProgram = originProgramDesc;

  std::shared_ptr<framework::Scope> scope =
      std::make_shared<framework::Scope>();
  program.scope = scope;

  auto block = originProgramDesc->Block(0);

  for (auto block : originProgramDesc->Blocks()) {
    // std::cout << "for block" << std::endl;
    for (int i = 0; i < block->Vars().size(); ++i) {
      std::shared_ptr<framework::VarDesc> var_desc = block->Vars()[i];
      auto var = scope->Var(var_desc->Name());
      if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
        if (var_desc->Persistable() &&
            var_desc->GetType() !=
                framework::proto::VarType::FEED_MINIBATCH &&
            var_desc->GetType() !=
                framework::proto::VarType::FETCH_LIST) {
          framework::LoDTensor *tensor =
              var->GetMutable<framework::LoDTensor>();
          // to load
          LoadVar(tensor, dirname + "/" + var_desc->Name());
        }
      } else {
        // std::cout << "not a LoD tensor" << std::endl;
framework::LoDTensor *tensor =
var->GetMutable<framework::LoDTensor>();
// to load
LoadVar(tensor, dirname + "/" + var_desc->Name());
} }
} else {
// std::cout << "非 lod" << std::endl;
} }
} }
}
#ifdef PADDLE_MOBILE_DEBUG #ifdef PADDLE_MOBILE_DEBUG
for (int i = 0; i < program_desc_proto.blocks().size(); ++i) { for (int i = 0; i < program_desc_proto.blocks().size(); ++i) {
framework::proto::BlockDesc block = program_desc_proto.blocks()[i]; framework::proto::BlockDesc block = program_desc_proto.blocks()[i];
LOG(kLOG_DEBUG) << "block: " << block.idx(); LOG(kLOG_DEBUG) << "block: " << block.idx();
for (int j = 0; j < block.ops().size(); ++j) { for (int j = 0; j < block.ops().size(); ++j) {
framework::proto::OpDesc op = block.ops()[j]; framework::proto::OpDesc op = block.ops()[j];
LOG(kLOG_DEBUG1) << " op: " << op.type(); LOG(kLOG_DEBUG1) << " op: " << op.type();
for (int m = 0; m < op.inputs_size(); ++m) { for (int m = 0; m < op.inputs_size(); ++m) {
const framework::proto::OpDesc::Var &var = op.inputs(m); const framework::proto::OpDesc::Var &var = op.inputs(m);
LOG(kLOG_DEBUG2) << " input parameter: " LOG(kLOG_DEBUG2) << " input parameter: " << var.parameter();
<< var.parameter(); for (int n = 0; n < var.arguments().size(); ++n) {
for (int n = 0; n < var.arguments().size(); ++n) { LOG(kLOG_DEBUG3) << " argument - " << var.arguments()[n];
LOG(kLOG_DEBUG3) << " argument - "
<< var.arguments()[n];
}
} }
}
for (int y = 0; y < op.outputs_size(); ++y) { for (int y = 0; y < op.outputs_size(); ++y) {
const framework::proto::OpDesc::Var &var = op.outputs(y); const framework::proto::OpDesc::Var &var = op.outputs(y);
LOG(kLOG_DEBUG2) << " out parameter: " << var.parameter(); LOG(kLOG_DEBUG2) << " out parameter: " << var.parameter();
for (int z = 0; z < var.arguments().size(); ++z) { for (int z = 0; z < var.arguments().size(); ++z) {
LOG(kLOG_DEBUG3) << " argument - " LOG(kLOG_DEBUG3) << " argument - " << var.arguments()[z];
<< var.arguments()[z];
}
} }
}
for (int x = 0; x < op.attrs().size(); ++x) { for (int x = 0; x < op.attrs().size(); ++x) {
const framework::proto::OpDesc_Attr attr = op.attrs()[x]; const framework::proto::OpDesc_Attr attr = op.attrs()[x];
// std::cout << " attr name: " << attr.name() << // std::cout << " attr name: " << attr.name() <<
// std::endl; // std::endl;
// std::cout << " attr type: " << attr.type() << // std::cout << " attr type: " << attr.type() <<
// std::endl; // std::endl;
switch (attr.type()) { switch (attr.type()) {
case framework::proto::AttrType::BOOLEAN: case framework::proto::AttrType::BOOLEAN:
// std::cout << " boolen: " << attr.b() << // std::cout << " boolen: " << attr.b() <<
// std::endl;
break;
case framework::proto::AttrType::INT:
// std::cout << " int: " << attr.i() <<
// std::endl;
break;
case framework::proto::AttrType::FLOAT:
// std::cout << " float: " << attr.f() <<
// std::endl; // std::endl;
case framework::proto::AttrType::STRING: break;
// std::cout << " string: " << attr.s() << case framework::proto::AttrType::INT:
// std::cout << " int: " << attr.i() <<
// std::endl; // std::endl;
case framework::proto::AttrType::BOOLEANS: break;
// std::vector<bool> case framework::proto::AttrType::FLOAT:
// bools(attr.bools_size()); // std::cout << " float: " << attr.f() <<
for (int y = 0; y < attr.bools_size(); ++y) { // std::endl;
// std::cout << " bool - " << case framework::proto::AttrType::STRING:
// attr.bools(y) << // std::cout << " string: " << attr.s() <<
// std::endl; // std::endl;
} case framework::proto::AttrType::BOOLEANS:
case framework::proto::AttrType::LONG: // std::vector<bool>
// std::cout << " long: " << attr.l() << // bools(attr.bools_size());
// std::endl; for (int y = 0; y < attr.bools_size(); ++y) {
case framework::proto::AttrType::FLOATS: // std::cout << " bool - " <<
for (int y = 0; y < attr.floats_size(); ++y) { // attr.bools(y) <<
// std::cout << " float - " << y << // std::endl;
// ": " << }
// attr.floats(y) case framework::proto::AttrType::LONG:
// << std::endl; // std::cout << " long: " << attr.l() <<
} // std::endl;
case framework::proto::AttrType::INTS: case framework::proto::AttrType::FLOATS:
for (int y = 0; y < attr.ints_size(); ++y) { for (int y = 0; y < attr.floats_size(); ++y) {
// std::cout << " int - " << y << ": // std::cout << " float - " << y <<
// " << // ": " <<
// attr.ints(y) // attr.floats(y)
// << std::endl; // << std::endl;
} }
case framework::proto::AttrType::STRINGS: case framework::proto::AttrType::INTS:
for (int y = 0; y < attr.strings_size(); ++y) { for (int y = 0; y < attr.ints_size(); ++y) {
// std::cout << " string - " << y << // std::cout << " int - " << y << ":
// ": " << // " <<
// attr.strings(y) // attr.ints(y)
// << std::endl; // << std::endl;
} }
case framework::proto::AttrType::STRINGS:
for (int y = 0; y < attr.strings_size(); ++y) {
// std::cout << " string - " << y <<
// ": " <<
// attr.strings(y)
// << std::endl;
} }
} }
} }
}
for (int k = 0; k < block.vars().size(); ++k) { for (int k = 0; k < block.vars().size(); ++k) {
framework::proto::VarDesc var = block.vars()[k]; framework::proto::VarDesc var = block.vars()[k];
if (var.type().type() == if (var.type().type() == framework::proto::VarType::LOD_TENSOR) {
framework::proto::VarType::LOD_TENSOR) { // std::cout << " var name: " << var.name() <<
// std::cout << " var name: " << var.name() << // std::endl;
// std::endl; const framework::proto::VarType::TensorDesc &tensor_desc =
const framework::proto::VarType::TensorDesc &tensor_desc = var.type().lod_tensor().tensor();
var.type().lod_tensor().tensor(); // std::cout << " in var tensor desc dims size "
// std::cout << " in var tensor desc dims size " // << tensor_desc.dims().size() <<
// << tensor_desc.dims().size() << // std::endl;
// std::endl; int memory_size = 1;
int memory_size = 1; for (int l = 0; l < tensor_desc.dims().size(); ++l) {
for (int l = 0; l < tensor_desc.dims().size(); ++l) { // std::cout << " var tensor desc dim " << l
// std::cout << " var tensor desc dim " << l // << " value: " <<
// << " value: " << // tensor_desc.dims()[l] <<
// tensor_desc.dims()[l] << // std::endl;
// std::endl;
}
} }
}
if (var.persistable() && if (var.persistable() &&
var.type().type() != var.type().type() !=
framework::proto::VarType::FEED_MINIBATCH && framework::proto::VarType::FEED_MINIBATCH &&
var.type().type() != var.type().type() != framework::proto::VarType::FETCH_LIST) {
framework::proto::VarType::FETCH_LIST) { // std::cout << " to load " << var.name() <<
// std::cout << " to load " << var.name() << // std::endl;
// std::endl; std::string file_path = dirname + "/" + var.name();
std::string file_path = dirname + "/" + var.name(); std::ifstream is(file_path);
std::ifstream is(file_path); std::streampos pos = is.tellg(); // save current position
std::streampos pos = is.seekg(0, std::ios::end);
is.tellg(); // save current position // std::cout << " file length = " << is.tellg() <<
is.seekg(0, std::ios::end); // std::endl;
// std::cout << " file length = " << is.tellg() << is.seekg(pos); // restore saved position
// std::endl;
is.seekg(pos); // restore saved position // 1. version
uint32_t version;
// 1. version is.read(reinterpret_cast<char *>(&version), sizeof(version));
uint32_t version; // std::cout << " version: " << version <<
is.read(reinterpret_cast<char *>(&version), // std::endl;
sizeof(version));
// std::cout << " version: " << version << // 2 Lod information
// std::endl; uint64_t lod_level;
is.read(reinterpret_cast<char *>(&lod_level),
// 2 Lod information sizeof(lod_level));
uint64_t lod_level; // std::cout << " load level: " << lod_level <<
is.read(reinterpret_cast<char *>(&lod_level), // std::endl;
sizeof(lod_level)); // std::cout << " lod info: " << std::endl;
// std::cout << " load level: " << lod_level << for (uint64_t i = 0; i < lod_level; ++i) {
// std::endl; uint64_t size;
// std::cout << " lod info: " << std::endl;
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size;
is.read(reinterpret_cast<char *>(&size), sizeof(size));
std::vector<size_t> tmp(size / sizeof(size_t));
is.read(reinterpret_cast<char *>(tmp.data()),
static_cast<std::streamsize>(size));
for (int j = 0; j < tmp.size(); ++j) {
// std::cout << " lod - " << tmp[j] <<
// std::endl;
}
}
uint32_t tensor_version;
is.read(reinterpret_cast<char *>(&version),
sizeof(version));
// std::cout << " tensor_version: " <<
// tensor_version <<
// std::endl;
int32_t size;
is.read(reinterpret_cast<char *>(&size), sizeof(size)); is.read(reinterpret_cast<char *>(&size), sizeof(size));
// std::cout << " tensor desc size: " << size << std::vector<size_t> tmp(size / sizeof(size_t));
// std::endl; is.read(reinterpret_cast<char *>(tmp.data()),
std::unique_ptr<char[]> buf(new char[size]); static_cast<std::streamsize>(size));
is.read(reinterpret_cast<char *>(buf.get()), size); for (int j = 0; j < tmp.size(); ++j) {
// std::cout << " lod - " << tmp[j] <<
framework::proto::VarType::TensorDesc desc;
desc.ParseFromArray(buf.get(), size);
// std::cout << " desc dims size " <<
// desc.dims().size() <<
// std::endl;
int memory_size = 1;
for (int l = 0; l < desc.dims().size(); ++l) {
// std::cout << " dim " << l << " value: "
// <<
// desc.dims()[l]
// << std::endl;
memory_size *= desc.dims()[l];
}
int type_size = 0;
// std::cout << " desc pre type: ";
switch (desc.data_type()) {
case framework::proto::VarType::FP16:
// std::cout << "FP16" << std::endl;
type_size = 2;
break;
case framework::proto::VarType::FP32:
type_size = 4;
// std::cout << "FP32" << std::endl;
break;
case framework::proto::VarType::FP64:
type_size = 8;
// std::cout << "FP64" << std::endl;
break;
case framework::proto::VarType::INT32:
type_size = 4;
// std::cout << "INT32" << std::endl;
break;
case framework::proto::VarType::INT64:
type_size = 8;
// std::cout << "INT64" << std::endl;
break;
case framework::proto::VarType::BOOL:
type_size = 1;
// std::cout << "BOOL" << std::endl;
break;
default:
break;
// std::cout << " not support" <<
// std::endl; // std::endl;
} }
}
uint32_t tensor_version;
is.read(reinterpret_cast<char *>(&version), sizeof(version));
// std::cout << " tensor_version: " <<
// tensor_version <<
// std::endl;
int32_t size;
is.read(reinterpret_cast<char *>(&size), sizeof(size));
// std::cout << " tensor desc size: " << size <<
// std::endl;
std::unique_ptr<char[]> buf(new char[size]);
is.read(reinterpret_cast<char *>(buf.get()), size);
framework::proto::VarType::TensorDesc desc;
desc.ParseFromArray(buf.get(), size);
// std::cout << " desc dims size " <<
// desc.dims().size() <<
// std::endl;
int memory_size = 1;
for (int l = 0; l < desc.dims().size(); ++l) {
// std::cout << " dim " << l << " value: "
// <<
// desc.dims()[l]
// << std::endl;
memory_size *= desc.dims()[l];
}
// std::cout << " malloc size: " << memory_size * int type_size = 0;
// type_size // std::cout << " desc pre type: ";
// << std::endl; switch (desc.data_type()) {
void *memory = malloc(memory_size * type_size); case framework::proto::VarType::FP16:
is.read(static_cast<char *>(memory), // std::cout << "FP16" << std::endl;
memory_size * type_size); type_size = 2;
// std::cout << " memory: " << memory << break;
// std::endl; case framework::proto::VarType::FP32:
is.close(); type_size = 4;
} else { // std::cout << "FP32" << std::endl;
// std::cout << " *not load " break;
// << " var : " << var.name() << std::endl; case framework::proto::VarType::FP64:
type_size = 8;
// std::cout << "FP64" << std::endl;
break;
case framework::proto::VarType::INT32:
type_size = 4;
// std::cout << "INT32" << std::endl;
break;
case framework::proto::VarType::INT64:
type_size = 8;
// std::cout << "INT64" << std::endl;
break;
case framework::proto::VarType::BOOL:
type_size = 1;
// std::cout << "BOOL" << std::endl;
break;
default:
break;
// std::cout << " not support" <<
// std::endl;
} }
// std::cout << " malloc size: " << memory_size *
// type_size
// << std::endl;
void *memory = malloc(memory_size * type_size);
is.read(static_cast<char *>(memory), memory_size * type_size);
// std::cout << " memory: " << memory <<
// std::endl;
is.close();
} else {
// std::cout << " *not load "
// << " var : " << var.name() << std::endl;
} }
} }
}
#endif #endif
return program; return program;
} }
template class Loader<CPU, Precision::FP32>; template class Loader<CPU, Precision::FP32>;
} // namespace paddle_mobile } // namespace paddle_mobile
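For orientation, a minimal usage sketch of the Loader above. This is not part of the commit: the header name, model path, and the exact spelling of the CPU/Precision template arguments are assumptions based on the code shown here.

// Hypothetical driver for Loader (assumed header and path).
#include "io.h"

int main() {
    paddle_mobile::Loader<paddle_mobile::CPU, paddle_mobile::Precision::FP32>
        loader;
    // Load() expects a directory containing "__model__" plus one binary
    // file per persistable variable, which LoadVar deserializes above.
    auto program = loader.Load("../models/mobilenet");
    return 0;
}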
@@ -27,14 +27,13 @@ SOFTWARE.

namespace paddle_mobile {

template <typename Dtype, Precision P = Precision::FP32>
class Loader : PaddleMobileObject {
  public:
    const framework::Program<Dtype, P> Load(const std::string &dirname);

  private:
    void LoadVar(framework::LoDTensor *tensor, const std::string &file_path);
};

} // namespace paddle_mobile
@@ -22,30 +22,30 @@ SOFTWARE.

#include <cstring>

namespace paddle_mobile {
namespace memory {
const int MALLOC_ALIGN = 16;

void Copy(void *dst, const void *src, size_t num) {
    std::memcpy(dst, src, num);
};

void *Alloc(size_t size) {
    size_t offset = sizeof(void *) + MALLOC_ALIGN - 1;
    char *p = static_cast<char *>(malloc(offset + size));
    if (!p) {
        return nullptr;
    }
    void *r = reinterpret_cast<void *>(reinterpret_cast<size_t>(p + offset) &
                                       (~(MALLOC_ALIGN - 1)));
    static_cast<void **>(r)[-1] = p;
    return r;
}

void Free(void *ptr) {
    if (ptr) {
        free(static_cast<void **>(ptr)[-1]);
    }
}

} // namespace memory
} // namespace paddle_mobile
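Alloc/Free above implement a classic manual-alignment trick: over-allocate, round the returned address up to a 16-byte boundary, and stash the raw malloc pointer in the word just before the aligned address so Free can recover it. A standalone sketch of the same arithmetic (illustrative only, not part of the commit):

#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
    const size_t kAlign = 16;
    size_t offset = sizeof(void *) + kAlign - 1;
    char *p = static_cast<char *>(malloc(offset + 100));
    // Round p + offset down to a 16-byte boundary; at least sizeof(void *)
    // bytes remain before r to hold the original pointer.
    void *r = reinterpret_cast<void *>(reinterpret_cast<size_t>(p + offset) &
                                       ~(kAlign - 1));
    static_cast<void **>(r)[-1] = p; // what Alloc stores
    assert(reinterpret_cast<uintptr_t>(r) % kAlign == 0);
    free(static_cast<void **>(r)[-1]); // what Free(r) does
    return 0;
}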
@@ -21,44 +21,44 @@ SOFTWARE.

#include <type_traits>

namespace paddle_mobile {
namespace memory {

void Copy(void *dst, const void *src, size_t num);

void *Alloc(size_t size);

void Free(void *ptr);

/**
 * \brief Deleter for memory blocks of POD types.
 *
 * \note In some cases, a custom deleter is used to deallocate the memory
 *       automatically for std::unique_ptr<T> in tensor.h.
 *       Uses static_cast.
 */
template <typename T> class PODDeleter {
    static_assert(std::is_pod<T>::value, "T must be POD");

  public:
    explicit PODDeleter() {}
    void operator()(T *ptr) { Free(static_cast<void *>(ptr)); }
};

/**
 * \brief Deleter for memory blocks whose type does not meet the POD
 *        requirement.
 *
 * \note In some cases, a custom deleter is used to deallocate the memory
 *       automatically for std::unique_ptr<T> in tensor.h.
 *       Uses reinterpret_cast.
 */
template <typename T> class PlainDeleter {
  public:
    explicit PlainDeleter() {}
    void operator()(T *ptr) { Free(reinterpret_cast<void *>(ptr)); }
};
} // namespace memory
} // namespace paddle_mobile
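As the note in the header suggests, these deleters pair with std::unique_ptr so a buffer from Alloc is released through Free automatically. A minimal sketch, assuming the declarations above are visible:

#include <memory>

void example() {
    using paddle_mobile::memory::Alloc;
    using paddle_mobile::memory::PODDeleter;
    std::unique_ptr<float, PODDeleter<float>> guard(
        static_cast<float *>(Alloc(64 * sizeof(float))), PODDeleter<float>());
    // When guard goes out of scope, PODDeleter::operator() forwards the
    // pointer to Free, which unwinds the alignment bookkeeping.
}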
@@ -19,58 +19,57 @@ SOFTWARE.

#include "conv_op.h"
#include "framework/data_type.h"
#include "framework/op_proto_maker.h"

namespace paddle_mobile {
namespace operators {

int ConvOutputSize(int input_size, int filter_size, int dilation, int padding,
                   int stride) {
    const int dkernel = dilation * (filter_size - 1) + 1;
    int output_size = (input_size + 2 * padding - dkernel) / stride + 1;
    return output_size;
}

template <typename Dtype, typename T>
void ConvOp<Dtype, T>::InferShape() const {
    // std::cout << " begin get dims: " << std::endl;
    auto in_dims = param_.Input()->dims();
    // std::cout << " in_dims: " << in_dims << std::endl;
    auto filter_dims = param_.Filter()->dims();
    const std::vector<int> &strides = param_.Strides();
    // std::cout << " end get Attrs " << strides[0] << std::endl;
    std::vector<int> paddings = param_.Paddings();
    int groups = param_.Groups();
    std::vector<int> dilations = param_.Dilations();

    std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
    for (size_t i = 0; i < strides.size(); ++i) {
        output_shape.push_back(ConvOutputSize(in_dims[i + 2],
                                              filter_dims[i + 2], dilations[i],
                                              paddings[i], strides[i]));
    }
    framework::DDim ddim = framework::make_ddim(output_shape);
    param_.Output()->Resize(ddim);
}

template class ConvOp<CPU, float>;

} // namespace operators
} // namespace paddle_mobile
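ConvOutputSize computes the effective (dilated) kernel extent and then the standard strided-window count. A worked check with familiar numbers, restating the function above:

#include <cassert>

int ConvOutputSize(int input_size, int filter_size, int dilation, int padding,
                   int stride) {
    const int dkernel = dilation * (filter_size - 1) + 1;
    return (input_size + 2 * padding - dkernel) / stride + 1;
}

int main() {
    // 224x224 input, 3x3 filter, pad 1, stride 2: (224 + 2 - 3) / 2 + 1 = 112
    assert(ConvOutputSize(224, 3, 1, 1, 2) == 112);
    // Same filter at stride 1 gives "same" padding: output equals input.
    assert(ConvOutputSize(224, 3, 1, 1, 1) == 224);
    return 0;
}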
@@ -22,33 +22,32 @@ SOFTWARE.

#include "operators/kernel/conv_kernel.h"

namespace paddle_mobile {
namespace operators {

using namespace framework;

template <typename DeviceType, typename T>
class ConvOp : public framework::OperatorWithKernel<DeviceType> {
  public:
    ConvOp(const std::string &type, const VariableNameMap &inputs,
           const VariableNameMap &outputs, const framework::AttributeMap &attrs,
           std::shared_ptr<framework::Scope> scope)
        : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs,
                                                    attrs, scope),
          param_(inputs, outputs, attrs, *scope) {}

    using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
    void InferShape() const override;

    void Run() const {
        operators::ConvKernel<DeviceType, T, ConvParam> kernel;
        kernel.Compute(param_);
        this->ClearVariables();
    }

  private:
    ConvParam param_;
};

} // namespace operators
} // namespace paddle_mobile
@@ -19,13 +19,13 @@ SOFTWARE.

#include "elementwise_add_op.h"

namespace paddle_mobile {
namespace operators {

template <typename Dtype, typename T>
void ElementwiseAddOp<Dtype, T>::InferShape() const {
    auto x_dim = param_.InputX()->dims();
    param_.Out()->Resize(x_dim);
}
template class ElementwiseAddOp<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
@@ -21,35 +21,32 @@ SOFTWARE.

#include "op_param.h"

namespace paddle_mobile {
namespace operators {

using namespace framework;

template <typename DeviceType, typename T>
class ElementwiseAddOp : public framework::OperatorWithKernel<DeviceType> {
  public:
    ElementwiseAddOp(const std::string &type, const VariableNameMap &inputs,
                     const VariableNameMap &outputs,
                     const framework::AttributeMap attrs,
                     std::shared_ptr<framework::Scope> scope)
        : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs,
                                                    attrs, scope),
          param_(inputs, outputs, attrs, *scope) {}

    void Run() const {
        operators::ElementwiseAddKernel<DeviceType, T, ElementwiseAddParam>
            kernel;
        kernel.Compute(param_);
    }

    using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
    void InferShape() const override;

  protected:
    ElementwiseAddParam param_;
};
} // namespace operators
} // namespace paddle_mobile
@@ -19,146 +19,137 @@ SOFTWARE.

#include "operators/kernel/conv_kernel.h"

namespace paddle_mobile {
namespace operators {

bool IsExpand(const std::vector<int64_t> &filter_dim,
              const std::vector<int> &strides, const std::vector<int> &paddings,
              const std::vector<int> &dilations) {
    bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true;
    for (size_t j = 0; j < strides.size(); ++j) {
        filter_1 = filter_1 && (static_cast<int>(filter_dim[j + 2]) == 1);
        strides_1 = strides_1 && (strides[j] == 1);
        padding_0 = padding_0 && (paddings[j] == 0);
        dilation_1 = dilation_1 && (dilations[j] == 1);
    }
    return !(filter_1 && strides_1 && padding_0 && dilation_1);
}

template <>
void ConvKernel<CPU, float, ConvParam>::Compute(const ConvParam &param) const {
    LOG(kLOG_DEBUG) << param;

    const Tensor *input = param.Input();

    // The filter will be reshaped in the calculations,
    // so here use an assignment operation,
    // that avoids modifying the variable in the Scope.
    Tensor filter = *param.Filter();

    Tensor *output = param.Output();
    // output->mutable_data<T>(context.GetPlace());

    int groups = param.Groups();
    std::vector<int> strides = param.Strides();
    std::vector<int> paddings = param.Paddings();
    std::vector<int> dilations = param.Dilations();

    DLOG << " compute end get Attrs " << strides[0];

    const int batch_size = static_cast<int>(input->dims()[0]);

    // filter_shape_vec: {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h, k_w}
    std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
    // output_shape_vec: {o_n, o_c, o_h, o_w} or {o_n, o_c, o_d, o_h, o_w}
    std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));

    // use col_shape in the im2col calculation
    // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or
    // {i_c/g, k_d, k_h, k_w, o_d, o_h, o_w}
    size_t data_dim = filter_shape_vec.size() - 2;
    std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
    col_shape_vec[0] = input->dims()[1] / groups;
    for (size_t j = 0; j < data_dim; ++j) {
        col_shape_vec[j + 1] = filter_shape_vec[j + 2];
        col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
    }
    framework::DDim col_shape(framework::make_ddim(col_shape_vec));

    // use col_matrix_shape in the gemm calculation
    // size: (i_c/g * k_h * k_w, o_h * o_w) or
    // (i_c/g * k_d * k_h * k_w, o_d * o_h * o_w)
    framework::DDim col_matrix_shape =
        framework::flatten_to_2d(col_shape, data_dim + 1);

    bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
    Tensor col;
    // col_matrix shares the same piece of data with col,
    // but will be reshaped into a two-dimensional matrix shape
    // to call the matrix multiplication interface.
    Tensor col_matrix;
    if (is_expand) {
        col.mutable_data<float>(col_shape);
        col_matrix.ShareDataWith(col);
        col_matrix.Resize(col_matrix_shape);
    }

    framework::DDim input_shape = framework::slice_ddim(
        input->dims(), 1, static_cast<int>(input->dims().size()));

    framework::DDim filter_matrix_shape = {filter.dims()[0],
                                           filter.numel() / filter.dims()[0]};
    filter.Resize(filter_matrix_shape);

    framework::DDim output_matrix_shape = {
        output->dims()[1],
        output->numel() / (output->dims()[0] * output->dims()[1])};

    // convolution operator: im2col(or vol2col) + gemm
    int in_step = static_cast<int>(input->dims()[1]) / groups;
    int out_step = static_cast<int>(output->dims()[1]) / groups;

    math::Vol2ColFunctor<CPU, float> vol2col;
    math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;

    // auto& dev_ctx = context.template device_context<DeviceContext>();
    for (int i = 0; i < batch_size; i++) {
        Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
        Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);

        for (int g = 0; g < groups; g++) {
            Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);

            if (!is_expand) {
                col.ShareDataWith(in_slice);
                col_matrix.ShareDataWith(col);
                col_matrix.Resize(col_matrix_shape);
            } else if (data_dim == 2U) {
                // im2col
                im2col(in_slice, dilations, strides,
                       std::vector<int>{paddings[0], paddings[1], paddings[0],
                                        paddings[1]},
                       &col);
            } else if (data_dim == 3U) {
                // vol2col
                vol2col(in_slice, dilations, strides, paddings, &col);
            }

            // gemm
            Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
            Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
            math::matmul<float>(filter_slice, false, col_matrix, false,
                                float(1.0), &out_slice, float(0.0));
        }
    }
}

template class ConvKernel<CPU, float, ConvParam>;

} // namespace operators
} // namespace paddle_mobile
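The Compute above lowers convolution to im2col followed by a per-group gemm. A small trace of its shape bookkeeping with assumed example numbers (8-channel 32x32 input, 16 3x3 filters, stride 1, pad 1, groups 1), not part of the commit:

#include <cstdio>
#include <vector>

int main() {
    std::vector<long> in = {1, 8, 32, 32};    // {n, i_c, i_h, i_w}
    std::vector<long> filter = {16, 8, 3, 3}; // {k_o, k_i, k_h, k_w}
    std::vector<long> out = {1, 16, 32, 32};  // {n, o_c, o_h, o_w}

    // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} = {8, 3, 3, 32, 32}
    long col[] = {in[1] / 1, filter[2], filter[3], out[2], out[3]};
    // col_matrix: (i_c/g * k_h * k_w, o_h * o_w) = (72, 1024)
    printf("col_matrix: (%ld, %ld)\n", col[0] * col[1] * col[2],
           col[3] * col[4]);
    // filter matrix: (k_o, k_i * k_h * k_w) = (16, 72), so each group's gemm
    // (16, 72) x (72, 1024) produces the (16, 1024) output matrix slice.
    return 0;
}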
@@ -17,25 +17,25 @@ limitations under the License. */

#include "operators/kernel/elementwise_add_kernel.h"

namespace paddle_mobile {
namespace operators {

template <typename T> struct AddFunctor {
    inline T operator()(T a, T b) const { return a + b; }
};

template <>
void ElementwiseAddKernel<CPU, float, ElementwiseAddParam>::Compute(
    const ElementwiseAddParam &param) const {
    const Tensor *input_x = param.InputX();
    const Tensor *input_y = param.InputY();
    Tensor *Out = param.Out();
    Out->mutable_data<float>();
    const int axis = param.Axis();
    ElementwiseComputeEx<AddFunctor<float>, float>(input_x, input_y, axis,
                                                   AddFunctor<float>(), Out);
}

template class ElementwiseAddKernel<CPU, float, ElementwiseAddParam>;

} // namespace operators
} // namespace paddle_mobile
@@ -21,36 +21,34 @@ SOFTWARE.

#include "operators/kernel/mul_kernel.h"

namespace paddle_mobile {
namespace operators {

template <>
void MulKernel<CPU, float, MulParam>::Compute(const MulParam &param) const {
    const Tensor *input_x = param.InputX();
    const Tensor *input_y = param.InputY();
    Tensor *out = param.Out();
    out->mutable_data<float>();
    const Tensor x_matrix =
        input_x->dims().size() > 2
            ? framework::ReshapeToMatrix(*input_x, param.XNumColDims())
            : *input_x;
    const Tensor y_matrix =
        input_y->dims().size() > 2
            ? framework::ReshapeToMatrix(*input_y, param.YNumColDims())
            : *input_y;
    auto out_dim = out->dims();
    if (out_dim.size() != 2) {
        out->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
    }
    math::matmul<float>(x_matrix, false, y_matrix, false, static_cast<float>(1),
                        out, static_cast<float>(0));
    if (out_dim.size() != 2) {
        out->Resize(out_dim);
    }
}

template class MulKernel<CPU, float, MulParam>;

} // namespace operators
} // namespace paddle_mobile
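MulKernel flattens higher-rank operands to matrices before the gemm. A sketch of the assumed ReshapeToMatrix semantics (fold dims [0, num_col_dims) into rows, the rest into columns); the helper name below is illustrative:

#include <cassert>
#include <utility>
#include <vector>

std::pair<long, long> to_matrix(const std::vector<long> &dims,
                                int num_col_dims) {
    long rows = 1, cols = 1;
    for (int i = 0; i < num_col_dims; ++i) rows *= dims[i];
    for (size_t i = num_col_dims; i < dims.size(); ++i) cols *= dims[i];
    return {rows, cols};
}

int main() {
    // A (2, 3, 4) tensor with num_col_dims = 1 becomes a 2 x 12 matrix.
    auto m = to_matrix({2, 3, 4}, 1);
    assert(m.first == 2 && m.second == 12);
    return 0;
}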
@@ -25,15 +25,14 @@ SOFTWARE.

#pragma once

namespace paddle_mobile {
namespace operators {

using namespace framework;

template <typename DeviceType, typename T, typename P>
class ConvKernel : public framework::OpKernelBase<DeviceType, ConvParam> {
  public:
    void Compute(const ConvParam &param) const;
};
} // namespace operators
} // namespace paddle_mobile
@@ -22,15 +22,15 @@ SOFTWARE.

#include "operators/op_param.h"

namespace paddle_mobile {
namespace operators {

using namespace framework;

template <typename DeviceType, typename T, typename P>
class ElementwiseAddKernel
    : public framework::OpKernelBase<DeviceType, ElementwiseAddParam> {
  public:
    void Compute(const ElementwiseAddParam &param) const;
};
} // namespace operators
} // namespace paddle_mobile
@@ -16,15 +16,13 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/

namespace paddle_mobile {
namespace operators {

// template<>
// void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const
// {}
//
// template class ConvKernel<FPGA, float>;
}
} // namespace paddle_mobile
@@ -22,14 +22,14 @@ SOFTWARE.

#pragma once

namespace paddle_mobile {
namespace operators {

using namespace framework;

template <typename DeviceType, typename T, typename P>
class MulKernel : public framework::OpKernelBase<DeviceType, MulParam> {
  public:
    void Compute(const MulParam &param) const;
};
} // namespace operators
} // namespace paddle_mobile
@@ -18,194 +18,190 @@ limitations under the License. */

#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)

namespace paddle_mobile {
namespace operators {

/*
 * Out = X ⊙ Y
 * If Y's shape does not match X's shape, they will be reshaped.
 * For example:
 * 1. shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
 *    pre=2, n=3*4, post=5
 *    x.shape(2, 12, 5) * y.shape(1, 12, 1).broadcast(2, 12, 5)
 * 2. shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5)
 *    pre=2*3, n=4*5, post=1
 *    x.shape(6, 20, 1) * y.shape(1, 20, 1).broadcast(6, 20, 1)
 */
inline void get_mid_dims(const framework::DDim &x_dims,
                         const framework::DDim &y_dims, const int axis,
                         int *pre, int *n, int *post) {
    *pre = 1;
    *n = 1;
    *post = 1;
    // compute pre
    for (int i = 0; i < axis; ++i) {
        (*pre) *= x_dims[i];
    }

    for (int i = 0; i < y_dims.size(); ++i) {
        assert(x_dims[i + axis] == y_dims[i]);
        /// "Broadcast dimension mismatch.");
        (*n) *= y_dims[i];
    }

    for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) {
        (*post) *= x_dims[i];
    }
}

/// remove dims tail 1. (4,20,1,1) -> (4,20)
inline void trim_trailing_singular_dims(framework::DDim *dims) {
    // Remove trailing dimensions of size 1 for y
    auto actual_dims_size = dims->size();
    for (; actual_dims_size != 0; --actual_dims_size) {
        if ((*dims)[actual_dims_size - 1] != 1)
            break;
    }
    if (actual_dims_size != dims->size()) {
        auto actual_dims = framework::vectorize(*dims);
        actual_dims.resize(actual_dims_size);
        *dims = framework::make_ddim(actual_dims);
    }
}

template <typename T> class RowwiseTransformIterator {
  public:
    RowwiseTransformIterator(const T *ptr, int n) : ptr_(ptr), i_(0), n_(n) {}

    RowwiseTransformIterator<T> &operator++() {
        ++i_;
        if (UNLIKELY(i_ == n_)) {
            i_ = 0;
        }
        return *this;
    }

    bool operator==(const RowwiseTransformIterator<T> &rhs) const {
        return (ptr_ + i_) == &(*rhs);
    }

    bool operator!=(const RowwiseTransformIterator<T> &rhs) const {
        return (ptr_ + i_) != &(*rhs);
    }

    const T &operator*() { return ptr_[i_]; }

  private:
    const T *ptr_;
    int i_;
    int64_t n_;
};

/// (4,20,2)+(20,): (20,) behaves as (20,1); where the last dimension of
/// (4,20,2) moves 2 strides, (20,1) moves 1 stride, so each y element
/// fills (adds to) 2 consecutive x elements.
template <typename T> class MidWiseTransformIterator {
  public:
    MidWiseTransformIterator(const T *ptr, int n, int post)
        : ptr_(ptr), i_(0), j_(0), n_(n), post_(post) {}

    MidWiseTransformIterator<T> &operator++() {
        ++j_;
        if (UNLIKELY(j_ == post_)) {
            ++i_;
            j_ = 0;
            if (UNLIKELY(i_ == n_)) {
                i_ = 0;
            }
        }
        return *this;
    }

    bool operator==(const MidWiseTransformIterator<T> &rhs) const {
        return (ptr_ + i_) == &(*rhs);
    }

    bool operator!=(const MidWiseTransformIterator<T> &rhs) const {
        return (ptr_ + i_) != &(*rhs);
    }

    const T &operator*() { return ptr_[i_]; }

  private:
    const T *ptr_;
    int64_t i_;
    int64_t j_;
    int64_t n_;
    int64_t post_;
};

template <typename Functor, typename T, typename OutType = T>
class TransformFunctor {
  public:
    TransformFunctor(const framework::Tensor *x, const framework::Tensor *y,
                     framework::Tensor *z, Functor func)
        : x_(x->data<T>()), y_(y->data<T>()), z_(z->mutable_data<OutType>()),
          nx_(x->numel()), func_(func) {}

    inline void Run() const {
        math::Transform trans;
        // apply func(x_, y_) elementwise and write the result into z_
        trans(x_, x_ + nx_, y_, z_, func_);
    }

    inline void RunRowWise(int n, int pre) const {
        math::Transform trans;
        trans(x_, x_ + nx_, RowwiseTransformIterator<T>(y_, n), z_, func_);
    }

    inline void RunMidWise(int n, int pre, int post) const {
        math::Transform trans;
        trans(x_, x_ + nx_, MidWiseTransformIterator<T>(y_, n, post), z_,
              func_);
    }

  private:
    const T *x_;
    const T *y_;
    OutType *z_;
    int64_t nx_;
    Functor func_;
};

template <typename Functor, typename T, typename OutType = T>
void ElementwiseComputeEx(const framework::Tensor *x,
                          const framework::Tensor *y, int axis, Functor func,
                          framework::Tensor *z) {
    TransformFunctor<Functor, T, OutType> functor(x, y, z, func);

    auto x_dims = x->dims();
    auto y_dims = y->dims();
    // PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
    //                   "Rank of first input must >= rank of second input.");

    if (x_dims == y_dims) {
        functor.Run();
        return;
    }

    /// axis = -1 represents the last dimension.
    axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
    // PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
    //                "Axis should be in range [0, x_dims)");
    trim_trailing_singular_dims(&y_dims);
    axis = (y_dims.size() == 0) ? x_dims.size() : axis;

    int pre, n, post;
    get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);
    if (post == 1) {
        functor.RunRowWise(n, pre);
        return;
    } else {
        functor.RunMidWise(n, pre, post);
        return;
    }
}

} // namespace operators
} // namespace paddle_mobile
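The pre/n/post decomposition is the heart of this broadcast scheme. A standalone restatement of get_mid_dims over plain vectors, checked against the two examples in the header comment above:

#include <cassert>
#include <vector>

void mid_dims(const std::vector<int> &x, const std::vector<int> &y, int axis,
              int *pre, int *n, int *post) {
    *pre = *n = *post = 1;
    for (int i = 0; i < axis; ++i) *pre *= x[i];              // dims before y
    for (size_t i = 0; i < y.size(); ++i) *n *= y[i];         // dims shared with y
    for (size_t i = axis + y.size(); i < x.size(); ++i) *post *= x[i];
}

int main() {
    int pre, n, post;
    mid_dims({2, 3, 4, 5}, {3, 4}, 1, &pre, &n, &post);
    assert(pre == 2 && n == 12 && post == 5); // example 1 in the comment
    mid_dims({2, 3, 4, 5}, {4, 5}, 2, &pre, &n, &post);
    assert(pre == 6 && n == 20 && post == 1); // example 2 in the comment
    return 0;
}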
@@ -16,349 +16,316 @@ limitations under the License. */

#include "common/types.h"

namespace paddle_mobile {
namespace operators {
namespace math {

/*
 * im = [input_channels, input_height, input_width]
 * col =
 *   [input_channels, filter_height, filter_width, output_height,
 *   output_width]
 */
template <class T> class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
  public:
    void operator()(const framework::Tensor &im,
                    const std::vector<int> &dilation,
                    const std::vector<int> &stride,
                    const std::vector<int> &padding, framework::Tensor *col) {
        // PADDLE_ENFORCE(im.dims().size() == 3);
        // PADDLE_ENFORCE(col->dims().size() == 5);

        int im_channels = im.dims()[0];
        int im_height = im.dims()[1];
        int im_width = im.dims()[2];
        int filter_height = col->dims()[1];
        int filter_width = col->dims()[2];
        int col_height = col->dims()[3];
        int col_width = col->dims()[4];

        // PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] -
        //                    ((dilation[0] * (filter_height - 1) + 1))) /
        //                        stride[0] + 1,
        //                   col_height,
        //                   "Output_height and padding(padding_up,
        //                   padding_down) are inconsistent.");
        // PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] -
        //                    ((dilation[1] * (filter_width - 1) + 1))) /
        //                        stride[1] + 1,
        //                   col_width,
        //                   "Output_height and padding(padding_up,
        //                   padding_down) are inconsistent.");

        int channels_col = im_channels * filter_height * filter_width;

        const T *im_data = im.data<T>();
        T *col_data = col->data<T>();
        for (int c = 0; c < channels_col; ++c) {
            int w_offset = c % filter_width;
            int h_offset = (c / filter_width) % filter_height;
            int c_im = c / (filter_width * filter_height);
            for (int h = 0; h < col_height; ++h) {
                int im_row_idx =
                    h * stride[0] - padding[0] + h_offset * dilation[0];
                for (int w = 0; w < col_width; ++w) {
                    int im_col_idx =
                        w * stride[1] - padding[1] + w_offset * dilation[1];
                    int col_idx = (c * col_height + h) * col_width + w;
                    int im_idx =
                        (im_row_idx + c_im * im_height) * im_width + im_col_idx;

                    col_data[col_idx] =
                        (im_row_idx < 0 || im_row_idx >= im_height ||
                         im_col_idx < 0 || im_col_idx >= im_width)
                            ? static_cast<T>(0)
                            : im_data[im_idx];
                }
            }
        }
    }
};

/*
 * im = [input_channels, input_height, input_width]
 * col =
 *   [input_channels, filter_height, filter_width, output_height,
 *   output_width]
 */
template <class T> class Col2ImFunctor<ColFormat::kCFO, CPU, T> {
  public:
    void operator()(const framework::Tensor &col,
                    const std::vector<int> &dilation,
                    const std::vector<int> &stride,
                    const std::vector<int> &padding, framework::Tensor *im) {
        // PADDLE_ENFORCE(im->dims().size() == 3);
        // PADDLE_ENFORCE(col.dims().size() == 5);
        int im_channels = im->dims()[0];
        int im_height = im->dims()[1];
        int im_width = im->dims()[2];
        int filter_height = col.dims()[1];
        int filter_width = col.dims()[2];
        int col_height = col.dims()[3];
        int col_width = col.dims()[4];

        // PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] -
        //                    ((dilation[0] * (filter_height - 1) + 1))) /
        //                        stride[0] + 1,
        //                   col_height,
        //                   "Output_height and padding(padding_up,
        //                   padding_down) are inconsistent.");
        // PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] -
        //                    ((dilation[1] * (filter_width - 1) + 1))) /
        //                        stride[1] + 1,
        //                   col_width,
        //                   "Output_height and padding(padding_up,
        //                   padding_down) are inconsistent.");

        int channels_col = im_channels * filter_height * filter_width;

        T *im_data = im->data<T>();
        const T *col_data = col.data<T>();
        for (int c = 0; c < channels_col; ++c) {
            int w_offset = c % filter_width;
            int h_offset = (c / filter_width) % filter_height;
            int c_im = c / (filter_width * filter_height);
            for (int h = 0; h < col_height; ++h) {
                int im_row_idx =
                    h * stride[0] - padding[0] + h_offset * dilation[0];
                for (int w = 0; w < col_width; ++w) {
                    int im_col_idx =
                        w * stride[1] - padding[1] + w_offset * dilation[1];
                    if ((im_row_idx) >= 0 && (im_row_idx) < im_height &&
                        (im_col_idx) >= 0 && (im_col_idx) < im_width) {
                        im_data[(im_row_idx + c_im * im_height) * im_width +
                                im_col_idx] +=
                            col_data[(c * col_height + h) * col_width + w];
                    }
                }
            }
        }
    }
};

template class Im2ColFunctor<ColFormat::kCFO, CPU, float>;
template class Im2ColFunctor<ColFormat::kCFO, CPU, double>;
template class Col2ImFunctor<ColFormat::kCFO, CPU, float>;
template class Col2ImFunctor<ColFormat::kCFO, CPU, double>;
/* /*
* im = [input_channels, input_height, input_width] * im = [input_channels, input_height, input_width]
* col = * col =
* [output_height, output_width, input_channels, filter_height, * [output_height, output_width, input_channels, filter_height,
* filter_width] * filter_width]
*/ */
template <class T> class Im2ColFunctor<ColFormat::kOCF, CPU, T> { template <class T> class Im2ColFunctor<ColFormat::kOCF, CPU, T> {
public: public:
void operator()(const framework::Tensor &im, void operator()(const framework::Tensor &im,
const std::vector<int> &dilation, const std::vector<int> &dilation,
const std::vector<int> &stride, const std::vector<int> &stride,
const std::vector<int> &padding, const std::vector<int> &padding, framework::Tensor *col) {
framework::Tensor *col) { // PADDLE_ENFORCE(im.dims().size() == 3);
// PADDLE_ENFORCE(im.dims().size() == 3); // PADDLE_ENFORCE(col->dims().size() == 5);
// PADDLE_ENFORCE(col->dims().size() == 5); int im_channels = im.dims()[0];
int im_channels = im.dims()[0]; int im_height = im.dims()[1];
int im_height = im.dims()[1]; int im_width = im.dims()[2];
int im_width = im.dims()[2]; int filter_height = col->dims()[3];
int filter_height = col->dims()[3]; int filter_width = col->dims()[4];
int filter_width = col->dims()[4]; int col_height = col->dims()[0];
int col_height = col->dims()[0]; int col_width = col->dims()[1];
int col_width = col->dims()[1];
// PADDLE_ENFORCE_EQ( // PADDLE_ENFORCE_EQ(
// (im_height + padding[0] + padding[2] - // (im_height + padding[0] + padding[2] -
// filter_height) / stride[0] // filter_height) / stride[0]
// + 1, col_height, "Output_height and // + 1, col_height, "Output_height and
// padding(padding_up, // padding(padding_up,
// padding_down) are " "inconsistent."); // padding_down) are " "inconsistent.");
// PADDLE_ENFORCE_EQ( // PADDLE_ENFORCE_EQ(
// (im_width + padding[1] + padding[3] - // (im_width + padding[1] + padding[3] -
// filter_width) / stride[1] + // filter_width) / stride[1] +
// 1, col_width, "col_width and padding(padding_left, // 1, col_width, "col_width and padding(padding_left,
// padding_right) // padding_right)
// are " "inconsistent."); // are " "inconsistent.");
const T *im_data = im.data<T>(); const T *im_data = im.data<T>();
T *col_data = col->data<T>(); T *col_data = col->data<T>();
for (int col_row_idx = 0; col_row_idx < col_height; for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
++col_row_idx) { for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
for (int col_col_idx = 0; col_col_idx < col_width; for (int channel = 0; channel < im_channels; ++channel) {
++col_col_idx) { for (int filter_row_idx = 0; filter_row_idx < filter_height;
for (int channel = 0; channel < im_channels; ++filter_row_idx) {
++channel) { int im_row_offset = col_row_idx * stride[0] +
for (int filter_row_idx = 0; filter_row_idx - padding[0];
filter_row_idx < filter_height; for (int filter_col_idx = 0;
++filter_row_idx) { filter_col_idx < filter_width; ++filter_col_idx) {
int im_row_offset = int im_col_offset = col_col_idx * stride[1] +
col_row_idx * stride[0] + filter_col_idx - padding[1];
filter_row_idx - padding[0];
for (int filter_col_idx = 0;
filter_col_idx < filter_width;
++filter_col_idx) {
int im_col_offset =
col_col_idx * stride[1] +
filter_col_idx - padding[1];
int col_offset = int col_offset =
((((col_row_idx)*col_width + ((((col_row_idx)*col_width + col_col_idx) *
col_col_idx) * im_channels +
im_channels + channel) *
channel) * filter_height +
filter_height + filter_row_idx) *
filter_row_idx) * filter_width +
filter_width + filter_col_idx;
filter_col_idx;
int im_offset = (channel * im_height + int im_offset =
im_row_offset) * (channel * im_height + im_row_offset) *
im_width + im_width +
im_col_offset; im_col_offset;
col_data[col_offset] = col_data[col_offset] =
(im_row_offset < 0 || (im_row_offset < 0 ||
im_row_offset >= im_height || im_row_offset >= im_height ||
im_col_offset < 0 || im_col_offset < 0 || im_col_offset >= im_width)
im_col_offset >= im_width) ? static_cast<T>(0)
? static_cast<T>(0) : im_data[im_offset];
: im_data[im_offset];
}
}
}
} }
} }
} }
}; }
}
}
};
/* /*
* im = [input_channels, input_height, input_width] * im = [input_channels, input_height, input_width]
* col = * col =
* [output_height, output_width, input_channels, filter_height, * [output_height, output_width, input_channels, filter_height,
* filter_width] * filter_width]
*/ */
template <class T> class Col2ImFunctor<ColFormat::kOCF, CPU, T> { template <class T> class Col2ImFunctor<ColFormat::kOCF, CPU, T> {
public: public:
void operator()(const framework::Tensor &col, void operator()(const framework::Tensor &col,
const std::vector<int> &dilation, const std::vector<int> &dilation,
const std::vector<int> &stride, const std::vector<int> &stride,
const std::vector<int> &padding, const std::vector<int> &padding, framework::Tensor *im) {
framework::Tensor *im) { // PADDLE_ENFORCE(im->dims().size() == 3);
// PADDLE_ENFORCE(im->dims().size() == 3); // PADDLE_ENFORCE(col.dims().size() == 5);
// PADDLE_ENFORCE(col.dims().size() == 5); int im_channels = im->dims()[0];
int im_channels = im->dims()[0]; int im_height = im->dims()[1];
int im_height = im->dims()[1]; int im_width = im->dims()[2];
int im_width = im->dims()[2]; int filter_height = col.dims()[3];
int filter_height = col.dims()[3]; int filter_width = col.dims()[4];
int filter_width = col.dims()[4]; int col_height = col.dims()[0];
int col_height = col.dims()[0]; int col_width = col.dims()[1];
int col_width = col.dims()[1];
// PADDLE_ENFORCE_EQ( // PADDLE_ENFORCE_EQ(
// (im_height + padding[0] + padding[2] - // (im_height + padding[0] + padding[2] -
// filter_height) / stride[0] // filter_height) / stride[0]
// + 1, col_height, "Output_height and // + 1, col_height, "Output_height and
// padding(padding_up, // padding(padding_up,
// padding_down) are " "inconsistent."); // padding_down) are " "inconsistent.");
// PADDLE_ENFORCE_EQ( // PADDLE_ENFORCE_EQ(
// (im_width + padding[1] + padding[3] - // (im_width + padding[1] + padding[3] -
// filter_width) / stride[1] + // filter_width) / stride[1] +
// 1, col_width, "col_width and padding(padding_left, // 1, col_width, "col_width and padding(padding_left,
// padding_right) // padding_right)
// are " "inconsistent."); // are " "inconsistent.");
T *im_data = im->data<T>(); T *im_data = im->data<T>();
const T *col_data = col.data<T>(); const T *col_data = col.data<T>();
for (int col_row_idx = 0; col_row_idx < col_height; for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
++col_row_idx) { for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
for (int col_col_idx = 0; col_col_idx < col_width; for (int channel = 0; channel < im_channels; ++channel) {
++col_col_idx) { for (int filter_row_idx = 0; filter_row_idx < filter_height;
for (int channel = 0; channel < im_channels; ++filter_row_idx) {
++channel) { int im_row_offset = col_row_idx * stride[0] +
for (int filter_row_idx = 0; filter_row_idx - padding[0];
filter_row_idx < filter_height; for (int filter_col_idx = 0;
++filter_row_idx) { filter_col_idx < filter_width; ++filter_col_idx) {
int im_row_offset = int im_col_offset = col_col_idx * stride[1] +
col_row_idx * stride[0] + filter_col_idx - padding[1];
filter_row_idx - padding[0];
for (int filter_col_idx = 0;
filter_col_idx < filter_width;
++filter_col_idx) {
int im_col_offset =
col_col_idx * stride[1] +
filter_col_idx - padding[1];
int col_offset = int col_offset =
(((col_row_idx * col_width + (((col_row_idx * col_width + col_col_idx) *
col_col_idx) * im_channels +
im_channels + channel) *
channel) * filter_height +
filter_height + filter_row_idx) *
filter_row_idx) * filter_width +
filter_width + filter_col_idx;
filter_col_idx;
if (im_row_offset >= 0 && if (im_row_offset >= 0 &&
im_row_offset < im_height && im_row_offset < im_height &&
im_col_offset >= 0 && im_col_offset >= 0 &&
im_col_offset < im_width) { im_col_offset < im_width) {
int im_offset = int im_offset =
(channel * im_height + (channel * im_height + im_row_offset) *
im_row_offset) * im_width +
im_width + im_col_offset;
im_col_offset; im_data[im_offset] += col_data[col_offset];
im_data[im_offset] +=
col_data[col_offset];
}
}
}
} }
} }
} }
} }
}; }
}
}
};
template class Im2ColFunctor<ColFormat::kOCF, CPU, float>; template class Im2ColFunctor<ColFormat::kOCF, CPU, float>;
template class Im2ColFunctor<ColFormat::kOCF, CPU, double>; template class Im2ColFunctor<ColFormat::kOCF, CPU, double>;
template class Col2ImFunctor<ColFormat::kOCF, CPU, float>; template class Col2ImFunctor<ColFormat::kOCF, CPU, float>;
template class Col2ImFunctor<ColFormat::kOCF, CPU, double>; template class Col2ImFunctor<ColFormat::kOCF, CPU, double>;
} // namespace math } // namespace math
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
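As a quick sanity check on the commented-out PADDLE_ENFORCE_EQ conditions above, the following standalone sketch evaluates the output-size formula for a concrete case (not part of this commit; all names are illustrative):

#include <cassert>

int main() {
    // col_height must equal
    // (H + pad_up + pad_down - (dilation * (filter_h - 1) + 1)) / stride + 1.
    int im_height = 5, filter_height = 3;
    int stride = 1, pad_up = 0, pad_down = 0, dilation = 1;
    int col_height =
        (im_height + pad_up + pad_down -
         (dilation * (filter_height - 1) + 1)) / stride + 1;
    assert(col_height == 3); // 5x5 image, 3x3 filter, stride 1, no padding
    return 0;
}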
...@@ -17,96 +17,94 @@ limitations under the License. */

#include "framework/tensor.h"

namespace paddle_mobile {
namespace operators {
namespace math {

/* The storage format of the colData in the Im2ColFunctor and
 * Col2ImFunctor. */
enum class ColFormat { kCFO = 0, kOCF = 1 };

/*
 * \brief Converts the image data of three dimensions (CHW) into a
 *        colData of five dimensions in the Im2ColFunctor calculation,
 *        and in the Col2ImFunctor calculation, it is reversed.
 *
 * \param imData   Image data.
 * \param imShape  The shape of imData,
 *                 [input_channels, input_height, input_width].
 * \param colData  Column data.
 * \param colShape The shape of colData.
 *
 * \param dilations dilation data.
 *        2-dimension [dilation_height, dilation_width].
 *
 * \param strides stride data.
 *        2-dimension [stride_height, stride_width].
 *
 * \param paddings padding data.
 *        4-dimension [up_pad, left_pad, down_pad, right_pad].
 *
 * If the template argument Format is kCFO, the shape of colData is:
 *   [input_channels, filter_height, filter_width, output_height,
 *    output_width]
 * So, it is easy to reshape into a convolution matrix for convolution
 * calculation based on matrix multiplication.
 * The shape of the convolution matrix is [height, width], where the
 * height is equal to input_channels * filter_height * filter_width,
 * and the width is equal to output_height * output_width.
 *
 * Reshape:
 *     shape of colData            shape of convolution matrix
 *     [input_channels,
 *      filter_height,
 *      filter_width,      ======> [height, width]
 *      output_height,
 *      output_width]
 *
 * If the template argument Format is kOCF, the shape of colData is:
 *   [output_height, output_width, input_channels, filter_height,
 *    filter_width]
 * So, it is easy to reshape into a sequence matrix for rnn calculation.
 * The shape of the sequence matrix is [seq_length, step_size], where
 * the seq_length is equal to output_height * output_width, and the
 * step_size is equal to input_channels * filter_height * filter_width.
 *
 * Reshape:
 *     shape of colData            shape of sequence matrix
 *     [output_height,
 *      output_width,
 *      input_channels,    ======> [seqLength, stepSize]
 *      filter_height,
 *      filter_width]
 *
 * \note The caller needs to ensure that imShape.inputChannels is equal
 *       to colShape.inputChannels.
 */
template <ColFormat Format, typename DeviceType, typename T>
class Im2ColFunctor {
  public:
    void operator()(const framework::Tensor &im,
                    const std::vector<int> &dilation,
                    const std::vector<int> &stride,
                    const std::vector<int> &padding, framework::Tensor *col);
};

template <ColFormat Format, typename DeviceType, typename T>
class Col2ImFunctor {
  public:
    void operator()(const framework::Tensor &col,
                    const std::vector<int> &dilation,
                    const std::vector<int> &stride,
                    const std::vector<int> &padding, framework::Tensor *im);
};

} // namespace math
} // namespace operators
} // namespace paddle_mobile
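A minimal usage sketch of the two functors declared above (comment-only, because allocating framework::Tensor objects uses APIs outside this header; the shapes follow the kCFO layout documented in the comment):

// framework::Tensor im;   // shaped [C, H, W],           e.g. [3, 5, 5]
// framework::Tensor col;  // shaped [C, fh, fw, oh, ow], e.g. [3, 3, 3, 3, 3]
// Im2ColFunctor<ColFormat::kCFO, CPU, float> im2col;
// im2col(im, /*dilation=*/{1, 1}, /*stride=*/{1, 1},
//        /*padding=*/{0, 0, 0, 0}, &col);
// Col2ImFunctor<ColFormat::kCFO, CPU, float> col2im;
// col2im(col, {1, 1}, {1, 1}, {0, 0, 0, 0}, &im); // accumulates back into im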
...@@ -15,125 +15,110 @@ limitations under the License. */

#include "math_function.h"

namespace paddle_mobile {
namespace operators {
namespace math {

template <>
void gemm<float>(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB,
                 const int M, const int N, const int K, const float alpha,
                 const float *A, const float *B, const float beta, float *C) {
    int lda = (transA == CblasNoTrans) ? K : M;
    int ldb = (transB == CblasNoTrans) ? N : K;
    int ldc = N;
    cblas_sgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A, lda, B, ldb,
                beta, C, ldc);
}

template <>
void gemm<double>(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB,
                  const int M, const int N, const int K, const double alpha,
                  const double *A, const double *B, const double beta,
                  double *C) {
    int lda = (transA == CblasNoTrans) ? K : M;
    int ldb = (transB == CblasNoTrans) ? N : K;
    int ldc = N;
    cblas_dgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A, lda, B, ldb,
                beta, C, ldc);
}

template <>
void gemm<float>(const bool transA, const bool transB, const int M,
                 const int N, const int K, const float alpha, const float *A,
                 const int lda, const float *B, const int ldb,
                 const float beta, float *C, const int ldc) {
    cblas_sgemm(CblasRowMajor, transA == false ? CblasNoTrans : CblasTrans,
                transB == false ? CblasNoTrans : CblasTrans, M, N, K, alpha, A,
                lda, B, ldb, beta, C, ldc);
}

template <>
void gemm<double>(const bool transA, const bool transB, const int M,
                  const int N, const int K, const double alpha,
                  const double *A, const int lda, const double *B,
                  const int ldb, const double beta, double *C,
                  const int ldc) {
    cblas_dgemm(CblasRowMajor, transA == false ? CblasNoTrans : CblasTrans,
                transB == false ? CblasNoTrans : CblasTrans, M, N, K, alpha, A,
                lda, B, ldb, beta, C, ldc);
}

template <>
void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
                   const framework::Tensor &matrix_b, bool trans_b,
                   float alpha, framework::Tensor *matrix_out, float beta) {
    auto dim_a = matrix_a.dims();
    auto dim_b = matrix_b.dims();
    auto dim_out = matrix_out->dims();
    // PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 &&
    //                    dim_out.size() == 2,
    //                "The input and output of matmul must be matrices");
    //
    // PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
    //                    platform::is_cpu_place(matrix_b.place()) &&
    //                    platform::is_cpu_place(matrix_out->place()),
    //                "Matrices must all be in CPUPlace");

    int M = dim_out[0];
    int N = dim_out[1];
    int K = (trans_a == false) ? dim_a[1] : dim_a[0];

    CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans;
    CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans;

    gemm<float>(transA, transB, M, N, K, alpha, matrix_a.data<float>(),
                matrix_b.data<float>(), beta, matrix_out->data<float>());
}

template <>
void matmul<double>(const framework::Tensor &matrix_a, bool trans_a,
                    const framework::Tensor &matrix_b, bool trans_b,
                    double alpha, framework::Tensor *matrix_out, double beta) {
    auto dim_a = matrix_a.dims();
    auto dim_b = matrix_b.dims();
    auto dim_out = matrix_out->dims();
    // PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 &&
    //                    dim_out.size() == 2,
    //                "The input and output of matmul must be matrices");
    //
    // PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
    //                    platform::is_cpu_place(matrix_b.place()) &&
    //                    platform::is_cpu_place(matrix_out->place()),
    //                "Matrices must all be in CPUPlace");

    int M = dim_out[0];
    int N = dim_out[1];
    int K = (trans_a == false) ? dim_a[1] : dim_a[0];

    CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans;
    CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans;

    gemm<double>(transA, transB, M, N, K, alpha, matrix_a.data<double>(),
                 matrix_b.data<double>(), beta, matrix_out->data<double>());
}

} // namespace math
} // namespace operators
} // namespace paddle_mobile
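For reference, a standalone sketch of what the row-major gemm<float> wrapper above computes (assumes a CBLAS implementation such as OpenBLAS is linked and the file is compiled together with math_function.cpp; not part of this commit):

#include <cassert>
#include "math_function.h"

int main() {
    // C(2x2) = 1.0 * A(2x3) * B(3x2) + 0.0 * C, all row-major.
    const float A[] = {1, 2, 3, 4, 5, 6};
    const float B[] = {1, 0, 0, 1, 1, 1};
    float C[4] = {0};
    paddle_mobile::operators::math::gemm<float>(
        CblasNoTrans, CblasNoTrans, /*M=*/2, /*N=*/2, /*K=*/3,
        /*alpha=*/1.0f, A, B, /*beta=*/0.0f, C);
    assert(C[0] == 4 && C[1] == 5);   // row 0 of A*B
    assert(C[2] == 10 && C[3] == 11); // row 1 of A*B
    return 0;
}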
...@@ -19,26 +19,24 @@ limitations under the License. */

#include <cmath>

namespace paddle_mobile {
namespace operators {
namespace math {

template <typename T>
void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB,
          const int M, const int N, const int K, const T alpha, const T *A,
          const T *B, const T beta, T *C);

template <typename T>
void gemm(const bool transA, const bool transB, const int M, const int N,
          const int K, const T alpha, const T *A, const int lda, const T *B,
          const int ldb, const T beta, T *C, const int ldc);

// matrix multiply with continuous memory
template <typename T>
void matmul(const framework::Tensor &matrix_a, bool trans_a,
            const framework::Tensor &matrix_b, bool trans_b, T alpha,
            framework::Tensor *matrix_out, T beta);

} // namespace math
} // namespace operators
} // namespace paddle_mobile
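Comment-only shape contract for matmul, inferred from the CPU implementation above (with both trans flags false; illustrative, not part of this commit):

// matrix_a:   [M, K]
// matrix_b:   [K, N]
// matrix_out: [M, N]
// matrix_out = alpha * matrix_a * matrix_b + beta * matrix_out
//
// matmul<float>(a, /*trans_a=*/false, b, /*trans_b=*/false,
//               /*alpha=*/1.0f, &out, /*beta=*/0.0f);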
...@@ -17,41 +17,39 @@ limitations under the License. */

#include <algorithm>

namespace paddle_mobile {
namespace operators {
namespace math {

// Transform applies a unary or a binary functor to each element in a
// range defined by a pair of iterators.
//
// - The specialization for CPU calls std::transform.
// - The specialization for CUDA calls thrust::transform.
//
// NOTE: We define InputIter and OutputIter as different types, because
//       InputIter points to the op's inputs and OutputIter points to
//       the op's outputs.
//
// NOTE: We don't assume that InputIter is const InputType* and
//       OutputIter is OutputType*, because we might use an iterator
//       class, e.g., paddle::fluid::operators::RowwiseTransformIterator.
struct Transform {
    template <typename InputIter, typename OutputIter,
              typename UnaryOperation>
    void operator()(InputIter first, InputIter last, OutputIter result,
                    UnaryOperation op) {
        std::transform(first, last, result, op);
    }

    template <typename InputIter1, typename InputIter2, typename OutputIter,
              typename BinaryOperation>
    void operator()(InputIter1 first1, InputIter1 last1, InputIter2 first2,
                    OutputIter result, BinaryOperation op) {
        std::transform(first1, last1, first2, result, op);
    }
};

} // namespace math
} // namespace operators
} // namespace paddle_mobile
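A small, self-contained sketch of Transform in use (illustrative only; the include path assumes this header is transform.h):

#include <cassert>
#include <vector>
#include "transform.h"

int main() {
    paddle_mobile::operators::math::Transform trans;
    std::vector<int> a{1, 2, 3}, b{10, 20, 30}, out(3);
    // Unary form: out[i] = a[i] * 2.
    trans(a.begin(), a.end(), out.begin(), [](int x) { return x * 2; });
    assert(out[2] == 6);
    // Binary form: out[i] = a[i] + b[i].
    trans(a.begin(), a.end(), b.begin(), out.begin(),
          [](int x, int y) { return x + y; });
    assert(out[2] == 33);
    return 0;
}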
...@@ -15,212 +15,200 @@ limitations under the License. */

#include "vol2col.h"

namespace paddle_mobile {
namespace operators {
namespace math {

using Tensor = paddle_mobile::framework::Tensor;

/*
 * vol = [input_channels, input_depth, input_height, input_width]
 * col =
 *   [input_channels, filter_depth, filter_height, filter_width,
 *    output_depth, output_height, output_width]
 */
template <typename T> class Vol2ColFunctor<CPU, T> {
  public:
    void operator()(const Tensor &vol, const std::vector<int> &dilations,
                    const std::vector<int> &strides,
                    const std::vector<int> &paddings, Tensor *col) const {
        // PADDLE_ENFORCE(vol.dims().size() == 4);
        // PADDLE_ENFORCE(col->dims().size() == 7);

        int input_channels = vol.dims()[0];
        int input_depth = vol.dims()[1];
        int input_height = vol.dims()[2];
        int input_width = vol.dims()[3];
        int filter_depth = col->dims()[1];
        int filter_height = col->dims()[2];
        int filter_width = col->dims()[3];
        int output_depth = col->dims()[4];
        int output_height = col->dims()[5];
        int output_width = col->dims()[6];
        int channels_col =
            input_channels * filter_depth * filter_height * filter_width;

        // PADDLE_ENFORCE_EQ(
        //     (input_depth + 2 * paddings[0] -
        //      (dilations[0] * (filter_depth - 1) + 1)) / strides[0] + 1,
        //     output_depth,
        //     "input_depth and output_depth are mismatching.");
        // PADDLE_ENFORCE_EQ(
        //     (input_height + 2 * paddings[1] -
        //      (dilations[1] * (filter_height - 1) + 1)) / strides[1] + 1,
        //     output_height,
        //     "input_height and output_height are mismatching.");
        // PADDLE_ENFORCE_EQ(
        //     (input_width + 2 * paddings[2] -
        //      (dilations[2] * (filter_width - 1) + 1)) / strides[2] + 1,
        //     output_width,
        //     "input_width and output_width are mismatching.");

        const T *vol_data = vol.data<T>();
        T *col_data = col->data<T>();

        for (int c = 0; c < channels_col; ++c) {
            int w_offset = c % filter_width;
            int h_offset = (c / filter_width) % filter_height;
            int d_offset = (c / filter_width / filter_height) % filter_depth;
            int c_in = c / filter_width / filter_height / filter_depth;
            for (int d = 0; d < output_depth; ++d) {
                int d_pad =
                    d * strides[0] - paddings[0] + d_offset * dilations[0];
                for (int h = 0; h < output_height; ++h) {
                    int h_pad =
                        h * strides[1] - paddings[1] + h_offset * dilations[1];
                    for (int w = 0; w < output_width; ++w) {
                        int w_pad = w * strides[2] - paddings[2] +
                                    w_offset * dilations[2];

                        int col_idx =
                            ((c * output_depth + d) * output_height + h) *
                                output_width +
                            w;
                        int vol_idx =
                            ((c_in * input_depth + d_pad) * input_height +
                             h_pad) *
                                input_width +
                            w_pad;
                        col_data[col_idx] =
                            (h_pad < 0 || h_pad >= input_height ||
                             w_pad < 0 || w_pad >= input_width ||
                             d_pad < 0 || d_pad >= input_depth)
                                ? static_cast<T>(0)
                                : vol_data[vol_idx];
                    }
                }
            }
        }
    }
};

/*
 * vol = [input_channels, input_depth, input_height, input_width]
 * col =
 *   [input_channels, filter_depth, filter_height, filter_width,
 *    output_depth, output_height, output_width]
 */
template <typename T> class Col2VolFunctor<CPU, T> {
  public:
    void operator()(const Tensor &col, const std::vector<int> &dilations,
                    const std::vector<int> &strides,
                    const std::vector<int> &paddings, Tensor *vol) const {
        // PADDLE_ENFORCE(vol->dims().size() == 4);
        // PADDLE_ENFORCE(col.dims().size() == 7);

        int input_channels = vol->dims()[0];
        int input_depth = vol->dims()[1];
        int input_height = vol->dims()[2];
        int input_width = vol->dims()[3];
        int filter_depth = col.dims()[1];
        int filter_height = col.dims()[2];
        int filter_width = col.dims()[3];
        int output_depth = col.dims()[4];
        int output_height = col.dims()[5];
        int output_width = col.dims()[6];
        int channels_col =
            input_channels * filter_depth * filter_height * filter_width;

        // PADDLE_ENFORCE_EQ(
        //     (input_depth + 2 * paddings[0] -
        //      (dilations[0] * (filter_depth - 1) + 1)) / strides[0] + 1,
        //     output_depth,
        //     "input_depth and output_depth are mismatching.");
        // PADDLE_ENFORCE_EQ(
        //     (input_height + 2 * paddings[1] -
        //      (dilations[1] * (filter_height - 1) + 1)) / strides[1] + 1,
        //     output_height,
        //     "input_height and output_height are mismatching.");
        // PADDLE_ENFORCE_EQ(
        //     (input_width + 2 * paddings[2] -
        //      (dilations[2] * (filter_width - 1) + 1)) / strides[2] + 1,
        //     output_width,
        //     "input_width and output_width are mismatching.");
        T *vol_data = vol->data<T>();
        const T *col_data = col.data<T>();

        for (int c = 0; c < channels_col; ++c) {
            int w_offset = c % filter_width;
            int h_offset = (c / filter_width) % filter_height;
            int d_offset = (c / filter_width / filter_height) % filter_depth;
            int cIm = c / filter_width / filter_height / filter_depth;
            for (int d = 0; d < output_depth; ++d) {
                int d_pad =
                    d * strides[0] - paddings[0] + d_offset * dilations[0];
                for (int h = 0; h < output_height; ++h) {
                    int h_pad =
                        h * strides[1] - paddings[1] + h_offset * dilations[1];
                    for (int w = 0; w < output_width; ++w) {
                        int w_pad = w * strides[2] - paddings[2] +
                                    w_offset * dilations[2];

                        if (h_pad >= 0 && h_pad < input_height &&
                            w_pad >= 0 && w_pad < input_width &&
                            d_pad >= 0 && d_pad < input_depth) {
                            int vol_idx =
                                ((cIm * input_depth + d_pad) * input_height +
                                 h_pad) *
                                    input_width +
                                w_pad;

                            int col_idx =
                                ((c * output_depth + d) * output_height + h) *
                                    output_width +
                                w;
                            vol_data[vol_idx] += col_data[col_idx];
                        }
                    }
                }
            }
        }
    }
};

template class Vol2ColFunctor<CPU, float>;
template class Vol2ColFunctor<CPU, double>;
template class Col2VolFunctor<CPU, float>;
template class Col2VolFunctor<CPU, double>;

} // namespace math
} // namespace operators
} // namespace paddle_mobile
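The same kind of sanity check works for the 3-D formulas in the commented-out PADDLE_ENFORCE_EQ calls above (standalone sketch, illustrative names; not part of this commit):

#include <cassert>

int main() {
    // output_depth must equal
    // (D + 2 * pad - (dilation * (filter_d - 1) + 1)) / stride + 1.
    int input_depth = 4, filter_depth = 3;
    int stride = 1, pad = 1, dilation = 1;
    int output_depth =
        (input_depth + 2 * pad -
         (dilation * (filter_depth - 1) + 1)) / stride + 1;
    assert(output_depth == 4); // "same" padding for a 3-wide filter
    return 0;
}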
...@@ -18,78 +18,74 @@ limitations under the License. */

#include "framework/tensor.h"

namespace paddle_mobile {
namespace operators {
namespace math {

/*
 * \brief Converts the feature data of four dimensions (CDHW) into a
 *        colData of seven dimensions in the Vol2ColFunctor calculation,
 *        and in the Col2VolFunctor calculation, it is reversed.
 *
 * \param volData  Vol data.
 * \param volShape The shape of volData,
 *                 [input_channels, input_depth, input_height,
 *                  input_width].
 * \param colData  Column data.
 * \param colShape The shape of colData.
 *
 * \param dilations dilation data.
 *        3-dimension [dilation_depth, dilation_height, dilation_width].
 *
 * \param strides stride data.
 *        3-dimension [stride_depth, stride_height, stride_width].
 *
 * \param paddings padding data.
 *        3-dimension [d_pad, h_pad, w_pad].
 *
 * The shape of colData is:
 *   [input_channels, filter_depth, filter_height, filter_width,
 *    output_depth, output_height, output_width]
 * So, it is easy to reshape into a convolution matrix for convolution
 * calculation based on matrix multiplication.
 * The shape of the convolution matrix is [height, width], where the
 * height is equal to input_channels * filter_depth * filter_height *
 * filter_width, and the width is equal to output_depth * output_height *
 * output_width.
 *
 * Reshape:
 *     shape of colData           shape of convolution matrix
 *     [input_channels,
 *      filter_depth,
 *      filter_height,
 *      filter_width,     ======> [height, width]
 *      output_depth,
 *      output_height,
 *      output_width]
 *
 * \note The caller needs to ensure that volShape.inputChannels is equal
 *       to colShape.inputChannels.
 */
using Tensor = paddle_mobile::framework::Tensor;

template <typename DeviceType, typename T> class Vol2ColFunctor {
  public:
    void operator()(const Tensor &vol, const std::vector<int> &dilations,
                    const std::vector<int> &strides,
                    const std::vector<int> &paddings, Tensor *col) const;
};

template <typename DeviceType, typename T> class Col2VolFunctor {
  public:
    void operator()(const Tensor &col, const std::vector<int> &dilations,
                    const std::vector<int> &strides,
                    const std::vector<int> &paddings, Tensor *vol) const;
};

} // namespace math
} // namespace operators
} // namespace paddle_mobile
...@@ -19,39 +19,38 @@ SOFTWARE.

#include "mul_op.h"

namespace paddle_mobile {
namespace operators {

template <typename Dtype, typename T>
void MulOp<Dtype, T>::InferShape() const {
    auto x_dims = param_.InputX()->dims();
    auto y_dims = param_.InputY()->dims();
    int x_num_col_dims = param_.XNumColDims();
    int y_num_col_dims = param_.YNumColDims();

    assert(x_dims.size() > x_num_col_dims);
    assert(y_dims.size() > y_num_col_dims);

    /// (1,2,3,4) , x_num_col_dims = 2 -> (2,12)
    auto x_mat_dims = framework::flatten_to_2d(x_dims, x_num_col_dims);
    auto y_mat_dims = framework::flatten_to_2d(y_dims, y_num_col_dims);

    assert(x_mat_dims[1] == y_mat_dims[0]);

    std::vector<int64_t> output_dims;
    output_dims.reserve(
        static_cast<size_t>(x_num_col_dims + y_dims.size() - y_num_col_dims));

    for (int i = 0; i < x_num_col_dims; ++i) {
        output_dims.push_back(x_dims[i]);
    }

    for (int i = y_num_col_dims; i < y_dims.size(); ++i) {
        output_dims.push_back(y_dims[i]);
    }

    framework::DDim ddim = framework::make_ddim(output_dims);
    param_.Out()->Resize(ddim);
}

template class MulOp<CPU, float>;

} // namespace operators
} // namespace paddle_mobile
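Worked example of the shape inference above, extending the (1,2,3,4) example from the source comment (the y shape is illustrative):

// x_dims = (1, 2, 3, 4), x_num_col_dims = 2  ->  x_mat_dims = (2, 12)
// y_dims = (12, 5),      y_num_col_dims = 1  ->  y_mat_dims = (12, 5)
// The inner dimensions match (12 == 12), so the output keeps the first
// x_num_col_dims axes of x followed by the trailing axes of y:
// output_dims = (1, 2) ++ (5) = (1, 2, 5)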
...@@ -21,32 +21,31 @@ SOFTWARE.

#include "operators/op_param.h"

namespace paddle_mobile {
namespace operators {

using namespace framework;

template <typename DeviceType, typename T>
class MulOp : public framework::OperatorWithKernel<DeviceType> {
  public:
    MulOp(const std::string &type, const VariableNameMap &inputs,
          const VariableNameMap &outputs, const framework::AttributeMap attrs,
          std::shared_ptr<framework::Scope> scope)
        : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs,
                                                    attrs, scope),
          param_(inputs, outputs, attrs, *scope) {}

    void Run() const {
        operators::MulKernel<DeviceType, T, MulParam> kernel;
        kernel.Compute(param_);
    }

    using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
    void InferShape() const override;

  protected:
    MulParam param_;
};

} // namespace operators
} // namespace paddle_mobile
...@@ -19,27 +19,27 @@ SOFTWARE.

#include "op_param.h"

namespace paddle_mobile {
namespace operators {

Print &operator<<(Print &printer, const ConvParam &conv_param) {
    printer << "parameter of conv: "
            << "\n";
    printer << " stride: "
            << " (" << conv_param.Strides()[0] << conv_param.Strides()[1]
            << ") "
            << "\n";
    printer << " paddings: "
            << " (" << conv_param.Paddings()[0] << conv_param.Paddings()[1]
            << ") "
            << "\n";
    printer << " dilations: "
            << " (" << conv_param.Dilations()[0] << conv_param.Dilations()[1]
            << ") "
            << "\n";
    printer << " groups: " << conv_param.Groups() << "\n";
    printer << " input dims: " << conv_param.Input()->dims() << "\n";
    printer << " filter dims: " << conv_param.Filter()->dims() << "\n";
    printer << " output dims: " << conv_param.Output()->dims();
    return printer;
}

} // namespace operators
} // namespace paddle_mobile
...@@ -26,211 +26,201 @@ SOFTWARE.

#include "framework/variable.h"

namespace paddle_mobile {
namespace operators {

using namespace framework;

class OpParam : PaddleMobileObject {
  public:
  protected:
    template <typename T>
    static T *InputFrom(const VariableNameMap &inputs, const Scope &scope) {
        return GetVarValue<T>("Input", inputs, scope);
    }

    template <typename T>
    static T *InputXFrom(const VariableNameMap &inputs, const Scope &scope) {
        return GetVarValue<T>("X", inputs, scope);
    }

    template <typename T>
    static T *InputYFrom(const VariableNameMap &inputs, const Scope &scope) {
        return GetVarValue<T>("Y", inputs, scope);
    }

    template <typename T>
    static std::vector<T *> InputMultiFrom(const VariableNameMap &inputs,
                                           const Scope &scope) {
        return GetMultiVarValue<T>("Input", inputs, scope);
    }

    template <typename T>
    static T *OutputFrom(const VariableNameMap &outputs, const Scope &scope) {
        return GetVarValue<T>("Output", outputs, scope);
    }

    template <typename T>
    static T *OutFrom(const VariableNameMap &outputs, const Scope &scope) {
        return GetVarValue<T>("Out", outputs, scope);
    }

    template <typename T>
    static T *FilterFrom(const VariableNameMap &inputs, const Scope &scope) {
        return GetVarValue<T>("Filter", inputs, scope);
    }

    template <typename T>
    static const T GetAttr(std::string key, const AttributeMap &map) {
        return ((Attribute)map.at(key)).Get<T>();
    }

    template <typename T>
    static T *GetVarValue(std::string key, const VariableNameMap &var_map,
                          const Scope &scope) {
        auto var_vec = var_map.at(key);
        if (var_vec.size()) {
            // std::cout << " get var value -- " << var_vec[0] << std::endl;
            auto var = scope.FindVar(var_vec[0]);
            return var->GetMutable<T>();
        } else {
            return nullptr;
        }
    }

    template <typename T>
    static std::vector<T *> GetMultiVarValue(std::string key,
                                             const VariableNameMap &var_map,
                                             const Scope &scope) {
        auto var_vecs = var_map.at(key);
        assert(var_vecs.size() > 1);
        std::vector<T *> var_res;
        for (auto &var_vec : var_vecs) {
            auto var = scope.FindVar(var_vec);
            var_res.push_back(var->GetMutable<T>());
        }
        return var_res;
    }
};

class ConvParam : OpParam {
  public:
    ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
              const framework::AttributeMap &attrs,
              const framework::Scope &scope) {
        filter_ = FilterFrom<framework::LoDTensor>(inputs, scope);
        input_ = InputFrom<framework::Tensor>(inputs, scope);
        output_ = OutputFrom<framework::Tensor>(outputs, scope);
        strides_ = GetAttr<std::vector<int>>("strides", attrs);
        paddings_ = GetAttr<std::vector<int>>("paddings", attrs);
        dilations_ = GetAttr<std::vector<int>>("dilations", attrs);
        groups = GetAttr<int>("groups", attrs);
    }

    const Tensor *Input() const { return input_; }

    const LoDTensor *Filter() const { return filter_; }

    Tensor *Output() const { return output_; }

    const std::vector<int> &Strides() const { return strides_; }

    const std::vector<int> &Paddings() const { return paddings_; }

    const std::vector<int> &Dilations() const { return dilations_; }

    const int &Groups() const { return groups; }

  private:
    Tensor *input_;
    Tensor *output_;
    LoDTensor *filter_;
    std::vector<int> strides_;
    std::vector<int> paddings_;
    std::vector<int> dilations_;
    int groups;
};

Print &operator<<(Print &printer, const ConvParam &conv_param);

class ElementwiseAddParam : OpParam {
  public:
    ElementwiseAddParam(const VariableNameMap &inputs,
                        const VariableNameMap &outputs,
                        const framework::AttributeMap &attrs,
                        const framework::Scope &scope) {
        input_x_ = InputXFrom<framework::Tensor>(inputs, scope);
        input_y_ = InputYFrom<framework::Tensor>(inputs, scope);
        out_ = OutFrom<framework::Tensor>(outputs, scope);
        axis_ = GetAttr<int>("axis", attrs);
    }

    const Tensor *InputX() const { return input_x_; }
    const Tensor *InputY() const { return input_y_; }
    Tensor *Out() const { return out_; }
    const int &Axis() const { return axis_; }

  private:
    Tensor *input_x_;
    Tensor *input_y_;
    Tensor *out_;
    int axis_;
};

class MulParam : OpParam {
  public:
    MulParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
             const framework::AttributeMap &attrs,
             const framework::Scope &scope) {
        input_x_ = InputXFrom<framework::Tensor>(inputs, scope);
        input_y_ = InputYFrom<framework::Tensor>(inputs, scope);
        out_ = OutFrom<framework::Tensor>(outputs, scope);
        x_num_col_dims_ = GetAttr<int>("x_num_col_dims", attrs);
        y_num_col_dims_ = GetAttr<int>("y_num_col_dims", attrs);
    }

    const Tensor *InputX() const { return input_x_; }
    const Tensor *InputY() const { return input_y_; }
    Tensor *Out() const { return out_; }
    const int &XNumColDims() const { return x_num_col_dims_; }
    const int &YNumColDims() const { return y_num_col_dims_; }

  private:
    Tensor *input_x_;
    Tensor *input_y_;
    Tensor *out_;
    int x_num_col_dims_;
    int y_num_col_dims_;
};

class ConcatParam : public OpParam {
  public:
    ConcatParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
                const framework::AttributeMap &attrs,
                const framework::Scope &scope) {
        inputs_ = InputMultiFrom<framework::Tensor>(inputs, scope);
        out_ = OutFrom<framework::Tensor>(outputs, scope);
        axis_ = GetAttr<int>("axis", attrs);
    }

    std::vector<Tensor *> Inputs() const { return inputs_; }
    Tensor *Out() const { return out_; }
    const int &Axis() const { return axis_; }

  private:
    std::vector<Tensor *> inputs_;
    Tensor *out_;
    int axis_;
};

} // namespace operators
} // namespace paddle_mobile
private:
Tensor *input_x_;
Tensor *input_y_;
Tensor *out_;
int x_num_col_dims_;
int y_num_col_dims_;
};
class ConcatParam : public OpParam {
public:
ConcatParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
const framework::Scope &scope) {
inputs_ = InputMultiFrom<framework::Tensor>(inputs, scope);
out_ = OutFrom<framework::Tensor>(outputs, scope);
axis_ = GetAttr<int>("axis", attrs);
}
std::vector<Tensor *> Inputs() const { return inputs_; }
Tensor *Out() const { return out_; }
const int &Axis() const { return axis_; }
private:
std::vector<Tensor *> inputs_;
Tensor *out_;
int axis_;
};
} // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
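The `GetAttr`/`GetVarValue` helpers above implement a two-step lookup: the op's `VariableNameMap` maps a parameter key such as "Input" to variable names, and the `Scope` maps those names to storage. A minimal self-contained sketch of the same indirection, using toy map-based stand-ins (these aliases and the variable name are assumptions for illustration, not the framework's real `Scope` or tensor types):

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Toy stand-ins: a scope maps variable names to float buffers, and a
// VariableNameMap maps a parameter key to a list of variable names.
using Scope = std::map<std::string, std::vector<float>>;
using VariableNameMap = std::map<std::string, std::vector<std::string>>;

// Same shape as GetVarValue: resolve the first variable name bound to `key`,
// then look that variable up in the scope; nullptr when nothing is bound.
static std::vector<float> *GetVarValue(const std::string &key,
                                       const VariableNameMap &var_map,
                                       Scope &scope) {
    auto names = var_map.find(key);
    if (names == var_map.end() || names->second.empty()) {
        return nullptr; // no variable registered under this parameter key
    }
    auto var = scope.find(names->second[0]);
    return var == scope.end() ? nullptr : &var->second;
}

int main() {
    Scope scope;
    scope["conv2d_0.tmp_0"] = {1.f, 2.f, 3.f};
    VariableNameMap inputs{{"Input", {"conv2d_0.tmp_0"}}};
    if (std::vector<float> *value = GetVarValue("Input", inputs, scope)) {
        std::cout << "Input resolves to " << value->size() << " floats\n";
    }
    return 0;
}

The param classes above are just this lookup repeated once per operator input, output, and attribute at construction time.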
...@@ -19,107 +19,107 @@ limitations under the License. */
#include <typeindex>

namespace paddle_mobile {
namespace framework {

inline proto::VarType::Type ToDataType(std::type_index type) {
    /*if (typeid(platform::float16).hash_code() == type.hash_code()) {
      return proto::VarType::FP16;
    } else */
    if (typeid(const float).hash_code() == type.hash_code()) {
        // CPPLint complains Using C-style cast. Use
        // static_cast<float>() instead.
        // One fix to this is to replace float with const float because
        // typeid(T) == typeid(const T)
        // http://en.cppreference.com/w/cpp/language/typeid
        return proto::VarType::FP32;
    } else if (typeid(const double).hash_code() == type.hash_code()) {
        return proto::VarType::FP64;
    } else if (typeid(const int).hash_code() == type.hash_code()) {
        return proto::VarType::INT32;
    } else if (typeid(const int64_t).hash_code() == type.hash_code()) {
        return proto::VarType::INT64;
    } else if (typeid(const bool).hash_code() == type.hash_code()) {
        return proto::VarType::BOOL;
    } else {
        //        PADDLE_THROW("Not supported");
        //        std::cout << "Not supported";
    }
}

inline std::type_index ToTypeIndex(proto::VarType::Type type) {
    switch (type) {
    //    case proto::VarType::FP16:
    //        return typeid(platform::float16);
    case proto::VarType::FP32:
        return typeid(float);
    case proto::VarType::FP64:
        return typeid(double);
    case proto::VarType::INT32:
        return typeid(int);
    case proto::VarType::INT64:
        return typeid(int64_t);
    case proto::VarType::BOOL:
        return typeid(bool);
    default:
        //        PADDLE_THROW("Not support type %d", type);
        printf("Not support type %d", type);
    }
}

template <typename Visitor>
inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
    switch (type) {
    //    case proto::VarType::FP16:
    //        visitor.template operator()<platform::float16>();
    //        break;
    case proto::VarType::FP32:
        visitor.template operator()<float>();
        break;
    case proto::VarType::FP64:
        visitor.template operator()<double>();
        break;
    case proto::VarType::INT32:
        visitor.template operator()<int>();
        break;
    case proto::VarType::INT64:
        visitor.template operator()<int64_t>();
        break;
    case proto::VarType::BOOL:
        visitor.template operator()<bool>();
        break;
    default:
        //        PADDLE_THROW("Not supported");
        printf("Not supported");
    }
}

inline std::string DataTypeToString(const proto::VarType::Type type) {
    switch (type) {
    case proto::VarType::FP16:
        return "float16";
    case proto::VarType::FP32:
        return "float32";
    case proto::VarType::FP64:
        return "float64";
    case proto::VarType::INT16:
        return "int16";
    case proto::VarType::INT32:
        return "int32";
    case proto::VarType::INT64:
        return "int64";
    case proto::VarType::BOOL:
        return "bool";
    default:
        //        PADDLE_THROW("Not support type %d", type);
        printf("Not support type %d", type);
    }
}

inline std::ostream &operator<<(std::ostream &out,
                                const proto::VarType::Type &type) {
    out << DataTypeToString(type);
    return out;
}
} // namespace framework
} // namespace paddle_mobile
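The `visitor.template operator()<T>()` calls in `VisitDataType` are the standard trick for turning a runtime type tag into a compile-time template argument. A self-contained sketch of the same dispatch pattern, with a local `VarType` enum standing in for `proto::VarType::Type` (an assumption for illustration):

#include <cstdint>
#include <iostream>

enum class VarType { FP32, FP64, INT32, INT64, BOOL };

// A visitor is any object with a templated operator(); the dispatcher below
// instantiates it with the C++ type that matches the runtime tag.
struct SizeOfVisitor {
    template <typename T> void operator()() {
        std::cout << "element size: " << sizeof(T) << " bytes\n";
    }
};

template <typename Visitor>
inline void VisitDataType(VarType type, Visitor visitor) {
    switch (type) {
    case VarType::FP32:
        visitor.template operator()<float>();
        break;
    case VarType::FP64:
        visitor.template operator()<double>();
        break;
    case VarType::INT32:
        visitor.template operator()<int>();
        break;
    case VarType::INT64:
        visitor.template operator()<std::int64_t>();
        break;
    case VarType::BOOL:
        visitor.template operator()<bool>();
        break;
    }
}

int main() {
    VisitDataType(VarType::FP64, SizeOfVisitor{}); // prints: element size: 8 bytes
    return 0;
}

The `.template` keyword is required because `visitor` has a dependent type, so the compiler must be told that `operator()` is a template.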
...@@ -22,9 +22,10 @@ int main() {
    DLOGF("DASJFDAFJ%d -- %f", 12345, 344.234);
    LOGF(paddle_mobile::kLOG_DEBUG, "DASJFDAFJ%d -- %f", 12345, 344.234);
    LOG(paddle_mobile::kLOG_DEBUG) << "test debug"
                                   << " next log";
    LOG(paddle_mobile::kLOG_DEBUG1) << "test debug1"
                                    << " next log";
...
...@@ -21,149 +21,144 @@ SOFTWARE.
#include "test_include.h"

namespace paddle_mobile {
namespace framework {

template <typename Dtype> class TestElementwiseAddOp {
  public:
    TestElementwiseAddOp(const Program<Dtype> p) : program_(p) {
        if (use_optimize_) {
            to_predict_program_ = program_.optimizeProgram;
        } else {
            to_predict_program_ = program_.originProgram;
        }

        const std::vector<std::shared_ptr<BlockDesc>> blocks =
            to_predict_program_->Blocks();
        //        DLOG << " **block size " << blocks.size();
        for (int i = 0; i < blocks.size(); ++i) {
            std::shared_ptr<BlockDesc> block_desc = blocks[i];
            std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
            //        DLOG << " ops " << ops.size();
            for (int j = 0; j < ops.size(); ++j) {
                std::shared_ptr<OpDesc> op = ops[j];
                //        if (op->Type() == "elementwise_add") {
                //            if (op->GetAttrMap().at("axis").Get<int>() != -1) {
                //                DLOG << "attr: axis = "
                //                     << op->GetAttrMap().at("axis").Get<int>();
                //            }
                //        }
                //        DLOG << "op:" << op->Type();
                if (op->Type() == "elementwise_add" &&
                    op->Input("X")[0] == "batch_norm_2.tmp_2") {
                    DLOG << " elementwise_add attr size: "
                         << op->GetAttrMap().size();
                    DLOG << " inputs size: " << op->GetInputs().size();
                    DLOG << " outputs size: " << op->GetOutputs().size();
                    DLOG << " Input X is : " << op->Input("X")[0];
                    DLOG << " Input Y is : " << op->Input("Y")[0];
                    DLOG << " Output Out is : " << op->Output("Out")[0];
                    Attribute axis_attr = op->GetAttrMap().at("axis");
                    int axis = axis_attr.Get<int>();
                    DLOG << " Attr axis is : " << axis;
                    std::shared_ptr<operators::ElementwiseAddOp<Dtype, float>>
                        add = std::make_shared<
                            operators::ElementwiseAddOp<Dtype, float>>(
                            op->Type(), op->GetInputs(), op->GetOutputs(),
                            op->GetAttrMap(), program_.scope);
                    ops_of_block_[*block_desc.get()].push_back(add);
                }
            }
        }
    }

    std::shared_ptr<Tensor> predict_add(Tensor &t1, Tensor &t2) {
        // feed
        auto scope = program_.scope;
        Variable *x_feed_value = scope->Var("batch_norm_2.tmp_2");
        auto tensor_x = x_feed_value->GetMutable<Tensor>();
        tensor_x->ShareDataWith(t1);

        Variable *y_feed_value = scope->Var("batch_norm_0.tmp_3");
        auto tensor_y = y_feed_value->GetMutable<Tensor>();
        tensor_y->ShareDataWith(t2);

        Variable *con_output = scope->Var("elementwise_add_0.tmp_0");
        Tensor *output_tensor = con_output->GetMutable<Tensor>();
        output_tensor->mutable_data<float>({1, 3, 224, 224});
        //        DLOG << typeid(output_tensor).name();
        //        DLOG << "output_tensor dims: " << output_tensor->dims();

        std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
        out_tensor.reset(output_tensor);

        predict_add(t1, t2, 0);
        return out_tensor;
    }

  private:
    const framework::Program<Dtype> program_;
    std::shared_ptr<ProgramDesc> to_predict_program_;
    std::map<framework::BlockDesc,
             std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
        ops_of_block_;
    bool use_optimize_ = false;

    void predict_add(const Tensor &t1, const Tensor &t2, int block_id) {
        std::shared_ptr<BlockDesc> to_predict_block =
            to_predict_program_->Block(block_id);
        for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size();
             ++j) {
            auto op = ops_of_block_[*to_predict_block.get()][j];
            DLOG << "op -> run()";
            op->Run();
        }
    }
};

template class TestElementwiseAddOp<CPU>;
} // namespace framework

namespace test {
void testElementwiseAdd() {
    DLOG << "----------**********----------";
    DLOG << "begin to run ElementAddOp Test";
    paddle_mobile::Loader<paddle_mobile::CPU> loader;
    auto program =
        loader.Load(std::string("../../test/models/"
                                "image_classification_resnet.inference.model"));

    /// input x (1,3,224,224)
    paddle_mobile::framework::Tensor inputx;
    SetupTensor<float>(&inputx, {1, 3, 224, 224}, static_cast<float>(0),
                       static_cast<float>(1));
    float *inputx_ptr = inputx.data<float>();
    /// input y (224,)
    paddle_mobile::framework::Tensor inputy;
    SetupTensor<float>(&inputy, {224}, static_cast<float>(0),
                       static_cast<float>(1));
    float *inputy_ptr = inputy.data<float>();

    paddle_mobile::framework::TestElementwiseAddOp<paddle_mobile::CPU>
        testElementwiseAddOp(program);

    auto output_add = testElementwiseAddOp.predict_add(inputx, inputy);
    float *output_add_ptr = output_add->data<float>();
    //    for (int j = 0; j < output_add->numel(); ++j) {
    //        DLOG << "value of output: " << output_add_ptr[j];
    //    }

    /// output (1,3,224,224)
    DLOG << "output memory size : " << output_add->memory_size();
    DLOG << "output numel : " << output_add->numel();

    DLOG << inputx_ptr[226] << " + " << inputy_ptr[2] << " = "
         << output_add_ptr[226];
}
} // namespace test
} // namespace paddle_mobile
...@@ -16,24 +16,22 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/

#include "framework/program-optimize/node.h"
#include <iostream>

using namespace paddle_mobile::framework;

int main() {
    Node node("conv");
    node > Node("add") > Node("relu");
    Node node1("conv");
    node1 > Node("add") > Node("relu");
    if (node == node1) {
        DLOG << "equal";
    }
    DLOG << "\n" << node1;
    //    DLOG << node;
}
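The `node > Node("add") > Node("relu")` chain works because `operator>` links the right-hand node under the left one and returns a reference to the stored child, so the next `>` keeps extending the tail. A minimal sketch of that idiom (this is an illustrative stand-in, not the project's actual `Node`, which also tracks inputs and op metadata):

#include <iostream>
#include <string>
#include <vector>

struct Node {
    explicit Node(std::string type) : type_(type) {}

    // Append `next` as this node's output and return the stored copy, so
    // chains like a > b > c grow from the tail.
    Node &operator>(Node next) {
        outputs_.push_back(next);
        return outputs_.back();
    }

    // Two chains compare equal when their op types and whole subtrees match.
    bool operator==(const Node &other) const {
        return type_ == other.type_ && outputs_ == other.outputs_;
    }

    std::string type_;
    std::vector<Node> outputs_;
};

int main() {
    Node node("conv");
    node > Node("add") > Node("relu");
    Node node1("conv");
    node1 > Node("add") > Node("relu");
    std::cout << (node == node1 ? "equal" : "not equal") << "\n"; // equal
    return 0;
}

Overloading a comparison operator for graph building is unusual, but it reads naturally here: the chain mirrors the producer-to-consumer direction of the fused op pattern being matched.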
...@@ -21,182 +21,170 @@ SOFTWARE.
#include "test_include.h"

namespace paddle_mobile {
namespace framework {

template <typename Dtype> class TestMulOp {
  public:
    TestMulOp(const Program<Dtype> p) : program_(p) {
        if (use_optimize_) {
            to_predict_program_ = program_.optimizeProgram;
        } else {
            to_predict_program_ = program_.originProgram;
        }

        const std::vector<std::shared_ptr<BlockDesc>> blocks =
            to_predict_program_->Blocks();
        //        DLOG << " **block size " << blocks.size();
        for (int i = 0; i < blocks.size(); ++i) {
            std::shared_ptr<BlockDesc> block_desc = blocks[i];
            std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
            //        DLOG << " ops " << ops.size();
            for (int j = 0; j < ops.size(); ++j) {
                std::shared_ptr<OpDesc> op = ops[j];
                //        if (op->Type() == "mul") {
                //            DLOG << "x_num_col_dims : "
                //                 << op->GetAttrMap().at("x_num_col_dims").Get<int>();
                //            DLOG << "y_num_col_dims : "
                //                 << op->GetAttrMap().at("y_num_col_dims").Get<int>();
                //            DLOG << " Input X is : " << op->Input("X")[0];
                //        }
                //        DLOG << "op:" << op->Type();
                if (op->Type() == "mul" &&
                    op->Input("X")[0] == "pool2d_0.tmp_0") {
                    DLOG << " mul attr size: " << op->GetAttrMap().size();
                    DLOG << " inputs size: " << op->GetInputs().size();
                    DLOG << " outputs size: " << op->GetOutputs().size();
                    DLOG << " Input X is : " << op->Input("X")[0];
                    DLOG << " Input Y is : " << op->Input("Y")[0];
                    DLOG << " Output Out is : " << op->Output("Out")[0];
                    DLOG << "x_num_col_dims : "
                         << op->GetAttrMap().at("x_num_col_dims").Get<int>();
                    DLOG << "y_num_col_dims : "
                         << op->GetAttrMap().at("y_num_col_dims").Get<int>();
                    std::shared_ptr<operators::MulOp<Dtype, float>> add =
                        std::make_shared<operators::MulOp<Dtype, float>>(
                            op->Type(), op->GetInputs(), op->GetOutputs(),
                            op->GetAttrMap(), program_.scope);
                    ops_of_block_[*block_desc.get()].push_back(add);
                }
            }
        }
    }

    std::shared_ptr<Tensor> predict_add(Tensor &t1, Tensor &t2) {
        // feed
        auto scope = program_.scope;
        Variable *x_feed_value = scope->Var("pool2d_0.tmp_0");
        auto tensor_x = x_feed_value->GetMutable<Tensor>();
        tensor_x->ShareDataWith(t1);

        Variable *y_feed_value = scope->Var("fc_0.w_0");
        auto tensor_y = y_feed_value->GetMutable<Tensor>();
        tensor_y->ShareDataWith(t2);

        Variable *con_output = scope->Var("fc_0.tmp_0");
        Tensor *output_tensor = con_output->GetMutable<Tensor>();
        output_tensor->mutable_data<float>({3, 3});
        //        DLOG << typeid(output_tensor).name();
        //        DLOG << "output_tensor dims: " << output_tensor->dims();

        std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
        out_tensor.reset(output_tensor);

        predict_add(t1, t2, 0);
        return out_tensor;
    }

  private:
    const framework::Program<Dtype> program_;
    std::shared_ptr<ProgramDesc> to_predict_program_;
    std::map<framework::BlockDesc,
             std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
        ops_of_block_;
    bool use_optimize_ = false;

    void predict_add(const Tensor &t1, const Tensor &t2, int block_id) {
        std::shared_ptr<BlockDesc> to_predict_block =
            to_predict_program_->Block(block_id);
        for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size();
             ++j) {
            auto op = ops_of_block_[*to_predict_block.get()][j];
            DLOG << "op -> run()";
            op->Run();
        }
    }
};

template class TestMulOp<CPU>;
} // namespace framework

namespace test {
void testMul() {
    DLOG << "----------**********----------";
    DLOG << "begin to run MulOp Test";
    paddle_mobile::Loader<paddle_mobile::CPU> loader;
    auto program =
        loader.Load(std::string("../../test/models/"
                                "image_classification_resnet.inference.model"));

    /// input x (3,2,1,1)
    paddle_mobile::framework::Tensor inputx;
    SetupTensor<float>(&inputx, {3, 2, 1, 1}, static_cast<float>(0),
                       static_cast<float>(1));
    float *inputx_ptr = inputx.data<float>();
    /// input y (2,3)
    paddle_mobile::framework::Tensor inputy;
    SetupTensor<float>(&inputy, {2, 3}, static_cast<float>(0),
                       static_cast<float>(1));
    float *inputy_ptr = inputy.data<float>();

    paddle_mobile::framework::TestMulOp<paddle_mobile::CPU> testMulOp(program);

    auto output_mul = testMulOp.predict_add(inputx, inputy);
    float *output_mul_ptr = output_mul->data<float>();

    auto dimx_1 = inputx.numel() / inputx.dims()[0];
    DLOG << " inputx : ";
    for (int i = 0; i < inputx.dims()[0]; ++i) {
        for (int j = 0; j < dimx_1; ++j) {
            DLOGF("%f ", inputx_ptr[i * dimx_1 + j]);
        }
        DLOGF("\n");
    }

    auto dimy_1 = inputy.numel() / inputy.dims()[0];
    DLOG << " inputy : ";
    for (int i = 0; i < inputy.dims()[0]; ++i) {
        for (int j = 0; j < dimy_1; ++j) {
            DLOGF("%f ", inputy_ptr[i * dimy_1 + j]);
        }
        DLOGF("\n");
    }

    auto dim_output_1 = output_mul->numel() / output_mul->dims()[0];
    DLOG << " output : ";
    for (int i = 0; i < output_mul->dims()[0]; ++i) {
        for (int j = 0; j < dim_output_1; ++j) {
            DLOGF("%f ", output_mul_ptr[i * dim_output_1 + j]);
        }
        DLOGF("\n");
    }

    /// output (3,3)
    DLOG << "output memory size : " << output_mul->memory_size();
    DLOG << "output numel : " << output_mul->numel();
    DLOG << inputx_ptr[0] << " x " << inputy_ptr[0] << " + " << inputx_ptr[1]
         << " x " << inputy_ptr[0 + 3] << " = " << output_mul_ptr[0];
}
} // namespace test
} // namespace paddle_mobile
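The expected value printed by the final DLOG follows from how `mul` flattens its operands: with `x_num_col_dims = 1`, the (3, 2, 1, 1) input is viewed as a 3x2 matrix and multiplied by the 2x3 weight, so `out[0] = x[0]*y[0] + x[1]*y[3]` in row-major indexing. A plain C++ check of that arithmetic, with toy row-major buffers standing in for the framework's `Tensor` (the concrete values are assumptions for illustration):

#include <iostream>
#include <vector>

int main() {
    // x viewed as 3x2 (row-major), y as 2x3, out as 3x3.
    const int M = 3, K = 2, N = 3;
    std::vector<float> x = {1, 2, 3, 4, 5, 6};
    std::vector<float> y = {1, 0, 1, 0, 1, 0};
    std::vector<float> out(M * N, 0.f);

    // Naive matmul over the flattened views.
    for (int i = 0; i < M; ++i)
        for (int j = 0; j < N; ++j)
            for (int k = 0; k < K; ++k)
                out[i * N + j] += x[i * K + k] * y[k * N + j];

    // Mirrors the DLOG check above: out[0] == x[0]*y[0] + x[1]*y[0 + 3].
    std::cout << out[0] << " == " << x[0] * y[0] + x[1] * y[0 + N] << "\n";
    return 0;
}

`y[0 + 3]` is element (1, 0) of the 2x3 weight, which is exactly the second term the test's hand-computed check multiplies against `x[1]`.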
...@@ -7,7 +7,6 @@
#include "framework/scope.h"
#include "framework/tensor.h"
#include "framework/variable.h"
#include "io.h"
#include "test_helper.h"
#include <map>
...
#!/bin/bash
#set -e
#
#readonly VERSION="3.8"
#
#version=$(clang-format -version)
#
#if ! [[ $version == *"$VERSION"* ]]; then
# echo "clang-format version check failed."
# echo "a version contains '$VERSION' is needed, but get '$version'"
# echo "you can install the right version, and make an soft-link to '\$PATH' env"
# exit -1
#fi
clang-format $@
#!/bin/bash
bash -c "cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON"
TOTAL_ERRORS=0
#clang-tidy *.[ch]pp -checks=*
# The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | grep -v "third-party/" | grep -v ".pb."); do
    echo "python ./tools/pre-commit.hooks/run-clang-tidy.py $file"
    python ./tools/pre-commit.hooks/run-clang-tidy.py $file;
    TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
done
rm -f compile_commands.json
exit $TOTAL_ERRORS
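For context on what this hook checks: clang-tidy reads each file's compile command from the `compile_commands.json` that the cmake invocation above exports, then reports per-check diagnostics, many of which carry fix-its. A hedged sketch of the kind of rewrite the enabled modernize checks produce (the function and values are invented for illustration; only the NULL-to-nullptr fix-it is the documented behavior of modernize-use-nullptr):

#include <iostream>

// modernize-use-nullptr suggests `return nullptr;` where this function
// would previously have returned the macro NULL.
int *find_first_even(int *begin, int *end) {
    for (int *it = begin; it != end; ++it) {
        if (*it % 2 == 0) {
            return it;
        }
    }
    return nullptr;
}

int main() {
    int data[] = {3, 5, 8, 9};
    int *hit = find_first_even(data, data + 4);
    std::cout << (hit ? *hit : -1) << "\n"; // prints 8
    return 0;
}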
#!/bin/bash
set -e

readonly VERSION="3.8"

version=$(clang-format -version)

if ! [[ $version == *"$VERSION"* ]]; then
    echo "clang-format version check failed."
    echo "a version containing '$VERSION' is needed, but got '$version'"
    echo "you can install the right version, and make a soft-link to it in your '\$PATH' env"
    exit -1
fi

clang-format $@
#!/bin/bash
set -e

readonly VERSION="version 3."

version=$(clang-format -version)

if ! [[ $version == *"$VERSION"* ]]; then
    echo "clang-format version check failed."
    echo "a version containing '$VERSION' is needed, but got '$version'"
    echo "you can install the right version, and make a soft-link to it in your '\$PATH' env"
    exit -1
fi

clang-format $@
#!/usr/bin/env python
#
#===- run-clang-tidy.py - Parallel clang-tidy runner ---------*- python -*--===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
# FIXME: Integrate with clang-tidy-diff.py
"""
Parallel clang-tidy runner
==========================
Runs clang-tidy over all files in a compilation database. Requires clang-tidy
and clang-apply-replacements in $PATH.
Example invocations.
- Run clang-tidy on all files in the current working directory with a default
set of checks and show warnings in the cpp files and all project headers.
run-clang-tidy.py $PWD
- Fix all header guards.
run-clang-tidy.py -fix -checks=-*,llvm-header-guard
- Fix all header guards included from clang-tidy and header guards
for clang-tidy headers.
run-clang-tidy.py -fix -checks=-*,llvm-header-guard extra/clang-tidy \
-header-filter=extra/clang-tidy
Compilation database setup:
http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html
"""
from __future__ import print_function
import argparse
import glob
import json
import multiprocessing
import os
import re
import shutil
import subprocess
import sys
import tempfile
import threading
import traceback
import yaml
is_py2 = sys.version[0] == '2'

if is_py2:
    import Queue as queue
else:
    import queue as queue


def find_compilation_database(path):
    """Adjusts the directory until a compilation database is found."""
    result = './'
    while not os.path.isfile(os.path.join(result, path)):
        if os.path.realpath(result) == '/':
            print('Error: could not find compilation database.')
            sys.exit(1)
        result += '../'
    return os.path.realpath(result)


def make_absolute(f, directory):
    if os.path.isabs(f):
        return f
    return os.path.normpath(os.path.join(directory, f))


def get_tidy_invocation(f, clang_tidy_binary, checks, tmpdir, build_path,
                        header_filter, extra_arg, extra_arg_before, quiet):
    """Gets a command line for clang-tidy."""
    start = [clang_tidy_binary]
    if header_filter is not None:
        start.append('-header-filter=' + header_filter)
    else:
        # Show warnings in all in-project headers by default.
        start.append('-header-filter=^' + build_path + '/.*')
    if checks:
        start.append('-checks=' + checks)
    if tmpdir is not None:
        start.append('-export-fixes')
        # Get a temporary file. We immediately close the handle so clang-tidy can
        # overwrite it.
        (handle, name) = tempfile.mkstemp(suffix='.yaml', dir=tmpdir)
        os.close(handle)
        start.append(name)
    for arg in extra_arg:
        start.append('-extra-arg=%s' % arg)
    for arg in extra_arg_before:
        start.append('-extra-arg-before=%s' % arg)
    start.append('-p=' + build_path)
    if quiet:
        start.append('-quiet')
    start.append(f)
    return start


def merge_replacement_files(tmpdir, mergefile):
    """Merge all replacement files in a directory into a single file"""
    # The fixes suggested by clang-tidy >= 4.0.0 are given under
    # the top level key 'Diagnostics' in the output yaml files
    mergekey = "Diagnostics"
    merged = []
    for replacefile in glob.iglob(os.path.join(tmpdir, '*.yaml')):
        content = yaml.safe_load(open(replacefile, 'r'))
        if not content:
            continue  # Skip empty files.
        merged.extend(content.get(mergekey, []))

    if merged:
        # MainSourceFile: The key is required by the definition inside
        # include/clang/Tooling/ReplacementsYaml.h, but the value
        # is actually never used inside clang-apply-replacements,
        # so we set it to '' here.
        output = {'MainSourceFile': '', mergekey: merged}
        with open(mergefile, 'w') as out:
            yaml.safe_dump(output, out)
    else:
        # Empty the file:
        open(mergefile, 'w').close()


def check_clang_apply_replacements_binary(args):
    """Checks if invoking supplied clang-apply-replacements binary works."""
    try:
        subprocess.check_call([args.clang_apply_replacements_binary, '--version'])
    except:
        print('Unable to run clang-apply-replacements. Is clang-apply-replacements '
              'binary correctly specified?', file=sys.stderr)
        traceback.print_exc()
        sys.exit(1)


def apply_fixes(args, tmpdir):
    """Calls clang-apply-fixes on a given directory."""
    invocation = [args.clang_apply_replacements_binary]
    if args.format:
        invocation.append('-format')
    if args.style:
        invocation.append('-style=' + args.style)
    invocation.append(tmpdir)
    subprocess.call(invocation)


def run_tidy(args, tmpdir, build_path, queue):
    """Takes filenames out of queue and runs clang-tidy on them."""
    while True:
        name = queue.get()
        invocation = get_tidy_invocation(name, args.clang_tidy_binary, args.checks,
                                         tmpdir, build_path, args.header_filter,
                                         args.extra_arg, args.extra_arg_before,
                                         args.quiet)
        sys.stdout.write(' '.join(invocation) + '\n')
        subprocess.call(invocation)
        queue.task_done()


def main():
    parser = argparse.ArgumentParser(description='Runs clang-tidy over all files '
                                     'in a compilation database. Requires '
                                     'clang-tidy and clang-apply-replacements in '
                                     '$PATH.')
    parser.add_argument('-clang-tidy-binary', metavar='PATH',
                        default='clang-tidy',
                        help='path to clang-tidy binary')
    parser.add_argument('-clang-apply-replacements-binary', metavar='PATH',
                        default='clang-apply-replacements',
                        help='path to clang-apply-replacements binary')
    parser.add_argument('-checks', default=None,
                        help='checks filter, when not specified, use clang-tidy '
                        'default')
    parser.add_argument('-header-filter', default=None,
                        help='regular expression matching the names of the '
                        'headers to output diagnostics from. Diagnostics from '
                        'the main file of each translation unit are always '
                        'displayed.')
    parser.add_argument('-export-fixes', metavar='filename', dest='export_fixes',
                        help='Create a yaml file to store suggested fixes in, '
                        'which can be applied with clang-apply-replacements.')
    parser.add_argument('-j', type=int, default=0,
                        help='number of tidy instances to be run in parallel.')
    parser.add_argument('files', nargs='*', default=['.*'],
                        help='files to be processed (regex on path)')
    parser.add_argument('-fix', action='store_true', help='apply fix-its')
    parser.add_argument('-format', action='store_true', help='Reformat code '
                        'after applying fixes')
    parser.add_argument('-style', default='file', help='The style of reformat '
                        'code after applying fixes')
    parser.add_argument('-p', dest='build_path',
                        help='Path used to read a compile command database.')
    parser.add_argument('-extra-arg', dest='extra_arg',
                        action='append', default=[],
                        help='Additional argument to append to the compiler '
                        'command line.')
    parser.add_argument('-extra-arg-before', dest='extra_arg_before',
                        action='append', default=[],
                        help='Additional argument to prepend to the compiler '
                        'command line.')
    parser.add_argument('-quiet', action='store_true',
                        help='Run clang-tidy in quiet mode')
    args = parser.parse_args()

    db_path = 'compile_commands.json'

    if args.build_path is not None:
        build_path = args.build_path
    else:
        # Find our database
        build_path = find_compilation_database(db_path)

    try:
        invocation = [args.clang_tidy_binary, '-list-checks']
        invocation.append('-p=' + build_path)
        if args.checks:
            invocation.append('-checks=' + args.checks)
        invocation.append('-')
        subprocess.check_call(invocation)
    except:
        print("Unable to run clang-tidy.", file=sys.stderr)
        sys.exit(1)

    # Load the database and extract all files.
    database = json.load(open(os.path.join(build_path, db_path)))
    files = [make_absolute(entry['file'], entry['directory'])
             for entry in database]

    max_task = args.j
    if max_task == 0:
        max_task = multiprocessing.cpu_count()

    tmpdir = None
    if args.fix or args.export_fixes:
        check_clang_apply_replacements_binary(args)
        tmpdir = tempfile.mkdtemp()

    # Build up a big regexy filter from all command line arguments.
    file_name_re = re.compile('|'.join(args.files))

    try:
        # Spin up a bunch of tidy-launching threads.
        task_queue = queue.Queue(max_task)
        for _ in range(max_task):
            t = threading.Thread(target=run_tidy,
                                 args=(args, tmpdir, build_path, task_queue))
            t.daemon = True
            t.start()

        # Fill the queue with files.
        for name in files:
            if file_name_re.search(name):
                task_queue.put(name)

        # Wait for all threads to be done.
        task_queue.join()
    except KeyboardInterrupt:
        # This is a sad hack. Unfortunately subprocess goes
        # bonkers with ctrl-c and we start forking merrily.
        print('\nCtrl-C detected, goodbye.')
        if tmpdir:
            shutil.rmtree(tmpdir)
        os.kill(0, 9)

    return_code = 0
    if args.export_fixes:
        print('Writing fixes to ' + args.export_fixes + ' ...')
        try:
            merge_replacement_files(tmpdir, args.export_fixes)
        except:
            print('Error exporting fixes.\n', file=sys.stderr)
            traceback.print_exc()
            return_code = 1

    if args.fix:
        print('Applying fixes ...')
        try:
            apply_fixes(args, tmpdir)
        except:
            print('Error applying fixes.\n', file=sys.stderr)
            traceback.print_exc()
            return_code = 1

    if tmpdir:
        shutil.rmtree(tmpdir)
    sys.exit(return_code)


if __name__ == '__main__':
    main()