modify to 2 spaces indent & format code & rm build folder

ddf6b722 · 朔-望 · e35ef6fe · ddf6b722 · e35ef6fe · ddf6b722
102 changed file
--- a/.clang-format
+++ b/.clang-format
@@ -2,5 +2,4 @@
 Language:        Cpp
 BasedOnStyle:  LLVM
 Standard:  Cpp11 
-IndentWidth: 4
 ...
--- a/cmake-build-release/compile_commands.json
+++ b/cmake-build-release/compile_commands.json
-[
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/ddim.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/ddim.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/ddim.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/lod_tensor.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/lod_tensor.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/lod_tensor.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/scope.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/scope.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/scope.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/tensor_util.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/tensor_util.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/tensor_util.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/memory/t_malloc.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/memory/t_malloc.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/memory/t_malloc.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/math/im2col.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/im2col.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/im2col.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/math/math_function.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/math_function.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/math_function.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/math/vol2col.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/vol2col.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/vol2col.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/common/variant.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/common/variant.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/common/variant.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/attribute.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/attribute.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/attribute.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/block_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/block_desc.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/block_desc.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/data_transform.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/data_transform.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/data_transform.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/executor.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/executor.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/executor.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/framework.pb.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/framework.pb.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/framework.pb.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/op_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/op_desc.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/op_desc.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/operator.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/operator.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/operator.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/paddle_mobile_object.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/paddle_mobile_object.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/paddle_mobile_object.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/program.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/program.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/program.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/program_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/program_desc.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/program_desc.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/framework/var_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/var_desc.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/var_desc.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/io.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/io.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/io.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/conv_op.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/conv_op.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/conv_op.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/elementwise_add_op.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/elementwise_add_op.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/elementwise_add_op.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/kernel/arm/conv_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/conv_kernel.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/conv_kernel.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/kernel/arm/elementwise_add_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/elementwise_add_kernel.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/elementwise_add_kernel.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/kernel/arm/mul_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/mul_kernel.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/mul_kernel.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/kernel/fpga/conv_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/fpga/conv_kernel.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/fpga/conv_kernel.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/mul_op.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/mul_op.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/mul_op.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-static.dir/src/operators/op_param.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/op_param.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/op_param.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/ddim.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/ddim.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/ddim.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/lod_tensor.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/lod_tensor.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/lod_tensor.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/scope.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/scope.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/scope.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/tensor_util.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/tensor_util.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/tensor_util.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/memory/t_malloc.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/memory/t_malloc.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/memory/t_malloc.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/math/im2col.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/im2col.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/im2col.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/math/math_function.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/math_function.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/math_function.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/math/vol2col.cc.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/vol2col.cc",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/math/vol2col.cc"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/common/variant.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/common/variant.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/common/variant.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/attribute.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/attribute.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/attribute.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/block_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/block_desc.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/block_desc.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/data_transform.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/data_transform.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/data_transform.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/executor.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/executor.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/executor.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/framework.pb.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/framework.pb.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/framework.pb.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/op_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/op_desc.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/op_desc.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/operator.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/operator.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/operator.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/paddle_mobile_object.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/paddle_mobile_object.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/paddle_mobile_object.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/program.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/program.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/program.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/program_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/program_desc.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/program_desc.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/framework/var_desc.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/framework/var_desc.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/framework/var_desc.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/io.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/io.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/io.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/conv_op.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/conv_op.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/conv_op.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/elementwise_add_op.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/elementwise_add_op.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/elementwise_add_op.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/kernel/arm/conv_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/conv_kernel.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/conv_kernel.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/kernel/arm/elementwise_add_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/elementwise_add_kernel.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/elementwise_add_kernel.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/kernel/arm/mul_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/mul_kernel.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/arm/mul_kernel.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/kernel/fpga/conv_kernel.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/fpga/conv_kernel.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/kernel/fpga/conv_kernel.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/mul_op.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/mul_op.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/mul_op.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -Dpaddle_mobile_EXPORTS -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG -fPIC   -std=c++11 -o CMakeFiles/paddle-mobile.dir/src/operators/op_param.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/src/operators/op_param.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/src/operators/op_param.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release/test",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/test-log.dir/common/test_log.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/test/common/test_log.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/test/common/test_log.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release/test",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/test-conv-op.dir/operators/test_cov_op.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/test/operators/test_cov_op.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/test/operators/test_cov_op.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release/test",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/test-load.dir/framework/test_load.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/test/framework/test_load.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/test/framework/test_load.cpp"
-},
-{
-  "directory": "/Users/allonli/Documents/workspace/paddle-mobile/cmake-build-release/test",
-  "command": "/Library/Developer/CommandLineTools/usr/bin/c++  -DPADDLE_MOBILE_DEBUG=\\\"true\\\" -I/Users/allonli/Documents/workspace/paddle-mobile/src -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/openblas/include -I/Users/allonli/Documents/workspace/paddle-mobile/third-party/protobuf/include  -O2 -g -DNDEBUG   -std=c++11 -o CMakeFiles/paddle-mobile-test.dir/main.cpp.o -c /Users/allonli/Documents/workspace/paddle-mobile/test/main.cpp",
-  "file": "/Users/allonli/Documents/workspace/paddle-mobile/test/main.cpp"
-}
-]
\ No newline at end of file
--- a/src/common/log.h
+++ b/src/common/log.h
@@ -28,15 +28,15 @@ SOFTWARE.
 namespace paddle_mobile {

 enum LogLevel {
-    kNO_LOG,
-    kLOG_ERROR,
-    kLOG_WARNING,
-    kLOG_INFO,
-    kLOG_DEBUG,
-    kLOG_DEBUG1,
-    kLOG_DEBUG2,
-    kLOG_DEBUG3,
-    kLOG_DEBUG4
+  kNO_LOG,
+  kLOG_ERROR,
+  kLOG_WARNING,
+  kLOG_INFO,
+  kLOG_DEBUG,
+  kLOG_DEBUG1,
+  kLOG_DEBUG2,
+  kLOG_DEBUG3,
+  kLOG_DEBUG4
 };

 // log level
@@ -49,119 +49,117 @@ struct ToLog;
 struct Print;

 struct Print {
-    friend struct ToLog;
-
-    template <typename T> Print &operator<<(T const &value) {
-        buffer_ << value;
-        return *this;
-    }
-
-  private:
-    void print(LogLevel level) {
-        buffer_ << std::endl;
-        if (level == kLOG_ERROR) {
-            std::cerr << buffer_.str();
-        } else {
-            std::cout << buffer_.str();
-        }
+  friend struct ToLog;
+
+  template <typename T> Print &operator<<(T const &value) {
+    buffer_ << value;
+    return *this;
+  }
+
+private:
+  void print(LogLevel level) {
+    buffer_ << std::endl;
+    if (level == kLOG_ERROR) {
+      std::cerr << buffer_.str();
+    } else {
+      std::cout << buffer_.str();
    }
-    std::ostringstream buffer_;
+  }
+  std::ostringstream buffer_;
 };

 struct ToLog {
-    ToLog(LogLevel level = kLOG_DEBUG, const std::string &info = "")
-        : level_(level) {
-        unsigned blanks =
-            (unsigned)(level > kLOG_DEBUG ? (level - kLOG_DEBUG) * 4 : 1);
-        printer_ << logs[level] << " " << info << ":"
-                 << std::string(blanks, ' ');
-    }
-
-    template <typename T> ToLog &operator<<(T const &value) {
-        printer_ << value;
-        return *this;
-    }
-
-    ~ToLog() { printer_.print(level_); }
-
-  private:
-    LogLevel level_;
-    Print printer_;
+  ToLog(LogLevel level = kLOG_DEBUG, const std::string &info = "")
+      : level_(level) {
+    unsigned blanks =
+        (unsigned)(level > kLOG_DEBUG ? (level - kLOG_DEBUG) * 4 : 1);
+    printer_ << logs[level] << " " << info << ":" << std::string(blanks, ' ');
+  }
+
+  template <typename T> ToLog &operator<<(T const &value) {
+    printer_ << value;
+    return *this;
+  }
+
+  ~ToLog() { printer_.print(level_); }
+
+private:
+  LogLevel level_;
+  Print printer_;
 };

 #define LOG(level)                                                             \
-    if (level > paddle_mobile::log_level) {                                    \
-    } else                                                                     \
-        paddle_mobile::ToLog(                                                  \
-            level, (std::stringstream()                                        \
-                    << "[file: "                                               \
-                    << (strrchr(__FILE__, '/') ? (strrchr(__FILE__, '/') + 1)  \
-                                               : __FILE__)                     \
-                    << "] [line: " << __LINE__ << "] ")                        \
-                       .str())
+  if (level > paddle_mobile::log_level) {                                      \
+  } else                                                                       \
+    paddle_mobile::ToLog(                                                      \
+        level,                                                                 \
+        (std::stringstream()                                                   \
+         << "[file: "                                                          \
+         << (strrchr(__FILE__, '/') ? (strrchr(__FILE__, '/') + 1) : __FILE__) \
+         << "] [line: " << __LINE__ << "] ")                                   \
+            .str())

 #define DLOG                                                                   \
-    if (paddle_mobile::kLOG_DEBUG > paddle_mobile::log_level) {                \
-    } else                                                                     \
-        paddle_mobile::ToLog(                                                  \
-            paddle_mobile::kLOG_DEBUG,                                         \
-            (std::stringstream()                                               \
-             << "[file: "                                                      \
-             << (strrchr(__FILE__, '/') ? (strrchr(__FILE__, '/') + 1)         \
-                                        : __FILE__)                            \
-             << "] [line: " << __LINE__ << "] ")                               \
-                .str())
+  if (paddle_mobile::kLOG_DEBUG > paddle_mobile::log_level) {                  \
+  } else                                                                       \
+    paddle_mobile::ToLog(                                                      \
+        paddle_mobile::kLOG_DEBUG,                                             \
+        (std::stringstream()                                                   \
+         << "[file: "                                                          \
+         << (strrchr(__FILE__, '/') ? (strrchr(__FILE__, '/') + 1) : __FILE__) \
+         << "] [line: " << __LINE__ << "] ")                                   \
+            .str())
 } // namespace paddle_mobile

 #define LOGF(level, format, ...)                                               \
-    if (level > paddle_mobile::log_level) {                                    \
-    } else                                                                     \
-        printf(format, ##__VA_ARGS__)
+  if (level > paddle_mobile::log_level) {                                      \
+  } else                                                                       \
+    printf(format, ##__VA_ARGS__)

 #define DLOGF(format, ...)                                                     \
-    if (paddle_mobile::kLOG_DEBUG > paddle_mobile::log_level) {                \
-    } else                                                                     \
-        printf(format, ##__VA_ARGS__)
+  if (paddle_mobile::kLOG_DEBUG > paddle_mobile::log_level) {                  \
+  } else                                                                       \
+    printf(format, ##__VA_ARGS__)

 #else

 namespace paddle_mobile {

 enum LogLevel {
-    kNO_LOG,
-    kLOG_ERROR,
-    kLOG_WARNING,
-    kLOG_INFO,
-    kLOG_DEBUG,
-    kLOG_DEBUG1,
-    kLOG_DEBUG2,
-    kLOG_DEBUG3,
-    kLOG_DEBUG4
+  kNO_LOG,
+  kLOG_ERROR,
+  kLOG_WARNING,
+  kLOG_INFO,
+  kLOG_DEBUG,
+  kLOG_DEBUG1,
+  kLOG_DEBUG2,
+  kLOG_DEBUG3,
+  kLOG_DEBUG4
 };

 struct ToLog;
 struct Print {
-    friend struct ToLog;
-    template <typename T> Print &operator<<(T const &value) {}
+  friend struct ToLog;
+  template <typename T> Print &operator<<(T const &value) {}

-  private:
+private:
 };

 struct ToLog {
-    ToLog(LogLevel level) {}
+  ToLog(LogLevel level) {}

-    template <typename T> ToLog &operator<<(T const &value) { return *this; }
+  template <typename T> ToLog &operator<<(T const &value) { return *this; }
 };

 #define LOG(level)                                                             \
-    if (true) {                                                                \
-    } else                                                                     \
-        paddle_mobile::ToLog(level)
+  if (true) {                                                                  \
+  } else                                                                       \
+    paddle_mobile::ToLog(level)

 #define DLOG                                                                   \
-    if (true) {                                                                \
-    } else                                                                     \
-        paddle_mobile::ToLog(paddle_mobile::kLOG_DEBUG)
+  if (true) {                                                                  \
+  } else                                                                       \
+    paddle_mobile::ToLog(paddle_mobile::kLOG_DEBUG)

 #define LOGF(level, format, ...)


--- a/src/common/types.h
+++ b/src/common/types.h
@@ -32,32 +32,32 @@ typedef DeviceType<kGPU_MALI> GPU_MALI;

 //! data type
 enum DataType {
-    PM_INVALID = -1,
-    PM_HALF = 0,
-    PM_FLOAT = 1,
-    PM_DOUBLE = 2,
-    PM_INT8 = 3,
-    PM_INT16 = 4,
-    PM_INT32 = 5,
-    PM_INT64 = 6,
-    PM_UINT8 = 7,
-    PM_UINT16 = 8,
-    PM_UINT32 = 9,
-    PM_STRING = 10,
-    PM_BOOL = 11,
-    PM_SHAPE = 12,
-    PM_TENSOR = 13
+  PM_INVALID = -1,
+  PM_HALF = 0,
+  PM_FLOAT = 1,
+  PM_DOUBLE = 2,
+  PM_INT8 = 3,
+  PM_INT16 = 4,
+  PM_INT32 = 5,
+  PM_INT64 = 6,
+  PM_UINT8 = 7,
+  PM_UINT16 = 8,
+  PM_UINT32 = 9,
+  PM_STRING = 10,
+  PM_BOOL = 11,
+  PM_SHAPE = 12,
+  PM_TENSOR = 13
 };
 //!
 enum PMStatus {
-    PMSuccess = 0xFF,        /*!< No errors */
-    PMNotInitialized = 0x01, /*!< Data not initialized. */
-    PMInvalidValue = 0x02,   /*!< Incorrect variable value. */
-    PMMemAllocFailed = 0x03, /*!< Memory allocation error. */
-    PMUnKownError = 0x04,    /*!< Unknown error. */
-    PMOutOfAuthority = 0x05, /*!< Try to modified data not your own*/
-    PMOutOfMem = 0x06,       /*!< OOM error*/
-    PMUnImplError = 0x07,    /*!< Unimplement error. */
-    PMWrongDevice = 0x08     /*!< un-correct device. */
+  PMSuccess = 0xFF,        /*!< No errors */
+  PMNotInitialized = 0x01, /*!< Data not initialized. */
+  PMInvalidValue = 0x02,   /*!< Incorrect variable value. */
+  PMMemAllocFailed = 0x03, /*!< Memory allocation error. */
+  PMUnKownError = 0x04,    /*!< Unknown error. */
+  PMOutOfAuthority = 0x05, /*!< Try to modify data not your own*/
+  PMOutOfMem = 0x06,       /*!< OOM error*/
+  PMUnImplError = 0x07,    /*!< Unimplemented error. */
+  PMWrongDevice = 0x08     /*!< Incorrect device. */
 };
 } // namespace paddle_mobile
--- a/src/common/variant.h
+++ b/src/common/variant.h
@@ -24,74 +24,74 @@ namespace paddle_mobile {
 template <int ID, typename Type> struct IDToType { typedef Type type_t; };

 template <typename F, typename... Ts> struct VariantHelper {
-    static const size_t size = sizeof(F) > VariantHelper<Ts...>::size
-                                   ? sizeof(F)
-                                   : VariantHelper<Ts...>::size;
+  static const size_t size = sizeof(F) > VariantHelper<Ts...>::size
+                                 ? sizeof(F)
+                                 : VariantHelper<Ts...>::size;

-    inline static void Destroy(size_t id, void *data) {
-        if (id == typeid(F).hash_code()) {
-            reinterpret_cast<F *>(data)->~F();
-        } else {
-            VariantHelper<Ts...>::Destroy(id, data);
-        }
+  inline static void Destroy(size_t id, void *data) {
+    if (id == typeid(F).hash_code()) {
+      reinterpret_cast<F *>(data)->~F();
+    } else {
+      VariantHelper<Ts...>::Destroy(id, data);
    }
+  }
 };

 template <typename F> struct VariantHelper<F> {
-    static const size_t size = sizeof(F);
-    inline static void Destroy(size_t id, void *data) {
-        if (id == typeid(F).hash_code()) {
-            //              reinterpret_cast<F*>(data)->~F();
-        } else {
-            //              std::cout << "未匹配到 " << std::endl;
-        }
+  static const size_t size = sizeof(F);
+  inline static void Destroy(size_t id, void *data) {
+    if (id == typeid(F).hash_code()) {
+      //              reinterpret_cast<F*>(data)->~F();
+    } else {
+      //              std::cout << "未匹配到 " << std::endl;
    }
+  }
 };

 template <size_t size> class RawData {
-  public:
-    char data[size];
-    RawData() {}
-    RawData(const RawData &raw_data) { strcpy(data, raw_data.data); }
-    //      void operator=(const RawData &raw_data){
-    //        strcpy(data, raw_data.data);
-    //      }
+public:
+  char data[size];
+  RawData() {}
+  RawData(const RawData &raw_data) { strcpy(data, raw_data.data); }
+  //      void operator=(const RawData &raw_data){
+  //        strcpy(data, raw_data.data);
+  //      }
 };

 template <typename... Ts> struct Variant {
-    Variant(const Variant &variant) {
-        //        std::cout << " 赋值构造函数 " << std::endl;
-        type_id = variant.type_id;
-        data = variant.data;
-    }
+  Variant(const Variant &variant) {
+    //        std::cout << " 赋值构造函数 " << std::endl;
+    type_id = variant.type_id;
+    data = variant.data;
+  }

-    Variant() : type_id(invalid_type()) {}
-    ~Variant() {
-        //        helper::Destroy(type_id, &data);
-    }
+  Variant() : type_id(invalid_type()) {}
+  ~Variant() {
+    //        helper::Destroy(type_id, &data);
+  }

-    template <typename T, typename... Args> void Set(Args &&... args) {
-        helper::Destroy(type_id, &data);
-        new (&data) T(std::forward<Args>(args)...);
-        type_id = typeid(T).hash_code();
-    }
+  template <typename T, typename... Args> void Set(Args &&... args) {
+    helper::Destroy(type_id, &data);
+    new (&data) T(std::forward<Args>(args)...);
+    type_id = typeid(T).hash_code();
+  }

-    template <typename T> T &Get() const {
-        if (type_id == typeid(T).hash_code()) {
-            return *const_cast<T *>(reinterpret_cast<const T *>(&data));
-        } else {
-            //      std::cout << " bad cast in variant " << std::endl;
-            throw std::bad_cast();
-        }
+  template <typename T> T &Get() const {
+    if (type_id == typeid(T).hash_code()) {
+      return *const_cast<T *>(reinterpret_cast<const T *>(&data));
+    } else {
+      //      std::cout << " bad cast in variant " << std::endl;
+      throw std::bad_cast();
    }
+  }

-    size_t TypeId() const { return type_id; }
+  size_t TypeId() const { return type_id; }

-  private:
-    static inline size_t invalid_type() { return typeid(void).hash_code(); }
-    typedef VariantHelper<Ts...> helper;
-    size_t type_id;
-    RawData<helper::size> data;
+private:
+  static inline size_t invalid_type() { return typeid(void).hash_code(); }
+  typedef VariantHelper<Ts...> helper;
+  size_t type_id;
+  RawData<helper::size> data;
 };

 template <typename T> struct Vistor { typedef T type_t; };

--- a/src/framework/attribute.h
+++ b/src/framework/attribute.h
@@ -27,102 +27,102 @@ namespace framework {
 class BlockDesc;

 class Attribute {
-  public:
-    static Attribute GetAttrValue(const proto::OpDesc::Attr &attr_desc) {
-        //    std::cout << "begin get attr value" << std::endl;
-        Attribute attr;
-        switch (attr_desc.type()) {
-        case proto::AttrType::BOOLEAN: {
-            attr.Set<bool>(attr_desc.b());
-            break;
-        }
-        case proto::AttrType::INT: {
-            attr.Set<int>(attr_desc.i());
-            break;
-        }
-        case proto::AttrType::FLOAT: {
-            attr.Set<float>(attr_desc.f());
-            break;
-        }
-        case proto::AttrType::STRING: {
-            attr.Set<std::string>(attr_desc.s());
-            break;
-        }
-        case proto::AttrType::BOOLEANS: {
-            std::vector<bool> val(attr_desc.bools_size());
-            for (int i = 0; i < attr_desc.bools_size(); ++i) {
-                val[i] = attr_desc.bools(i);
-            }
-            attr.Set<std::vector<bool>>(val);
-            break;
-        }
-        case proto::AttrType::INTS: {
-            std::vector<int> val(attr_desc.ints_size());
-            for (int i = 0; i < attr_desc.ints_size(); ++i) {
-                val[i] = attr_desc.ints(i);
-            }
-            attr.Set<std::vector<int>>(val);
-            break;
-        }
-        case proto::AttrType::FLOATS: {
-            std::vector<float> val(attr_desc.floats_size());
-            for (int i = 0; i < attr_desc.floats_size(); ++i) {
-                val[i] = attr_desc.floats(i);
-            }
-            attr.Set<std::vector<float>>(val);
-            break;
-        }
-        case proto::AttrType::STRINGS: {
-            std::vector<std::string> val(attr_desc.strings_size());
-            for (int i = 0; i < attr_desc.strings_size(); ++i) {
-                val[i] = attr_desc.strings(i);
-            }
-            attr.Set<std::vector<std::string>>(val);
-            break;
-        }
-        case proto::AttrType::LONG: {
-            attr.Set<int64_t>(attr_desc.l());
-            break;
-        }
-        default:
-            //        std::cout << " not support " << std::endl;
-            break;
-        }
-        //    std::cout << "end get attr value" << std::endl;
-        return attr;
+public:
+  static Attribute GetAttrValue(const proto::OpDesc::Attr &attr_desc) {
+    //    std::cout << "begin get attr value" << std::endl;
+    Attribute attr;
+    switch (attr_desc.type()) {
+    case proto::AttrType::BOOLEAN: {
+      attr.Set<bool>(attr_desc.b());
+      break;
    }
-
-    Attribute() {}
-    template <typename T, typename... Args> Attribute &Set(Args &&... args) {
-        variant_.Set<T>(args...);
-        return *this;
+    case proto::AttrType::INT: {
+      attr.Set<int>(attr_desc.i());
+      break;
+    }
+    case proto::AttrType::FLOAT: {
+      attr.Set<float>(attr_desc.f());
+      break;
+    }
+    case proto::AttrType::STRING: {
+      attr.Set<std::string>(attr_desc.s());
+      break;
+    }
+    case proto::AttrType::BOOLEANS: {
+      std::vector<bool> val(attr_desc.bools_size());
+      for (int i = 0; i < attr_desc.bools_size(); ++i) {
+        val[i] = attr_desc.bools(i);
+      }
+      attr.Set<std::vector<bool>>(val);
+      break;
+    }
+    case proto::AttrType::INTS: {
+      std::vector<int> val(attr_desc.ints_size());
+      for (int i = 0; i < attr_desc.ints_size(); ++i) {
+        val[i] = attr_desc.ints(i);
+      }
+      attr.Set<std::vector<int>>(val);
+      break;
+    }
+    case proto::AttrType::FLOATS: {
+      std::vector<float> val(attr_desc.floats_size());
+      for (int i = 0; i < attr_desc.floats_size(); ++i) {
+        val[i] = attr_desc.floats(i);
+      }
+      attr.Set<std::vector<float>>(val);
+      break;
    }
+    case proto::AttrType::STRINGS: {
+      std::vector<std::string> val(attr_desc.strings_size());
+      for (int i = 0; i < attr_desc.strings_size(); ++i) {
+        val[i] = attr_desc.strings(i);
+      }
+      attr.Set<std::vector<std::string>>(val);
+      break;
+    }
+    case proto::AttrType::LONG: {
+      attr.Set<int64_t>(attr_desc.l());
+      break;
+    }
+    default:
+      //        std::cout << " not support " << std::endl;
+      break;
+    }
+    //    std::cout << "end get attr value" << std::endl;
+    return attr;
+  }

-    template <typename T> T &Get() const { return variant_.Get<T>(); }
+  Attribute() {}
+  template <typename T, typename... Args> Attribute &Set(Args &&... args) {
+    variant_.Set<T>(args...);
+    return *this;
+  }

-  private:
-    Variant<int, float, std::string, std::vector<int>, std::vector<float>,
-            std::vector<std::string>, bool, std::vector<bool>, BlockDesc *,
-            int64_t>
-        variant_;
+  template <typename T> T &Get() const { return variant_.Get<T>(); }
+
+private:
+  Variant<int, float, std::string, std::vector<int>, std::vector<float>,
+          std::vector<std::string>, bool, std::vector<bool>, BlockDesc *,
+          int64_t>
+      variant_;
 };

 using AttributeMap = std::unordered_map<std::string, Attribute>;

 class AttrReader {
-  public:
-    explicit AttrReader(const AttributeMap &attrs) : attrs_(attrs) {}
+public:
+  explicit AttrReader(const AttributeMap &attrs) : attrs_(attrs) {}

-    template <typename T> inline T Get(const std::string &name) const {
-        //          PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should
-        //          be in
-        //          AttributeMap",
-        //                         name);
-        return ((Attribute)attrs_.at(name)).Get<T>();
-    }
+  template <typename T> inline T Get(const std::string &name) const {
+    //          PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should
+    //          be in
+    //          AttributeMap",
+    //                         name);
+    return ((Attribute)attrs_.at(name)).Get<T>();
+  }

-  private:
-    const AttributeMap &attrs_;
+private:
+  const AttributeMap &attrs_;
 };

 } // namespace framework

--- a/src/framework/block_desc.cpp
+++ b/src/framework/block_desc.cpp
@@ -22,28 +22,28 @@ namespace paddle_mobile {
 namespace framework {

 std::vector<std::shared_ptr<VarDesc>> BlockDesc::Vars() const {
-    std::vector<std::shared_ptr<VarDesc>> res;
-    for (const auto &p : vars_) {
-        res.push_back(p.second);
-    }
-    return res;
+  std::vector<std::shared_ptr<VarDesc>> res;
+  for (const auto &p : vars_) {
+    res.push_back(p.second);
+  }
+  return res;
 }

 std::vector<std::shared_ptr<OpDesc>> BlockDesc::Ops() const {
-    std::vector<std::shared_ptr<OpDesc>> res;
-    for (const auto &op : ops_) {
-        res.push_back(op);
-    }
-    return res;
+  std::vector<std::shared_ptr<OpDesc>> res;
+  for (const auto &op : ops_) {
+    res.push_back(op);
+  }
+  return res;
 }

 BlockDesc::BlockDesc(const proto::BlockDesc &desc) : desc_(desc) {
-    for (const proto::VarDesc &var_desc : desc_.vars()) {
-        vars_[var_desc.name()].reset(new VarDesc(var_desc));
-    }
-    for (const proto::OpDesc &op_desc : desc_.ops()) {
-        ops_.emplace_back(new framework::OpDesc(op_desc));
-    }
+  for (const proto::VarDesc &var_desc : desc_.vars()) {
+    vars_[var_desc.name()].reset(new VarDesc(var_desc));
+  }
+  for (const proto::OpDesc &op_desc : desc_.ops()) {
+    ops_.emplace_back(new framework::OpDesc(op_desc));
+  }
 }

 } // namespace framework

--- a/src/framework/block_desc.h
+++ b/src/framework/block_desc.h
@@ -27,29 +27,28 @@ namespace paddle_mobile {
 namespace framework {

 class BlockDesc : PaddleMobileObject {
-  public:
-    BlockDesc(const proto::BlockDesc &desc);
+public:
+  BlockDesc(const proto::BlockDesc &desc);

-    const int &ID() const { return desc_.idx(); }
+  int ID() const { return desc_.idx(); } // by value: idx() may yield a temporary

-    const int &Parent() const { return desc_.parent_idx(); }
+  int Parent() const { return desc_.parent_idx(); } // by value, same as ID()

-    bool operator==(const paddle_mobile::framework::BlockDesc &in_block) const {
-        return this->ID() == in_block.ID() &&
-               this->Parent() == in_block.Parent();
-    }
+  bool operator==(const paddle_mobile::framework::BlockDesc &in_block) const {
+    return this->ID() == in_block.ID() && this->Parent() == in_block.Parent();
+  }

-    bool operator<(const paddle_mobile::framework::BlockDesc &in_block) const {
-        return this->ID() < in_block.ID() && this->Parent() < in_block.Parent();
-    }
+  bool operator<(const paddle_mobile::framework::BlockDesc &in_block) const {
+    return ID() != in_block.ID() ? ID() < in_block.ID() : Parent() < in_block.Parent();
+  } // lexicographic on (ID, Parent): a strict weak ordering, unlike "a && b"

-    std::vector<std::shared_ptr<VarDesc>> Vars() const;
-    std::vector<std::shared_ptr<OpDesc>> Ops() const;
+  std::vector<std::shared_ptr<VarDesc>> Vars() const;
+  std::vector<std::shared_ptr<OpDesc>> Ops() const;

-  private:
-    proto::BlockDesc desc_;
-    std::vector<std::shared_ptr<OpDesc>> ops_;
-    std::unordered_map<std::string, std::shared_ptr<VarDesc>> vars_;
+private:
+  proto::BlockDesc desc_;
+  std::vector<std::shared_ptr<OpDesc>> ops_;
+  std::unordered_map<std::string, std::shared_ptr<VarDesc>> vars_;
 };

 } // namespace framework
@@ -58,13 +57,13 @@ class BlockDesc : PaddleMobileObject {
 namespace std {

 template <> struct hash<paddle_mobile::framework::BlockDesc> {
-    typedef paddle_mobile::framework::BlockDesc argument_type;
-    typedef std::size_t result_type;
-    result_type operator()(argument_type const &s) const noexcept {
-        result_type const h1(std::hash<int>{}(s.ID()));
-        result_type const h2(std::hash<int>{}(s.ID()));
-        return h1 ^ (h2 << 1);
-    }
+  typedef paddle_mobile::framework::BlockDesc argument_type;
+  typedef std::size_t result_type;
+  result_type operator()(argument_type const &s) const noexcept {
+    // Mix ID with Parent (the two fields operator== compares), not ID twice.
+    result_type const h2(std::hash<int>{}(s.Parent()));
+    return std::hash<int>{}(s.ID()) ^ (h2 << 1);
+  }
 };

 } // namespace std
--- a/src/framework/data_layout.h
+++ b/src/framework/data_layout.h
@@ -22,45 +22,45 @@ namespace paddle_mobile {
 namespace framework {

 enum class DataLayout {
-    kNHWC = 0,
-    kNCHW = 1,
-    kAnyLayout = 2,
+  kNHWC = 0,
+  kNCHW = 1,
+  kAnyLayout = 2,
 };

 inline DataLayout StringToDataLayout(const std::string &str) {
-    std::string s(str);
-    for (size_t i = 0; i < s.size(); ++i) {
-        s[i] = toupper(s[i]);
-    }
+  std::string s(str);
+  for (size_t i = 0; i < s.size(); ++i) {
+    s[i] = toupper(static_cast<unsigned char>(s[i])); // negative char is UB
+  }

-    if (s == "NHWC") {
-        return DataLayout::kNHWC;
-    } else if (s == "NCHW") {
-        return DataLayout::kNCHW;
-    } else if (s == "ANYLAYOUT") {
-        return DataLayout::kAnyLayout;
-    } else {
-        //    std::cout << "Unknown storage order string: %s", s;
-    }
+  if (s == "NHWC") {
+    return DataLayout::kNHWC;
+  } else if (s == "NCHW") {
+    return DataLayout::kNCHW;
+  } else if (s == "ANYLAYOUT") {
+    return DataLayout::kAnyLayout;
+  }
+  // Unknown name: fall back to kAnyLayout rather than run off the end (UB).
+  return DataLayout::kAnyLayout;
 }

 inline std::string DataLayoutToString(const DataLayout &data_layout) {
-    switch (data_layout) {
-    case DataLayout::kNHWC:
-        return "NHWC";
-    case DataLayout::kNCHW:
-        return "NCHW";
-    case DataLayout::kAnyLayout:
-        return "ANY_LAYOUT";
-    default:
-        break;
-        //      std::cout << "unknown DataLayou %d", data_layout;
-    }
+  switch (data_layout) {
+  case DataLayout::kNHWC:
+    return "NHWC";
+  case DataLayout::kNCHW:
+    return "NCHW";
+  case DataLayout::kAnyLayout:
+    return "ANY_LAYOUT";
+  default:
+    // Unknown enumerator: return a marker; falling off the end would be UB.
+    return "UNKNOWN_LAYOUT";
+  }
 }

 inline std::ostream &operator<<(std::ostream &out, const DataLayout &l) {
-    out << DataLayoutToString(l);
-    return out;
+  out << DataLayoutToString(l);
+  return out;
 }

 } // namespace framework

--- a/src/framework/data_transform.cpp
+++ b/src/framework/data_transform.cpp
@@ -24,68 +24,68 @@ namespace paddle_mobile {
 namespace framework {

 static void PassTensorData(Tensor *from, Tensor *to) {
-    to->ShareDataWith(*from);
-    *from = Tensor();
+  to->ShareDataWith(*from);
+  *from = Tensor();
 }

 void DataTransform(const OpKernelType &expected_kernel_type,
                   const OpKernelType &kernel_type_for_var,
                   const Tensor &input_tensor, Tensor *output_tensor) {
-    bool transformed = false;
-    Tensor in;
-    in.ShareDataWith(input_tensor);
-    Tensor out;
+  bool transformed = false;
+  Tensor in;
+  in.ShareDataWith(input_tensor);
+  Tensor out;

-    //  // do layout transform
-    //  if (NeedTransformLayout(expected_kernel_type.data_layout_,
-    //                          kernel_type_for_var.data_layout_)) {
-    //    TransDataLayout(kernel_type_for_var, expected_kernel_type, in,
-    //    &out);
-    //    transformed = true;
-    //    PassTensorData(&out, &in);
-    //  }
-    //
-    //  // do data type transform
-    //  if (expected_kernel_type.data_type_ !=
-    //  kernel_type_for_var.data_type_) {
-    //    TransDataType(kernel_type_for_var, expected_kernel_type, in,
-    //    &out);
-    //    transformed = true;
-    //    PassTensorData(&out, &in);
-    //  }
-    //
-    //  // do device transform
-    //  if (!platform::is_same_place(kernel_type_for_var.place_,
-    //                               expected_kernel_type.place_)) {
-    //    TransDataDevice(in, expected_kernel_type.place_, &out);
-    //    transformed = true;
-    //    PassTensorData(&out, &in);
-    //  }
-    //
-    //  PADDLE_ENFORCE(transformed, "No transform is applied, please
-    //  check!");
-    // get output data
-    output_tensor->ShareDataWith(in);
+  //  // do layout transform
+  //  if (NeedTransformLayout(expected_kernel_type.data_layout_,
+  //                          kernel_type_for_var.data_layout_)) {
+  //    TransDataLayout(kernel_type_for_var, expected_kernel_type, in,
+  //    &out);
+  //    transformed = true;
+  //    PassTensorData(&out, &in);
+  //  }
+  //
+  //  // do data type transform
+  //  if (expected_kernel_type.data_type_ !=
+  //  kernel_type_for_var.data_type_) {
+  //    TransDataType(kernel_type_for_var, expected_kernel_type, in,
+  //    &out);
+  //    transformed = true;
+  //    PassTensorData(&out, &in);
+  //  }
+  //
+  //  // do device transform
+  //  if (!platform::is_same_place(kernel_type_for_var.place_,
+  //                               expected_kernel_type.place_)) {
+  //    TransDataDevice(in, expected_kernel_type.place_, &out);
+  //    transformed = true;
+  //    PassTensorData(&out, &in);
+  //  }
+  //
+  //  PADDLE_ENFORCE(transformed, "No transform is applied, please
+  //  check!");
+  // get output data
+  output_tensor->ShareDataWith(in);
 }

 void CopyVariableWithTensor(const Variable &in_var, const Tensor &tensor,
                            Variable &out_var) {
-    //  if (in_var.IsType<LoDTensor>()) {
-    //    auto& in_lod_tensor = in_var.Get<LoDTensor>();
-    //    auto* tran_lod_tensor = out_var.GetMutable<LoDTensor>();
-    //    tran_lod_tensor->set_lod(in_lod_tensor.lod());
-    //    tran_lod_tensor->set_layout(in_lod_tensor.layout());
-    //    tran_lod_tensor->ShareDataWith(tensor);
-    //  } else if (in_var.IsType<SelectedRows>()) {
-    //    auto& in_selected_rows = in_var.Get<SelectedRows>();
-    //    auto* trans_selected_rows =
-    //    out_var.GetMutable<SelectedRows>();
-    //    trans_selected_rows->set_height(in_selected_rows.height());
-    //    trans_selected_rows->set_rows(in_selected_rows.rows());
-    //    trans_selected_rows->mutable_value()->ShareDataWith(tensor);
-    //  } else {
-    //    PADDLE_THROW("unknown var type");
-    //  }
+  //  if (in_var.IsType<LoDTensor>()) {
+  //    auto& in_lod_tensor = in_var.Get<LoDTensor>();
+  //    auto* tran_lod_tensor = out_var.GetMutable<LoDTensor>();
+  //    tran_lod_tensor->set_lod(in_lod_tensor.lod());
+  //    tran_lod_tensor->set_layout(in_lod_tensor.layout());
+  //    tran_lod_tensor->ShareDataWith(tensor);
+  //  } else if (in_var.IsType<SelectedRows>()) {
+  //    auto& in_selected_rows = in_var.Get<SelectedRows>();
+  //    auto* trans_selected_rows =
+  //    out_var.GetMutable<SelectedRows>();
+  //    trans_selected_rows->set_height(in_selected_rows.height());
+  //    trans_selected_rows->set_rows(in_selected_rows.rows());
+  //    trans_selected_rows->mutable_value()->ShareDataWith(tensor);
+  //  } else {
+  //    PADDLE_THROW("unknown var type");
+  //  }
 }

 } // namespace framework

--- a/src/framework/ddim.cc
+++ b/src/framework/ddim.cc
@@ -20,158 +20,156 @@ namespace framework {
 /// @cond HIDDEN

 template <int i> Dim<i> make_dim(const int64_t *d) {
-    return Dim<i>(*d, make_dim<i - 1>(d + 1));
+  return Dim<i>(*d, make_dim<i - 1>(d + 1));
 }

 template <> Dim<0> make_dim<0>(const int64_t *d) { return Dim<0>(*d); }

 void make_ddim(DDim &ddim, const int64_t *dims, int n) {
-    switch (n) {
-    case 0:
-        ddim = make_dim<0>(dims);
-        break;
-    case 1:
-        ddim = make_dim<1>(dims);
-        break;
-    case 2:
-        ddim = make_dim<2>(dims);
-        break;
-    case 3:
-        ddim = make_dim<3>(dims);
-        break;
-    case 4:
-        ddim = make_dim<4>(dims);
-        break;
-    case 5:
-        ddim = make_dim<5>(dims);
-        break;
-    case 6:
-        ddim = make_dim<6>(dims);
-        break;
-    case 7:
-        ddim = make_dim<7>(dims);
-        break;
-    case 8:
-        ddim = make_dim<8>(dims);
-        break;
-    case 9:
-        ddim = make_dim<9>(dims);
-        break;
-    default:
-        //      std::cout << "Dynamic dimensions must have between [1,
-        //      9]
-        //      dimensions.";
-        break;
-    }
+  switch (n) {
+  case 0:
+    ddim = make_dim<0>(dims);
+    break;
+  case 1:
+    ddim = make_dim<1>(dims);
+    break;
+  case 2:
+    ddim = make_dim<2>(dims);
+    break;
+  case 3:
+    ddim = make_dim<3>(dims);
+    break;
+  case 4:
+    ddim = make_dim<4>(dims);
+    break;
+  case 5:
+    ddim = make_dim<5>(dims);
+    break;
+  case 6:
+    ddim = make_dim<6>(dims);
+    break;
+  case 7:
+    ddim = make_dim<7>(dims);
+    break;
+  case 8:
+    ddim = make_dim<8>(dims);
+    break;
+  case 9:
+    ddim = make_dim<9>(dims);
+    break;
+  default:
+    //      std::cout << "Dynamic dimensions must have between [1,
+    //      9]
+    //      dimensions.";
+    break;
+  }
 }

 /// @endcond

 DDim make_ddim(std::initializer_list<int64_t> dims) {
-    DDim result(make_dim(0));
-    make_ddim(result, dims.begin(), dims.size());
-    return result;
+  DDim result(make_dim(0));
+  make_ddim(result, dims.begin(), dims.size());
+  return result;
 }

 DDim make_ddim(const std::vector<int64_t> &dims) {
-    DDim result(make_dim(0));
-    make_ddim(result, &dims[0], dims.size());
-    return result;
+  DDim result(make_dim(0));
+  make_ddim(result, &dims[0], dims.size());
+  return result;
 }

 DDim make_ddim(const std::vector<int> &dims) {
-    std::vector<int64_t> res(dims.size());
-    std::transform(dims.begin(), dims.end(), res.begin(),
-                   [](int d) { return static_cast<int64_t>(d); });
-    return make_ddim(res);
+  std::vector<int64_t> res(dims.size());
+  std::transform(dims.begin(), dims.end(), res.begin(),
+                 [](int d) { return static_cast<int64_t>(d); });
+  return make_ddim(res);
 }

 /// @cond HIDDEN
 // XXX For some reason, putting this in an anonymous namespace causes
 // errors
 struct DynamicMutableIndexer : Vistor<int64_t &> {
-  public:
-    explicit DynamicMutableIndexer(int idx) : idx_(idx) {}
+public:
+  explicit DynamicMutableIndexer(int idx) : idx_(idx) {}

-    template <int D> int64_t &operator()(Dim<D> &dim) const {
-        return dim[idx_];
-    }
+  template <int D> int64_t &operator()(Dim<D> &dim) const { return dim[idx_]; }

-  private:
-    int idx_;
+private:
+  int idx_;
 };

 struct DynamicConstIndexer : public Vistor<int64_t> {
-  public:
-    explicit DynamicConstIndexer(int idx) : idx_(idx) {}
+public:
+  explicit DynamicConstIndexer(int idx) : idx_(idx) {}

-    template <int D> int64_t operator()(const Dim<D> &dim) const {
-        return dim[idx_];
-    }
+  template <int D> int64_t operator()(const Dim<D> &dim) const {
+    return dim[idx_];
+  }

-  private:
-    int idx_;
+private:
+  int idx_;
 };

 /// @endcond

 int64_t &DDim::operator[](int idx) {
-    return DDim::ApplyVistor(DynamicMutableIndexer(idx), *this);
+  return DDim::ApplyVistor(DynamicMutableIndexer(idx), *this);
 }

 int64_t DDim::operator[](int idx) const {
-    return DDim::ApplyVistor(DynamicConstIndexer(idx), *this);
+  return DDim::ApplyVistor(DynamicConstIndexer(idx), *this);
 }

 int DDim::size() const { return arity(*this); }

 bool DDim::operator==(DDim d) const {
-    //  if (var.which() != d.getVar().which()) {
-    //    return false;
-    //  } else {
-    std::vector<int64_t> v1 = vectorize(*this);
-    std::vector<int64_t> v2 = vectorize(d);
-
-    for (unsigned int i = 0; i < v1.size(); i++) {
-        if (v1[i] != v2[i]) {
-            return false;
-        }
+  // Shapes of different rank are never equal; checking the sizes first also
+  // keeps v2[i] below from being read past the end of the shorter shape.
+  std::vector<int64_t> v1 = vectorize(*this);
+  std::vector<int64_t> v2 = vectorize(d);
+  if (v1.size() != v2.size()) {
+    return false;
+  }
+  for (unsigned int i = 0; i < v1.size(); i++) {
+    if (v1[i] != v2[i]) {
+      return false;
    }
+  }

-    return true;
-    //  }
+  return true;
 }

 bool DDim::operator!=(DDim d) const { return !(*this == d); }

 DDim DDim::operator+(DDim d) const {
-    std::vector<int64_t> v1 = vectorize(*this);
-    std::vector<int64_t> v2 = vectorize(d);
+  std::vector<int64_t> v1 = vectorize(*this);
+  std::vector<int64_t> v2 = vectorize(d);

-    std::vector<int64_t> v3;
+  std::vector<int64_t> v3;

-    assert(v1.size() == v2.size());
+  assert(v1.size() == v2.size());

-    for (unsigned int i = 0; i < v1.size(); i++) {
-        v3.push_back(v1[i] + v2[i]);
-    }
+  for (unsigned int i = 0; i < v1.size(); i++) {
+    v3.push_back(v1[i] + v2[i]);
+  }

-    return make_ddim(v3);
+  return make_ddim(v3);
 }

 DDim DDim::operator*(DDim d) const {
-    std::vector<int64_t> v1 = vectorize(*this);
-    std::vector<int64_t> v2 = vectorize(d);
+  std::vector<int64_t> v1 = vectorize(*this);
+  std::vector<int64_t> v2 = vectorize(d);

-    std::vector<int64_t> v3;
+  std::vector<int64_t> v3;

-    assert(v1.size() == v2.size());
+  assert(v1.size() == v2.size());

-    for (unsigned int i = 0; i < v1.size(); i++) {
-        v3.push_back(v1[i] * v2[i]);
-    }
+  for (unsigned int i = 0; i < v1.size(); i++) {
+    v3.push_back(v1[i] * v2[i]);
+  }

-    return make_ddim(v3);
+  return make_ddim(v3);
 }

 int64_t get(const DDim &ddim, int idx) { return ddim[idx]; }
@@ -180,152 +178,152 @@ void set(DDim &ddim, int idx, int value) { ddim[idx] = value; }

 /// @cond HIDDEN
 struct VectorizeVisitor : Vistor<void> {
-    std::vector<int64_t> &vector;
+  std::vector<int64_t> &vector;

-    explicit VectorizeVisitor(std::vector<int64_t> &v) : vector(v) {}
+  explicit VectorizeVisitor(std::vector<int64_t> &v) : vector(v) {}

-    template <typename T> void operator()(const T &t) {
-        vector.push_back(t.head);
-        this->operator()(t.tail);
-    }
+  template <typename T> void operator()(const T &t) {
+    vector.push_back(t.head);
+    this->operator()(t.tail);
+  }

-    void operator()(const Dim<0> &t) {}
+  void operator()(const Dim<0> &t) {}
 };
 /// @endcond

 std::vector<int64_t> vectorize(const DDim &ddim) {
-    std::vector<int64_t> result;
-    VectorizeVisitor visitor(result);
-    DDim::ApplyVistor(visitor, ddim);
-    return result;
+  std::vector<int64_t> result;
+  VectorizeVisitor visitor(result);
+  DDim::ApplyVistor(visitor, ddim);
+  return result;
 }

 // NOTE: framework::vectorize converts to type int64_t
 //       which does not fit cudnn inputs.
 std::vector<int> vectorize2int(const DDim &ddim) {
-    std::vector<int64_t> temp = vectorize(ddim);
-    std::vector<int> result(temp.begin(), temp.end());
-    return result;
+  std::vector<int64_t> temp = vectorize(ddim);
+  std::vector<int> result(temp.begin(), temp.end());
+  return result;
 }

 struct ProductVisitor : Vistor<int64_t> {
-    template <int D> int64_t operator()(const Dim<D> &dim) {
-        return product(dim);
-    }
+  template <int D> int64_t operator()(const Dim<D> &dim) {
+    return product(dim);
+  }
 };

 int64_t product(const DDim &ddim) {
-    ProductVisitor visitor;
-    return DDim::ApplyVistor(visitor, ddim);
+  ProductVisitor visitor;
+  return DDim::ApplyVistor(visitor, ddim);
 }

 struct SliceVectorizeVisitor : Vistor<void> {
-    std::vector<int64_t> &vector;
-    int begin;
-    int end;
-
-    SliceVectorizeVisitor(std::vector<int64_t> &v, int b, int e)
-        : vector(v), begin(b), end(e) {
-        //    PADDLE_ENFORCE(begin < end,
-        //                   "Begin index must be less than end index in
-        //                   ddim
-        //                   slice.");
-        //    PADDLE_ENFORCE(begin >= 0,
-        //                   "Begin index can't be less than zero in
-        //                   ddim slice.");
+  std::vector<int64_t> &vector;
+  int begin;
+  int end;
+
+  SliceVectorizeVisitor(std::vector<int64_t> &v, int b, int e)
+      : vector(v), begin(b), end(e) {
+    //    PADDLE_ENFORCE(begin < end,
+    //                   "Begin index must be less than end index in
+    //                   ddim
+    //                   slice.");
+    //    PADDLE_ENFORCE(begin >= 0,
+    //                   "Begin index can't be less than zero in
+    //                   ddim slice.");
+  }
+
+  template <int S> void operator()(const Dim<S> &dim) {
+    if (begin == 0) {
+      vector.push_back(dim.head);
+    } else {
+      --begin;
    }
-
-    template <int S> void operator()(const Dim<S> &dim) {
-        if (begin == 0) {
-            vector.push_back(dim.head);
-        } else {
-            --begin;
-        }
-        --end;
-        if (end > 0) {
-            this->operator()(dim.tail);
-        }
+    --end;
+    if (end > 0) {
+      this->operator()(dim.tail);
    }
+  }

-    void operator()(const Dim<0> &dim) {
-        //    PADDLE_ENFORCE(end == 0, "End index in ddim slice is out
-        //    of bound.");
-    }
+  void operator()(const Dim<0> &dim) {
+    //    PADDLE_ENFORCE(end == 0, "End index in ddim slice is out
+    //    of bound.");
+  }
 };

 DDim slice_ddim(const DDim &ddim, int begin, int end) {
-    std::vector<int64_t> vec;
-    vec.reserve(end - begin);
-    SliceVectorizeVisitor visitor(vec, begin, end);
-    //  boost::apply_visitor(visitor, dim);
-    DDim::ApplyVistor(visitor, ddim);
-    //  visitor(ddim.var.Get<Dim<4>>());
-    return make_ddim(vec);
+  std::vector<int64_t> vec;
+  vec.reserve(end - begin);
+  SliceVectorizeVisitor visitor(vec, begin, end);
+  //  boost::apply_visitor(visitor, dim);
+  DDim::ApplyVistor(visitor, ddim);
+  //  visitor(ddim.var.Get<Dim<4>>());
+  return make_ddim(vec);
 }

 /// \cond HIDDEN

 struct ArityVisitor : Vistor<int> {
-    template <int D> int operator()(Dim<D>) const { return D; }
+  template <int D> int operator()(Dim<D>) const { return D; }
 };

 /// \endcond

 int arity(const DDim &d) {
-    ArityVisitor arityVisitor = ArityVisitor();
-    return DDim::ApplyVistor(arityVisitor, d);
-    //  return arityVisitor(d.var.Get<Dim<4>>());
-    //  return boost::apply_visitor(ArityVisitor(), d); }
+  ArityVisitor arityVisitor = ArityVisitor();
+  return DDim::ApplyVistor(arityVisitor, d);
+  //  return arityVisitor(d.var.Get<Dim<4>>());
+  //  return boost::apply_visitor(ArityVisitor(), d); }
 }
 /// \cond HIDDEN

 /// \endcond

 struct OSVistor : Vistor<std::ostream &> {
-    OSVistor(std::ostream &os) : os_(os) {}
+  OSVistor(std::ostream &os) : os_(os) {}

-    template <int D> std::ostream &operator()(Dim<D> dim) const {
-        return os_ << dim;
-    }
+  template <int D> std::ostream &operator()(Dim<D> dim) const {
+    return os_ << dim;
+  }

-  private:
-    std::ostream &os_;
+private:
+  std::ostream &os_;
 };

 std::ostream &operator<<(std::ostream &os, const DDim &ddim) {
-    auto vistor = OSVistor(os);
-    DDim::ApplyVistor(vistor, ddim);
-    return os;
+  auto vistor = OSVistor(os);
+  DDim::ApplyVistor(vistor, ddim);
+  return os;
 }

 DDim::DDim(std::initializer_list<int64_t> init_list) {
-    *this = make_ddim(init_list);
+  *this = make_ddim(init_list);
 }

 DDim flatten_to_2d(const DDim &src, int num_col_dims) {
-    int rank = src.size();
-    return make_ddim({product(slice_ddim(src, 0, num_col_dims)),
-                      product(slice_ddim(src, num_col_dims, rank))});
+  int rank = src.size();
+  return make_ddim({product(slice_ddim(src, 0, num_col_dims)),
+                    product(slice_ddim(src, num_col_dims, rank))});
 }

 DDim flatten_to_1d(const DDim &src) { return make_ddim({product(src)}); }

 DDim stride(const DDim &ddim) {
-    std::vector<int64_t> strides(ddim.size());
-    strides[ddim.size() - 1] = 1;
-    for (int i = ddim.size() - 2; i >= 0; --i) {
-        strides[i] = strides[i + 1] * ddim[i + 1];
-    }
-    return framework::make_ddim(strides);
+  std::vector<int64_t> strides(ddim.size());
+  strides[ddim.size() - 1] = 1;
+  for (int i = ddim.size() - 2; i >= 0; --i) {
+    strides[i] = strides[i + 1] * ddim[i + 1];
+  }
+  return framework::make_ddim(strides);
 }

 DDim stride_numel(const framework::DDim &ddim) {
-    std::vector<int64_t> strides(ddim.size());
-    strides[ddim.size() - 1] = ddim[ddim.size() - 1];
-    for (int i = ddim.size() - 2; i >= 0; --i) {
-        strides[i] = strides[i + 1] * ddim[i];
-    }
-    return framework::make_ddim(strides);
+  std::vector<int64_t> strides(ddim.size());
+  strides[ddim.size() - 1] = ddim[ddim.size() - 1];
+  for (int i = ddim.size() - 2; i >= 0; --i) {
+    strides[i] = strides[i + 1] * ddim[i];
+  }
+  return framework::make_ddim(strides);
 }

 } // namespace framework

--- a/src/framework/ddim.h
+++ b/src/framework/ddim.h
@@ -30,77 +30,77 @@ namespace framework {
 * The number of dimensions must be between [1, 9].
 */
 struct DDim {
-    typedef Variant<Dim<0>, Dim<1>, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>,
-                    Dim<7>, Dim<8>, Dim<9>>
-        DDimVar;
-    DDimVar var;
-
-    template <typename Vistor>
-    static typename Vistor::type_t ApplyVistor(Vistor vistor, const DDim &d) {
-        if (d.var.TypeId() == typeid(Dim<0>).hash_code()) {
-            return vistor(d.var.Get<Dim<0>>());
-        } else if (d.var.TypeId() == typeid(Dim<1>).hash_code()) {
-            return vistor(d.var.Get<Dim<1>>());
-        } else if (d.var.TypeId() == typeid(Dim<2>).hash_code()) {
-            return vistor(d.var.Get<Dim<2>>());
-        } else if (d.var.TypeId() == typeid(Dim<3>).hash_code()) {
-            return vistor(d.var.Get<Dim<3>>());
-        } else if (d.var.TypeId() == typeid(Dim<4>).hash_code()) {
-            return vistor(d.var.Get<Dim<4>>());
-        } else if (d.var.TypeId() == typeid(Dim<5>).hash_code()) {
-            return vistor(d.var.Get<Dim<5>>());
-        } else if (d.var.TypeId() == typeid(Dim<6>).hash_code()) {
-            return vistor(d.var.Get<Dim<6>>());
-        } else if (d.var.TypeId() == typeid(Dim<7>).hash_code()) {
-            return vistor(d.var.Get<Dim<7>>());
-        } else if (d.var.TypeId() == typeid(Dim<8>).hash_code()) {
-            return vistor(d.var.Get<Dim<8>>());
-        } else if (d.var.TypeId() == typeid(Dim<9>).hash_code()) {
-            return vistor(d.var.Get<Dim<9>>());
-        } else {
-            printf(" dim not support  \n");
-            throw std::bad_exception();
-            //        return typename Vistor::type_t();
-        }
+  typedef Variant<Dim<0>, Dim<1>, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>,
+                  Dim<7>, Dim<8>, Dim<9>>
+      DDimVar;
+  DDimVar var;
+
+  template <typename Vistor>
+  static typename Vistor::type_t ApplyVistor(Vistor vistor, const DDim &d) {
+    if (d.var.TypeId() == typeid(Dim<0>).hash_code()) {
+      return vistor(d.var.Get<Dim<0>>());
+    } else if (d.var.TypeId() == typeid(Dim<1>).hash_code()) {
+      return vistor(d.var.Get<Dim<1>>());
+    } else if (d.var.TypeId() == typeid(Dim<2>).hash_code()) {
+      return vistor(d.var.Get<Dim<2>>());
+    } else if (d.var.TypeId() == typeid(Dim<3>).hash_code()) {
+      return vistor(d.var.Get<Dim<3>>());
+    } else if (d.var.TypeId() == typeid(Dim<4>).hash_code()) {
+      return vistor(d.var.Get<Dim<4>>());
+    } else if (d.var.TypeId() == typeid(Dim<5>).hash_code()) {
+      return vistor(d.var.Get<Dim<5>>());
+    } else if (d.var.TypeId() == typeid(Dim<6>).hash_code()) {
+      return vistor(d.var.Get<Dim<6>>());
+    } else if (d.var.TypeId() == typeid(Dim<7>).hash_code()) {
+      return vistor(d.var.Get<Dim<7>>());
+    } else if (d.var.TypeId() == typeid(Dim<8>).hash_code()) {
+      return vistor(d.var.Get<Dim<8>>());
+    } else if (d.var.TypeId() == typeid(Dim<9>).hash_code()) {
+      return vistor(d.var.Get<Dim<9>>());
+    } else {
+      printf(" dim not support  \n");
+      throw std::bad_exception();
+      //        return typename Vistor::type_t();
    }
+  }

-    DDim() { var.Set<Dim<1>>(Dim<1>()); }
+  DDim() { var.Set<Dim<1>>(Dim<1>()); }

-    template <int D> explicit DDim(const Dim<D> &in) { var.Set<Dim<D>>(in); }
+  template <int D> explicit DDim(const Dim<D> &in) { var.Set<Dim<D>>(in); }

-    /*implicit*/ DDim(std::initializer_list<int64_t> init_list);
+  /*implicit*/ DDim(std::initializer_list<int64_t> init_list);

-    template <int D> DDim &operator=(const Dim<D> &in) {
-        var.Set<Dim<D>>(in);
-        return *this;
-    }
+  template <int D> DDim &operator=(const Dim<D> &in) {
+    var.Set<Dim<D>>(in);
+    return *this;
+  }

-    int64_t &operator[](int idx);
+  int64_t &operator[](int idx);

-    int64_t operator[](int idx) const;
+  int64_t operator[](int idx) const;

-    //  template <typename Visitor>
-    //  typename Visitor::result_type apply_visitor(Visitor& visitor) {
-    //    return var.apply_visitor(visitor);
-    //  }
-    //
-    //  template <typename Visitor>
-    //  typename Visitor::result_type apply_visitor(Visitor& visitor)
-    //  const {
-    //    return var.apply_visitor(visitor);
-    //  }
+  //  template <typename Visitor>
+  //  typename Visitor::result_type apply_visitor(Visitor& visitor) {
+  //    return var.apply_visitor(visitor);
+  //  }
+  //
+  //  template <typename Visitor>
+  //  typename Visitor::result_type apply_visitor(Visitor& visitor)
+  //  const {
+  //    return var.apply_visitor(visitor);
+  //  }

-    DDimVar getVar() { return var; }
+  DDimVar getVar() { return var; }

-    bool operator==(DDim d) const;
+  bool operator==(DDim d) const;

-    bool operator!=(DDim d) const;
+  bool operator!=(DDim d) const;

-    DDim operator+(DDim d) const;
+  DDim operator+(DDim d) const;

-    DDim operator*(DDim d) const;
+  DDim operator*(DDim d) const;

-    int size() const;
+  int size() const;
 };

 /**

--- a/src/framework/dim.h
+++ b/src/framework/dim.h
@@ -25,199 +25,197 @@ namespace framework {

 // Statically sized, statically indexed dimension
 template <int i> struct Dim {
-    static constexpr int dimensions = i;
+  static constexpr int dimensions = i;

-    template <typename... Args>
-    HOSTDEVICE Dim(int64_t _head, Args... _tail) : head(_head), tail(_tail...) {
-        static_assert(sizeof...(_tail) == i - 1,
-                      "Dim initialized with the wrong number of parameters");
-    }
+  template <typename... Args>
+  HOSTDEVICE Dim(int64_t _head, Args... _tail) : head(_head), tail(_tail...) {
+    static_assert(sizeof...(_tail) == i - 1,
+                  "Dim initialized with the wrong number of parameters");
+  }

-    HOSTDEVICE
-    Dim(int64_t _head, const Dim<i - 1> &_tail) : head(_head), tail(_tail) {}
+  HOSTDEVICE
+  Dim(int64_t _head, const Dim<i - 1> &_tail) : head(_head), tail(_tail) {}

-    HOSTDEVICE
-    Dim() : head(0), tail() {}
+  HOSTDEVICE
+  Dim() : head(0), tail() {}

-    /** Construct a Dim from a linear index and size.  Uses Fortran
-     * order
-     * indexing. */
-    HOSTDEVICE
-    Dim(int64_t idx, const Dim<i> &size)
-        : head(idx % size.head), tail(idx / size.head, size.tail) {}
+  /** Construct a Dim from a linear index and size.  Uses Fortran
+   * order
+   * indexing. */
+  HOSTDEVICE
+  Dim(int64_t idx, const Dim<i> &size)
+      : head(idx % size.head), tail(idx / size.head, size.tail) {}

-    /** Construct a Dim with each dimension set to the given index */
-    HOSTDEVICE
-    Dim(int64_t idx) : head(idx), tail(idx) {}
+  /** Construct a Dim with each dimension set to the given index */
+  HOSTDEVICE
+  Dim(int64_t idx) : head(idx), tail(idx) {}

-    HOSTDEVICE
-    bool operator==(const Dim<i> &o) const {
-        return (head == o.head) && (tail == o.tail);
-    }
+  HOSTDEVICE
+  bool operator==(const Dim<i> &o) const {
+    return (head == o.head) && (tail == o.tail);
+  }

-    HOSTDEVICE
-    bool operator!=(const Dim<i> &o) const { return !(*this == o); }
+  HOSTDEVICE
+  bool operator!=(const Dim<i> &o) const { return !(*this == o); }

-    HOSTDEVICE
-    int64_t &operator[](int idx);
-    HOSTDEVICE
-    int64_t operator[](int idx) const;
+  HOSTDEVICE
+  int64_t &operator[](int idx);
+  HOSTDEVICE
+  int64_t operator[](int idx) const;

-    HOST std::string to_string() const;
+  HOST std::string to_string() const;

-    int64_t head;
-    Dim<i - 1> tail;
+  int64_t head;
+  Dim<i - 1> tail;
 };

 // Base case specialization
 template <> struct Dim<0> {
-    static constexpr int dimensions = 0;
+  static constexpr int dimensions = 0;

-    HOSTDEVICE
-    Dim(int64_t _head) {}
+  HOSTDEVICE
+  Dim(int64_t _head) {}

-    HOSTDEVICE
-    Dim() {}
+  HOSTDEVICE
+  Dim() {}

-    HOSTDEVICE
-    Dim(int idx, const Dim<0> &size) {
+  HOSTDEVICE
+  Dim(int idx, const Dim<0> &size) {
 #ifndef __CUDA_ARCH__
-        if (idx > 0) {
-            throw std::invalid_argument("Index out of range.");
-        }
+    if (idx > 0) {
+      throw std::invalid_argument("Index out of range.");
+    }
 #else
-        PADDLE_ASSERT(idx == 0);
+    PADDLE_ASSERT(idx == 0);
 #endif
-    }
+  }

-    HOSTDEVICE
-    bool operator==(const Dim<0> &o) const { return true; }
+  HOSTDEVICE
+  bool operator==(const Dim<0> &o) const { return true; }

-    HOSTDEVICE
-    bool operator!=(const Dim<0> &o) const { return false; }
+  HOSTDEVICE
+  bool operator!=(const Dim<0> &o) const { return false; }

-    HOSTDEVICE
-    int64_t &operator[](int idx);
-    HOSTDEVICE
-    int64_t operator[](int idx) const;
+  HOSTDEVICE
+  int64_t &operator[](int idx);
+  HOSTDEVICE
+  int64_t operator[](int idx) const;
 };

 namespace {

 // Helper for accessing Dim classes
 template <int i> struct DimGetter {
-    // Return a copy if Dim is const
-    template <typename D> HOSTDEVICE static int64_t impl(const D &d) {
-        return DimGetter<i - 1>::impl(d.tail);
-    }
-    // Return a reference if Dim is mutable
-    template <typename D> HOSTDEVICE static int64_t &impl(D &d) {
-        return DimGetter<i - 1>::impl(d.tail);
-    }
+  // Return a copy if Dim is const
+  template <typename D> HOSTDEVICE static int64_t impl(const D &d) {
+    return DimGetter<i - 1>::impl(d.tail);
+  }
+  // Return a reference if Dim is mutable
+  template <typename D> HOSTDEVICE static int64_t &impl(D &d) {
+    return DimGetter<i - 1>::impl(d.tail);
+  }
 };

 // Eureka! We found the element!
 template <> struct DimGetter<0> {
-    // Return a copy if Dim is const
-    template <typename D> HOSTDEVICE static int64_t impl(const D &d) {
-        return d.head;
-    }
-    // Return a reference if Dim is mutable
-    template <typename D> HOSTDEVICE static int64_t &impl(D &d) {
-        return d.head;
-    }
+  // Return a copy if Dim is const
+  template <typename D> HOSTDEVICE static int64_t impl(const D &d) {
+    return d.head;
+  }
+  // Return a reference if Dim is mutable
+  template <typename D> HOSTDEVICE static int64_t &impl(D &d) { return d.head; }
 };

 template <int D> HOSTDEVICE int64_t &indexer(Dim<D> &dim, int idx) {
 #ifndef __CUDA_ARCH__
-    if (idx < 0) {
-        throw std::invalid_argument("Tried to access a negative dimension");
-    }
+  if (idx < 0) {
+    throw std::invalid_argument("Tried to access a negative dimension");
+  }
 #else
-    PADDLE_ASSERT(idx >= 0);
+  PADDLE_ASSERT(idx >= 0);
 #endif
-    if (idx == 0) {
-        return dim.head;
-    }
-    return indexer(dim.tail, idx - 1);
+  if (idx == 0) {
+    return dim.head;
+  }
+  return indexer(dim.tail, idx - 1);
 }

 template <> HOSTDEVICE int64_t &indexer<0>(Dim<0> &dim, int idx) {
 #ifndef __CUDA_ARCH__
-    throw std::invalid_argument("Invalid index");
+  throw std::invalid_argument("Invalid index");
 #else
-    PADDLE_ASSERT(false);
+  PADDLE_ASSERT(false);
 #if CUDA_VERSION < 8000
-    // On CUDA versions previous to 8.0, only __shared__ variables
-    // could be declared as static in the device code.
-    int64_t head = 0;
+  // On CUDA versions previous to 8.0, only __shared__ variables
+  // could be declared as static in the device code.
+  int64_t head = 0;
 #else
-    static int64_t head = 0;
+  static int64_t head = 0;
 #endif
-    return head;
+  return head;
 #endif
 }

 template <int D> HOSTDEVICE int64_t indexer(const Dim<D> &dim, int idx) {
 #ifndef __CUDA_ARCH__
-    if (idx < 0) {
-        throw std::invalid_argument("Tried to access a negative dimension");
-    }
+  if (idx < 0) {
+    throw std::invalid_argument("Tried to access a negative dimension");
+  }
 #else
-    PADDLE_ASSERT(idx >= 0);
+  PADDLE_ASSERT(idx >= 0);
 #endif
-    if (idx == 0) {
-        return dim.head;
-    }
-    return indexer(dim.tail, idx - 1);
+  if (idx == 0) {
+    return dim.head;
+  }
+  return indexer(dim.tail, idx - 1);
 }

 template <> HOSTDEVICE int64_t indexer<0>(const Dim<0> &dim, int idx) {
 #ifndef __CUDA_ARCH__
-    throw std::invalid_argument("Invalid index");
+  throw std::invalid_argument("Invalid index");
 #else
-    PADDLE_ASSERT(false);
+  PADDLE_ASSERT(false);
 #if CUDA_VERSION < 8000
-    // On CUDA versions previous to 8.0, only __shared__ variables
-    // could be declared as static in the device code.
-    int64_t head = 0;
+  // On CUDA versions previous to 8.0, only __shared__ variables
+  // could be declared as static in the device code.
+  int64_t head = 0;
 #else
-    static int64_t head = 0;
+  static int64_t head = 0;
 #endif
-    return head;
+  return head;
 #endif
 }

 } // namespace
 // Static access to constant Dim
 template <int i, int l> HOSTDEVICE int64_t get(const Dim<l> &d) {
-    return DimGetter<i>::impl(d);
+  return DimGetter<i>::impl(d);
 }

 // Static access to mutable Dim
 template <int i, int l> HOSTDEVICE int64_t &get(Dim<l> &d) {
-    return DimGetter<i>::impl(d);
+  return DimGetter<i>::impl(d);
 }

 // Dynamic access to constant Dim
 template <int l> HOSTDEVICE int64_t Dim<l>::operator[](int i) const {
-    //  std::cout << "l: " << l << std::endl;
-    return indexer(*this, i);
+  //  std::cout << "l: " << l << std::endl;
+  return indexer(*this, i);
 }

 // Dynamic access to mutable Dim
 template <int l> HOSTDEVICE int64_t &Dim<l>::operator[](int i) {
-    return indexer(*this, i);
+  return indexer(*this, i);
 }

 // Dynamic access to constant Dim
 inline HOSTDEVICE int64_t Dim<0>::operator[](int i) const {
-    return indexer(*this, i);
+  return indexer(*this, i);
 }

 // Dynamic access to mutable Dim
 inline HOSTDEVICE int64_t &Dim<0>::operator[](int i) {
-    return indexer(*this, i);
+  return indexer(*this, i);
 }

 // Dynamic access to constant Dim
@@ -225,52 +223,52 @@ inline HOSTDEVICE int64_t &Dim<0>::operator[](int i) {
 template <int l>
 HOSTDEVICE typename std::enable_if<(l > 0), int64_t>::type get(const Dim<l> &d,
                                                               int i) {
-    return d[i];
+  return d[i];
 }

 // Dynamic access to mutable Dim
 template <int l>
 HOSTDEVICE typename std::enable_if<(l > 0), int64_t &>::type get(Dim<l> &d,
                                                                 int i) {
-    return d[i];
+  return d[i];
 }

 // Dot product of two dims
 template <int i>
 HOSTDEVICE int64_t linearize(const Dim<i> &a, const Dim<i> &b) {
-    return a.head * b.head + linearize(a.tail, b.tail);
+  return a.head * b.head + linearize(a.tail, b.tail);
 }

 // Base case dot product of two Dims
 // Notice it is inline because it is no longer a template
 template <>
 HOSTDEVICE inline int64_t linearize(const Dim<0> &a, const Dim<0> &b) {
-    return 0;
+  return 0;
 }

 // Product of a Dim
 template <int i> HOSTDEVICE int64_t product(const Dim<i> &a, int prod = 1) {
-    return prod * a.head * product(a.tail);
+  return prod * a.head * product(a.tail);
 }

 // Base case product of a Dim
 // Notice it is inline because it is no longer a template
 template <> HOSTDEVICE inline int64_t product(const Dim<0> &a, int prod) {
-    return prod;
+  return prod;
 }

 // Is 0 <= idx_i < size_i for all i?
 template <int i>
 HOSTDEVICE bool contained(const Dim<i> &idx, const Dim<i> &size) {
-    return ((0 <= idx.head) && (idx.head < size.head) &&
-            contained(idx.tail, size.tail));
+  return ((0 <= idx.head) && (idx.head < size.head) &&
+          contained(idx.tail, size.tail));
 }

 // Base case of is 0 <= idx_i < size_i ?
 // Notice it is inline because it is no longer a template
 template <>
 HOSTDEVICE inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
-    return true;
+  return true;
 }

 /**
@@ -278,14 +276,14 @@ HOSTDEVICE inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
 */
 template <int i>
 HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i> &src, int mul = 1) {
-    return Dim<i>(mul, ex_prefix_mul(src.tail, mul * src.head));
+  return Dim<i>(mul, ex_prefix_mul(src.tail, mul * src.head));
 }

 ///\cond HIDDEN
 // Base case of ex_prefix_mul
 // Notice it is inline because it is no longer a template
 template <> HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) {
-    return Dim<0>();
+  return Dim<0>();
 }
 ///\endcond

@@ -293,36 +291,36 @@ template <> HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0> &src, int mul) {
 * Add two dimensions together
 */
 template <int i> HOSTDEVICE Dim<i> dim_plus(const Dim<i> &a, const Dim<i> &b) {
-    return Dim<i>(a.head + b.head, dim_plus(a.tail, b.tail));
+  return Dim<i>(a.head + b.head, dim_plus(a.tail, b.tail));
 }

 // Base case
 template <>
 HOSTDEVICE inline Dim<0> dim_plus(const Dim<0> &a, const Dim<0> &b) {
-    return Dim<0>();
+  return Dim<0>();
 }

 template <int i>
 HOSTDEVICE Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
-    return dim_plus(lhs, rhs);
+  return dim_plus(lhs, rhs);
 }

 /**
 * Multiply two dimensions together
 */
 template <int i> HOSTDEVICE Dim<i> dim_mult(const Dim<i> &a, const Dim<i> &b) {
-    return Dim<i>(a.head * b.head, dim_mult(a.tail, b.tail));
+  return Dim<i>(a.head * b.head, dim_mult(a.tail, b.tail));
 }

 // Base case
 template <>
 HOSTDEVICE inline Dim<0> dim_mult(const Dim<0> &a, const Dim<0> &b) {
-    return Dim<0>();
+  return Dim<0>();
 }

 template <int i>
 HOSTDEVICE Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
-    return dim_mult(lhs, rhs);
+  return dim_mult(lhs, rhs);
 }

 /**
@@ -337,8 +335,8 @@ HOSTDEVICE Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {

 template <int i>
 HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
-    int norm_stride = size.head == 1 ? 0 : stride.head;
-    return Dim<i>(norm_stride, normalize_strides(size.tail, stride.tail));
+  int64_t norm_stride = size.head == 1 ? 0 : stride.head;
+  return Dim<i>(norm_stride, normalize_strides(size.tail, stride.tail));
 }

 ///\cond HIDDEN
@@ -346,7 +344,7 @@ HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
 template <>
 HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size,
                                           const Dim<0> &stride) {
-    return Dim<0>();
+  return Dim<0>();
 }

 ///\endcond
@@ -361,7 +359,7 @@ HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size,

 template <typename... Args>
 HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) {
-    return Dim<sizeof...(Args)>(idxes...);
+  return Dim<sizeof...(Args)>(idxes...);
 }

 // Allows us to output a Dim
@@ -369,8 +367,8 @@ HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) {
 template <int i>
 typename std::enable_if<(i > 1), std::ostream &>::type
 operator<<(std::ostream &os, const Dim<i> &d) {
-    os << d.head << ", " << d.tail;
-    return os;
+  os << d.head << ", " << d.tail;
+  return os;
 }

 // Base case that allows us to output a Dim
@@ -378,34 +376,34 @@ operator<<(std::ostream &os, const Dim<i> &d) {
 template <int i>
 typename std::enable_if<(i == 1), std::ostream &>::type
 operator<<(std::ostream &os, const Dim<i> &d) {
-    os << d.head;
-    return os;
+  os << d.head;
+  return os;
 }

 inline std::ostream &operator<<(std::ostream &os, const Dim<0> &d) {
-    return os;
+  return os;
 }

 template <int i> HOST std::string Dim<i>::to_string() const {
-    std::stringstream stream;
+  std::stringstream stream;

-    stream << *this;
+  stream << *this;

-    return stream.str();
+  return stream.str();
 }

 template <int D>
 HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) {
-    Dim<D> result;
+  Dim<D> result;

-    for (int i = 0; i < D - 1; ++i) {
-        result[i] = linear_index % extents[i];
-        linear_index /= extents[i];
-    }
+  for (int i = 0; i < D - 1; ++i) {
+    result[i] = linear_index % extents[i];
+    linear_index /= extents[i];
+  }

-    result[D - 1] = linear_index;
+  result[D - 1] = linear_index;

-    return result;
+  return result;
 }

 } // namespace framework

--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -26,66 +26,66 @@ namespace framework {

 template <typename Dtype>
 Executor<Dtype>::Executor(const Program<Dtype> p) : program_(p) {
-    if (use_optimize_) {
-        to_predict_program_ = program_.optimizeProgram;
-    } else {
-        to_predict_program_ = program_.originProgram;
-    }
+  if (use_optimize_) {
+    to_predict_program_ = program_.optimizeProgram;
+  } else {
+    to_predict_program_ = program_.originProgram;
+  }
 
-    //    const std::vector<std::shared_ptr<BlockDesc>> blocks =
-    to_predict_program_->Blocks();
-    //    for (int i = 0; i < blocks.size(); ++i) {
-    //        std::shared_ptr<BlockDesc> block_desc = blocks[i];
-    //        std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
-    //        for (int j = 0; j < ops.size(); ++j) {
-    //            std::shared_ptr<OpDesc> op = ops[j];
-    //            if (op->Type() == "conv2d" && op->Input("Input")[0] ==
-    //            "pixel") {
-    //                Attribute strides_attr = op->GetAttrMap().at("strides");
-    //                std::vector<int> stride =
-    //                strides_attr.Get<std::vector<int>>(); for (int k = 0; k <
-    //                stride.size(); ++k) {
-    //                }
-    //                std::shared_ptr<operators::ConvOp<Dtype, float>> conv =
-    //                    std::make_shared<operators::ConvOp<Dtype, float>>(
-    //                        op->Type(), op->GetInputs(), op->GetOutputs(),
-    //                        op->GetAttrMap(), program_.scope);
-    //                ops_of_block_[*block_desc.get()].push_back(conv);
-    //            }
-    //        }
-    //    }
+  //    const std::vector<std::shared_ptr<BlockDesc>> blocks =
+  //    to_predict_program_->Blocks();
+  //    for (int i = 0; i < blocks.size(); ++i) {
+  //        std::shared_ptr<BlockDesc> block_desc = blocks[i];
+  //        std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+  //        for (int j = 0; j < ops.size(); ++j) {
+  //            std::shared_ptr<OpDesc> op = ops[j];
+  //            if (op->Type() == "conv2d" && op->Input("Input")[0] ==
+  //            "pixel") {
+  //                Attribute strides_attr = op->GetAttrMap().at("strides");
+  //                std::vector<int> stride =
+  //                strides_attr.Get<std::vector<int>>(); for (int k = 0; k <
+  //                stride.size(); ++k) {
+  //                }
+  //                std::shared_ptr<operators::ConvOp<Dtype, float>> conv =
+  //                    std::make_shared<operators::ConvOp<Dtype, float>>(
+  //                        op->Type(), op->GetInputs(), op->GetOutputs(),
+  //                        op->GetAttrMap(), program_.scope);
+  //                ops_of_block_[*block_desc.get()].push_back(conv);
+  //            }
+  //        }
+  //    }
 }

 template <typename Dtype>
 std::shared_ptr<Tensor> Executor<Dtype>::predict(Tensor &t) {
-    // feed
-    auto scope = program_.scope;
-    Variable *g_feed_value = scope->Var("pixel");
-    auto tensor = g_feed_value->GetMutable<Tensor>();
-    tensor->ShareDataWith(t);
+  // feed
+  auto scope = program_.scope;
+  Variable *g_feed_value = scope->Var("pixel");
+  auto tensor = g_feed_value->GetMutable<Tensor>();
+  tensor->ShareDataWith(t);

-    Variable *con_output = scope->Var("conv2d_0.tmp_0");
-    Tensor *output_tensor = con_output->GetMutable<Tensor>();
-    output_tensor->mutable_data<float>({1, 16, 32, 32});
-    //  std::cout << typeid(output_tensor).name() << std::endl;
-    //  std::cout << "output_tensor dims: " << output_tensor->dims() <<
-    //  std::endl;
+  Variable *con_output = scope->Var("conv2d_0.tmp_0");
+  Tensor *output_tensor = con_output->GetMutable<Tensor>();
+  output_tensor->mutable_data<float>({1, 16, 32, 32});
+  //  std::cout << typeid(output_tensor).name() << std::endl;
+  //  std::cout << "output_tensor dims: " << output_tensor->dims() <<
+  //  std::endl;

-    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
-    out_tensor.reset(output_tensor);
+  std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
+  out_tensor.reset(output_tensor);

-    predict(t, 0);
-    return out_tensor;
+  predict(t, 0);
+  return out_tensor;
 }

 template <typename Dtype>
 void Executor<Dtype>::predict(const Tensor &t, int block_id) {
-    std::shared_ptr<BlockDesc> to_predict_block =
-        to_predict_program_->Block(block_id);
-    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
-        auto op = ops_of_block_[*to_predict_block.get()][j];
-        op->Run();
-    }
+  std::shared_ptr<BlockDesc> to_predict_block =
+      to_predict_program_->Block(block_id);
+  for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
+    auto op = ops_of_block_[*to_predict_block.get()][j];
+    op->Run();
+  }
 }

 template class Executor<CPU>;

--- a/src/framework/executor.h
+++ b/src/framework/executor.h
@@ -35,23 +35,23 @@ namespace paddle_mobile {
 namespace framework {

 template <typename Dtype> class Executor {
-  public:
-    Executor();
+public:
+  Executor();

-    Executor(const Program<Dtype> p);
+  Executor(const Program<Dtype> p);

-    std::shared_ptr<Tensor> predict(Tensor &t);
+  std::shared_ptr<Tensor> predict(Tensor &t);

-  public:
-    const framework::Program<Dtype> program_;
-    std::shared_ptr<ProgramDesc> to_predict_program_;
+public:
+  const framework::Program<Dtype> program_;
+  std::shared_ptr<ProgramDesc> to_predict_program_;

-    void predict(const Tensor &t, int block_id);
+  void predict(const Tensor &t, int block_id);

-    std::map<framework::BlockDesc,
-             std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
-        ops_of_block_;
-    bool use_optimize_ = false;
+  std::map<framework::BlockDesc,
+           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
+      ops_of_block_;
+  bool use_optimize_ = false;
 };

 } // namespace framework

--- a/src/framework/framework.pb.cpp
+++ b/src/framework/framework.pb.cpp
--- a/src/framework/framework.pb.h
+++ b/src/framework/framework.pb.h
--- a/src/framework/lod_tensor.cc
+++ b/src/framework/lod_tensor.cc
@@ -22,291 +22,289 @@ namespace paddle_mobile {
 namespace framework {

 std::ostream &operator<<(std::ostream &os, const LoD &lod) {
+  os << "{";
+  for (auto &v : lod) {
    os << "{";
-    for (auto &v : lod) {
-        os << "{";
-        bool is_first = true;
-        for (auto &i : v) {
-            if (is_first) {
-                os << i;
-                is_first = false;
-            } else {
-                os << ", " << i;
-            }
-        }
-        os << "}";
+    bool is_first = true;
+    for (auto &i : v) {
+      if (is_first) {
+        os << i;
+        is_first = false;
+      } else {
+        os << ", " << i;
+      }
    }
    os << "}";
+  }
+  os << "}";

-    return os;
+  return os;
 }

 std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
-    //  PADDLE_ENFORCE(t.type().hash_code() ==
-    //  typeid(float).hash_code());
-
-    //  if (!platform::is_cpu_place(t.place())) {
-    //    LoDTensor tt;
-    //    framework::TensorCopy(t, platform::CPUPlace(), &tt);
-    //    platform::DeviceContextPool &pool =
-    //    platform::DeviceContextPool::Instance(); auto &dev_ctx =
-    //    *pool.Get(t.place()); dev_ctx.Wait();
-    //
-    //    os << tt;
-    //    return os;
-    //  }
-
-    os << "dim: " << t.dims() << "\n";
-    os << "lod: " << t.lod() << "\n";
-
-    // only print first ten elements
-    int64_t size = t.numel() < 10 ? t.numel() : 10;
-    for (int64_t i = 0; i < size; ++i) {
-        os << t.data<float>()[i] << " ";
-    }
-
-    return os;
+  //  PADDLE_ENFORCE(t.type().hash_code() ==
+  //  typeid(float).hash_code());
+
+  //  if (!platform::is_cpu_place(t.place())) {
+  //    LoDTensor tt;
+  //    framework::TensorCopy(t, platform::CPUPlace(), &tt);
+  //    platform::DeviceContextPool &pool =
+  //    platform::DeviceContextPool::Instance(); auto &dev_ctx =
+  //    *pool.Get(t.place()); dev_ctx.Wait();
+  //
+  //    os << tt;
+  //    return os;
+  //  }
+
+  os << "dim: " << t.dims() << "\n";
+  os << "lod: " << t.lod() << "\n";
+
+  // only print first ten elements
+  int64_t size = t.numel() < 10 ? t.numel() : 10;
+  for (int64_t i = 0; i < size; ++i) {
+    os << t.data<float>()[i] << " ";
+  }
+
+  return os;
 }

 std::string LoDToString(const LoD &lod) {
-    std::ostringstream stream;
-    stream << lod;
-    return stream.str();
+  std::ostringstream stream;
+  stream << lod;
+  return stream.str();
 }

 LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin,
                 size_t elem_end) {
-    //  PADDLE_ENFORCE_LT(level, in.size());
-    //  PADDLE_ENFORCE_LT(elem_end, in[level].size());
-
-    LoD res;
-    res.resize(in.size() - level);
-    // copy the first level
-    res[0].assign(in[level].begin() + elem_begin,
-                  in[level].begin() + elem_end + 1);
-    for (size_t lvl = 1; lvl < res.size(); lvl++) {
-        const auto &in_level = in[level + lvl];
-        const auto &above_level = res[lvl - 1];
-        auto &out_level = res[lvl];
-        out_level.assign(in_level.begin() + above_level.front(),
-                         in_level.begin() + above_level.back() + 1);
-    }
-    for (size_t lvl = 0; lvl < res.size(); lvl++) {
-        // to make the first offset equals 0, all the elements minus the
-        // first
-        // element
-        size_t front = res[lvl].front();
-        for (auto &ele : res[lvl]) {
-            ele -= front;
-        }
+  //  PADDLE_ENFORCE_LT(level, in.size());
+  //  PADDLE_ENFORCE_LT(elem_end, in[level].size());
+
+  LoD res;
+  res.resize(in.size() - level);
+  // copy the first level
+  res[0].assign(in[level].begin() + elem_begin,
+                in[level].begin() + elem_end + 1);
+  for (size_t lvl = 1; lvl < res.size(); lvl++) {
+    const auto &in_level = in[level + lvl];
+    const auto &above_level = res[lvl - 1];
+    auto &out_level = res[lvl];
+    out_level.assign(in_level.begin() + above_level.front(),
+                     in_level.begin() + above_level.back() + 1);
+  }
+  for (size_t lvl = 0; lvl < res.size(); lvl++) {
+    // to make the first offset equals 0, all the elements minus the
+    // first
+    // element
+    size_t front = res[lvl].front();
+    for (auto &ele : res[lvl]) {
+      ele -= front;
    }
-    return res;
+  }
+  return res;
 }

 LoD ToAbsOffset(const LoD &in) {
-    // the lowest level stores relative offsets
-    if (in.empty() || in.size() == 1)
-        return in;
-    LoD result = in;
-    for (auto level = static_cast<int>(in.size() - 2); level >= 0; level--) {
-        for (size_t i = 0; i < in[level].size(); ++i) {
-            size_t index = in[level][i];
-            result[level][i] = result[level + 1][index];
-        }
+  // the lowest level stores relative offsets
+  if (in.empty() || in.size() == 1)
+    return in;
+  LoD result = in;
+  for (auto level = static_cast<int>(in.size() - 2); level >= 0; level--) {
+    for (size_t i = 0; i < in[level].size(); ++i) {
+      size_t index = in[level][i];
+      result[level][i] = result[level + 1][index];
    }
-    return result;
+  }
+  return result;
 }

 bool operator==(const LoD &a, const LoD &b) {
-    if (a.size() != b.size()) {
-        return false;
+  if (a.size() != b.size()) {
+    return false;
+  }
+
+  for (size_t i = 0; i < a.size(); i++) {
+    const auto &a_level = a[i];
+    const auto &b_level = b[i];
+    if (a_level.size() != b_level.size()) {
+      return false;
    }
-
-    for (size_t i = 0; i < a.size(); i++) {
-        const auto &a_level = a[i];
-        const auto &b_level = b[i];
-        if (a_level.size() != b_level.size()) {
-            return false;
-        }
-        for (size_t j = 0; j < a_level.size(); j++) {
-            if (a_level[j] != b_level[j]) {
-                return false;
-            }
-        }
+    for (size_t j = 0; j < a_level.size(); j++) {
+      if (a_level[j] != b_level[j]) {
+        return false;
+      }
    }
-    return true;
+  }
+  return true;
 }

 bool CheckLoD(const LoD &in, int tensor_height) {
-    if (in.empty())
-        return true;
-    for (const auto &level : in) {
-        // check: there should be more than 2 offsets existing in each
-        // level.
-        if (level.size() < 2)
-            return false;
-        // check: the first offset(the begin offset) of each level
-        // should be 0.
-        if (level.front() != 0)
-            return false;
-        // check: all the offsets in a level should be ascending(no same
-        // items
-        // allows).
-        if (!std::is_sorted(level.begin(), level.begin(),
-                            [](size_t a, size_t b) {
-                                if (a < b)
-                                    return true;
-                                return false;
-                            })) {
-            std::cout << "ascending error";
-            return false;
-        }
-    }
-    // check: the lowest level's last offset should equals
-    // `tensor_height` if
-    //        tensor_height>0.
-    if (tensor_height > 0 && (size_t)tensor_height != in.back().back())
-        return false;
-
-    // check: the higher level's last offset should equals the lower
-    // level's
-    // size-1.
-    // NOTE LoD store the levels from top to bottom, so the higher level
-    // goes
-    // first.
-    for (size_t level = 0; level < in.size() - 1; level++) {
-        if (in[level].back() != in[level + 1].size() - 1)
-            return false;
-    }
+  if (in.empty())
     return true;
+  for (const auto &level : in) {
+    // check: there should be at least 2 offsets existing in each
+    // level.
+    if (level.size() < 2)
+      return false;
+    // check: the first offset(the begin offset) of each level
+    // should be 0.
+    if (level.front() != 0)
+      return false;
+    // check: the offsets in a level must be sorted in ascending order;
+    // with a `<` comparator std::is_sorted still accepts equal
+    // neighbours. The range must run to level.end() to test anything.
+    if (!std::is_sorted(level.begin(), level.end(), [](size_t a, size_t b) {
+          if (a < b)
+            return true;
+          return false;
+        })) {
+      std::cout << "ascending error";
+      return false;
+    }
+  }
+  // check: the lowest level's last offset should equal
+  // `tensor_height` if
+  //        tensor_height>0.
+  if (tensor_height > 0 && (size_t)tensor_height != in.back().back())
+    return false;
+
+  // check: the higher level's last offset should equal the lower
+  // level's
+  // size-1.
+  // NOTE LoD stores the levels from top to bottom, so the higher level
+  // goes
+  // first.
+  for (size_t level = 0; level < in.size() - 1; level++) {
+    if (in[level].back() != in[level + 1].size() - 1)
+      return false;
+  }
+  return true;
 }

 bool CheckAbsLoD(const LoD &in, int tensor_height) {
-    if (in.empty())
-        return true;
-    for (const auto &level : in) {
-        // check: all the offsets in a level should be ascending(no same
-        // items
-        // allows).
-        if (!std::is_sorted(level.begin(), level.begin(),
-                            [](size_t a, size_t b) {
-                                if (a < b)
-                                    return true;
-                                return false;
-                            })) {
-            return false;
-        }
-
-        // check: there should be more than 2 offsets existing in each
-        // level.
-        if (level.size() < 2)
-            return false;
-
-        // check: the first offset of each level should be 0, and the
-        // last should be
-        // the same(the height of underlying tensor).
-        if (level.front() != 0)
-            return false;
-        if (tensor_height < 0) {
-            tensor_height = level.back();
-        } else if ((size_t)tensor_height != level.back()) {
-            return false;
-        }
-    }
+  if (in.empty())
     return true;
+  for (const auto &level : in) {
+    // check: the offsets in a level must be sorted in ascending order;
+    // with a `<` comparator std::is_sorted still accepts equal
+    // neighbours. The range must run to level.end() to test anything.
+    if (!std::is_sorted(level.begin(), level.end(), [](size_t a, size_t b) {
+          if (a < b)
+            return true;
+          return false;
+        })) {
+      return false;
+    }
+
+    // check: there should be at least 2 offsets existing in each
+    // level.
+    if (level.size() < 2)
+      return false;
+
+    // check: the first offset of each level should be 0, and the
+    // last should be
+    // the same(the height of underlying tensor).
+    if (level.front() != 0)
+      return false;
+    if (tensor_height < 0) {
+      tensor_height = level.back();
+    } else if ((size_t)tensor_height != level.back()) {
+      return false;
+    }
+  }
+  return true;
 }

 using LoDAndOffset = std::pair<LoD, std::pair<size_t, size_t>>;

 LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
                                        size_t end_idx, size_t start_level) {
-    LoD sub_lod;
-
-    for (size_t level_idx = start_level; level_idx < lod.size(); ++level_idx) {
-        //    PADDLE_ENFORCE_LE(start_idx, end_idx);
-        //    PADDLE_ENFORCE_LT(end_idx, lod[level_idx].size());
-        std::vector<size_t> level_lens;
-        for (size_t i = start_idx; i < end_idx; ++i) {
-            level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]);
-        }
-        sub_lod.emplace_back(level_lens);
-        start_idx = lod[level_idx][start_idx];
-        end_idx = lod[level_idx][end_idx];
+  LoD sub_lod;
+
+  for (size_t level_idx = start_level; level_idx < lod.size(); ++level_idx) {
+    //    PADDLE_ENFORCE_LE(start_idx, end_idx);
+    //    PADDLE_ENFORCE_LT(end_idx, lod[level_idx].size());
+    std::vector<size_t> level_lens;
+    for (size_t i = start_idx; i < end_idx; ++i) {
+      level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]);
    }
+    sub_lod.emplace_back(level_lens);
+    start_idx = lod[level_idx][start_idx];
+    end_idx = lod[level_idx][end_idx];
+  }

-    return LoDAndOffset{sub_lod, {start_idx, end_idx}};
+  return LoDAndOffset{sub_lod, {start_idx, end_idx}};
 }

 void AppendLoD(LoD *lod, const LoD &lod_length) {
-    //  PADDLE_ENFORCE(
-    //      lod->empty() || lod->size() == lod_length.size(),
-    //      "The lod_length should has the same size with the appended
-    //      lod.");
-    if (lod->empty()) {
-        for (size_t i = 0; i < lod_length.size(); ++i) {
-            lod->emplace_back(1, 0); // size = 1, value = 0;
-        }
-        *lod = LoD(lod_length.size(), std::vector<size_t>({0}));
+  //  PADDLE_ENFORCE(
+  //      lod->empty() || lod->size() == lod_length.size(),
+  //      "The lod_length should has the same size with the appended
+  //      lod.");
+  if (lod->empty()) {
+    for (size_t i = 0; i < lod_length.size(); ++i) {
+      lod->emplace_back(1, 0); // size = 1, value = 0;
    }
-    for (size_t i = 0; i < lod->size(); ++i) {
-        auto &level = (*lod)[i];
-        for (size_t len : lod_length[i]) {
-            level.push_back(level.back() + len);
-        }
+    *lod = LoD(lod_length.size(), std::vector<size_t>({0}));
+  }
+  for (size_t i = 0; i < lod->size(); ++i) {
+    auto &level = (*lod)[i];
+    for (size_t len : lod_length[i]) {
+      level.push_back(level.back() + len);
    }
+  }
 }

 void SerializeToStream(std::ostream &os, const LoDTensor &tensor) {
-    { // the 1st field, uint32_t version for LoDTensor
-        constexpr uint32_t version = 0;
-        os.write(reinterpret_cast<const char *>(&version), sizeof(version));
+  { // the 1st field, uint32_t version for LoDTensor
+    constexpr uint32_t version = 0;
+    os.write(reinterpret_cast<const char *>(&version), sizeof(version));
+  }
+  {
+    // the 2st field, LoD information
+    // uint64_t lod_level
+    // uint64_t lod_level_1 size in byte.
+    // int*     lod_level_1 data
+    // ...
+    auto lod = tensor.lod();
+    uint64_t size = lod.size();
+    os.write(reinterpret_cast<const char *>(&size), sizeof(size));
+
+    for (auto &each : lod) {
+      size = each.size() * sizeof(framework::LoD::value_type::value_type);
+      os.write(reinterpret_cast<const char *>(&size), sizeof(size));
+      os.write(reinterpret_cast<const char *>(each.data()),
+               static_cast<std::streamsize>(size));
    }
-    {
-        // the 2st field, LoD information
-        // uint64_t lod_level
-        // uint64_t lod_level_1 size in byte.
-        // int*     lod_level_1 data
-        // ...
-        auto lod = tensor.lod();
-        uint64_t size = lod.size();
-        os.write(reinterpret_cast<const char *>(&size), sizeof(size));
-
-        for (auto &each : lod) {
-            size = each.size() * sizeof(framework::LoD::value_type::value_type);
-            os.write(reinterpret_cast<const char *>(&size), sizeof(size));
-            os.write(reinterpret_cast<const char *>(each.data()),
-                     static_cast<std::streamsize>(size));
-        }
-    }
-    // the 3st field, Tensor
-    TensorToStream(os, static_cast<Tensor>(tensor));
+  }
+  // the 3st field, Tensor
+  TensorToStream(os, static_cast<Tensor>(tensor));
 }

 void DeserializeFromStream(std::istream &is, LoDTensor *tensor) {
-    {
-        // the 1st field, unit32_t version for LoDTensor
-        uint32_t version;
-        is.read(reinterpret_cast<char *>(&version), sizeof(version));
-        //    PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is
-        //    supported");
-    }
-    {
-        // the 2st field, LoD information
-        uint64_t lod_level;
-        is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
-        auto &lod = *tensor->mutable_lod();
-        lod.resize(lod_level);
-        for (uint64_t i = 0; i < lod_level; ++i) {
-            uint64_t size;
-            is.read(reinterpret_cast<char *>(&size), sizeof(size));
-            std::vector<size_t> tmp(size / sizeof(size_t));
-            is.read(reinterpret_cast<char *>(tmp.data()),
-                    static_cast<std::streamsize>(size));
-            lod[i] = tmp;
-        }
+  {
+    // the 1st field, unit32_t version for LoDTensor
+    uint32_t version;
+    is.read(reinterpret_cast<char *>(&version), sizeof(version));
+    //    PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is
+    //    supported");
+  }
+  {
+    // the 2st field, LoD information
+    uint64_t lod_level;
+    is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
+    auto &lod = *tensor->mutable_lod();
+    lod.resize(lod_level);
+    for (uint64_t i = 0; i < lod_level; ++i) {
+      uint64_t size;
+      is.read(reinterpret_cast<char *>(&size), sizeof(size));
+      std::vector<size_t> tmp(size / sizeof(size_t));
+      is.read(reinterpret_cast<char *>(tmp.data()),
+              static_cast<std::streamsize>(size));
+      lod[i] = tmp;
    }
-    // the 3st filed, Tensor
-    TensorFromStream(is, static_cast<Tensor *>(tensor));
+  }
+  // the 3st filed, Tensor
+  TensorFromStream(is, static_cast<Tensor *>(tensor));
 }

 } // namespace framework

--- a/src/framework/lod_tensor.h
+++ b/src/framework/lod_tensor.h
@@ -102,45 +102,45 @@ bool CheckAbsLoD(const LoD &in, int tensor_height = -1);
 * see https://en.wikipedia.org/wiki/Level_of_details for reference.
 */
 class LoDTensor : public Tensor {
-  public:
-    LoDTensor() : Tensor() {}
-
-    explicit LoDTensor(const LoD &lod) : lod_(lod) {}
-
-    void set_lod(const LoD &lod) { lod_ = lod; }
-
-    const LoD &lod() const { return lod_; }
-
-    LoD *mutable_lod() { return &lod_; }
-
-    /*
-     * Get the start offset and end offset of an  element from LoD.
-     */
-    std::pair<size_t, size_t> lod_element(size_t level, size_t elem) const {
-        //    PADDLE_ENFORCE_LT(level, NumLevels());
-        //    PADDLE_ENFORCE_LT(elem, NumElements(level));
-        return std::make_pair((lod_)[level][elem], (lod_)[level][elem + 1]);
-    }
-
-    /*
-     * Number of LoDTensor's levels, each level has units of data, for
-     * example,
-     * in the sentence's view, article, paragraph, sentence are 3
-     * levels.
-     */
-    size_t NumLevels() const { return lod_.size(); }
-
-    /*
-     * Number of elements in a level.
-     */
-    size_t NumElements(size_t level = 0) const {
-        //    PADDLE_ENFORCE_LT(level, NumLevels());
-        // the last offset is the end of last element
-        return (lod_)[level].size() - 1;
-    }
-
-  private:
-    LoD lod_;
+public:
+  LoDTensor() : Tensor() {}
+
+  explicit LoDTensor(const LoD &lod) : lod_(lod) {}
+
+  void set_lod(const LoD &lod) { lod_ = lod; }
+
+  const LoD &lod() const { return lod_; }
+
+  LoD *mutable_lod() { return &lod_; }
+
+  /*
+   * Get the start offset and end offset of an  element from LoD.
+   */
+  std::pair<size_t, size_t> lod_element(size_t level, size_t elem) const {
+    //    PADDLE_ENFORCE_LT(level, NumLevels());
+    //    PADDLE_ENFORCE_LT(elem, NumElements(level));
+    return std::make_pair((lod_)[level][elem], (lod_)[level][elem + 1]);
+  }
+
+  /*
+   * Number of LoDTensor's levels, each level has units of data, for
+   * example,
+   * in the sentence's view, article, paragraph, sentence are 3
+   * levels.
+   */
+  size_t NumLevels() const { return lod_.size(); }
+
+  /*
+   * Number of elements in a level.
+   */
+  size_t NumElements(size_t level = 0) const {
+    //    PADDLE_ENFORCE_LT(level, NumLevels());
+    // the last offset is the end of last element
+    return (lod_)[level].size() - 1;
+  }
+
+private:
+  LoD lod_;
 };

 /*
@@ -155,26 +155,26 @@ class LoDTensor : public Tensor {
 */
 template <typename T>
 LoDTensor LodExpand(const LoDTensor &source, const LoD &lod, size_t level) {
-    LoD abs_lod = ToAbsOffset(lod);
-    const auto &lod_level = lod[level];
-    size_t num_instances = source.dims()[0];
-
-    // new tensor
-    LoDTensor tensor;
-    tensor.set_lod(lod);
-    auto dims = source.dims();
-    dims[0] = lod_level.back();
-    tensor.Resize(dims);
-    tensor.mutable_data<T>();
-
-    //  PADDLE_ENFORCE_EQ(num_instances, lod_level.size() - 1);
-    for (size_t ins = 0; ins < num_instances; ins++) {
-        for (size_t elem = lod_level[ins]; elem < lod_level[ins + 1]; elem++) {
-            auto slice = tensor.Slice(elem, elem + 1);
-            TensorCopy(source.Slice(ins, ins + 1), &slice);
-        }
+  LoD abs_lod = ToAbsOffset(lod);
+  const auto &lod_level = lod[level];
+  size_t num_instances = source.dims()[0];
+
+  // new tensor
+  LoDTensor tensor;
+  tensor.set_lod(lod);
+  auto dims = source.dims();
+  dims[0] = lod_level.back();
+  tensor.Resize(dims);
+  tensor.mutable_data<T>();
+
+  //  PADDLE_ENFORCE_EQ(num_instances, lod_level.size() - 1);
+  for (size_t ins = 0; ins < num_instances; ins++) {
+    for (size_t elem = lod_level[ins]; elem < lod_level[ins + 1]; elem++) {
+      auto slice = tensor.Slice(elem, elem + 1);
+      TensorCopy(source.Slice(ins, ins + 1), &slice);
    }
-    return tensor;
+  }
+  return tensor;
 }

 // Get the absolute offset of a lod[start_level][start_idx:end_idx] and

--- a/src/framework/op_desc.cpp
+++ b/src/framework/op_desc.cpp
@@ -8,51 +8,51 @@ namespace paddle_mobile {
 namespace framework {

 OpDesc::OpDesc(const proto::OpDesc &desc) : desc_(desc) {
-    for (int i = 0; i < desc_.inputs_size(); ++i) {
-        const proto::OpDesc::Var &var = desc_.inputs(i);
-        std::vector<std::string> &args = inputs_[var.parameter()];
-        int arg_size = var.arguments_size();
-        for (int j = 0; j < arg_size; ++j) {
-            args.push_back(var.arguments(j));
-        }
+  for (int i = 0; i < desc_.inputs_size(); ++i) {
+    const proto::OpDesc::Var &var = desc_.inputs(i);
+    std::vector<std::string> &args = inputs_[var.parameter()];
+    int arg_size = var.arguments_size();
+    for (int j = 0; j < arg_size; ++j) {
+      args.push_back(var.arguments(j));
    }
-
-    for (int i = 0; i < desc_.outputs_size(); ++i) {
-        const proto::OpDesc::Var &var = desc_.outputs(i);
-        std::vector<std::string> &args = outputs_[var.parameter()];
-        int arg_size = var.arguments_size();
-        for (int j = 0; j < arg_size; ++j) {
-            args.push_back(var.arguments(j));
-        }
+  }
+
+  for (int i = 0; i < desc_.outputs_size(); ++i) {
+    const proto::OpDesc::Var &var = desc_.outputs(i);
+    std::vector<std::string> &args = outputs_[var.parameter()];
+    int arg_size = var.arguments_size();
+    for (int j = 0; j < arg_size; ++j) {
+      args.push_back(var.arguments(j));
    }
-
-    for (const proto::OpDesc::Attr &attr : desc_.attrs()) {
-        std::string attr_name = attr.name();
-        if (attr.type() != proto::AttrType::BLOCK) {
-            attrs_[attr_name] = Attribute::GetAttrValue(attr);
-            //      if (attr.type() == proto::AttrType::INT){
-            //        std::cout << " attrName " << attr_name << " " <<
-            //        attrs_[attr_name].Get<int>() << std::endl;
-            //      }
-        }
+  }
+
+  for (const proto::OpDesc::Attr &attr : desc_.attrs()) {
+    std::string attr_name = attr.name();
+    if (attr.type() != proto::AttrType::BLOCK) {
+      attrs_[attr_name] = Attribute::GetAttrValue(attr);
+      //      if (attr.type() == proto::AttrType::INT){
+      //        std::cout << " attrName " << attr_name << " " <<
+      //        attrs_[attr_name].Get<int>() << std::endl;
+      //      }
    }
+  }
 }

 const std::vector<std::string> &OpDesc::Input(const std::string &name) const {
-    return inputs_.find(name)->second;
+  return inputs_.find(name)->second;
 }

 const std::vector<std::string> &OpDesc::Output(const std::string &name) const {
-    return outputs_.find(name)->second;
+  return outputs_.find(name)->second;
 }

 Attribute OpDesc::GetAttr(const std::string &name) const {
-    auto it = attrs_.find(name);
-    return it->second;
+  auto it = attrs_.find(name);
+  return it->second;
 }

 const std::unordered_map<std::string, Attribute> &OpDesc::GetAttrMap() const {
-    return attrs_;
+  return attrs_;
 }

 } // namespace framework

--- a/src/framework/op_desc.h
+++ b/src/framework/op_desc.h
@@ -26,25 +26,25 @@ namespace paddle_mobile {
 namespace framework {

 class OpDesc : PaddleMobileObject {
-  public:
-    OpDesc(const proto::OpDesc &desc);
-    const std::vector<std::string> &Input(const std::string &name) const;
-    const std::vector<std::string> &Output(const std::string &name) const;
-    Attribute GetAttr(const std::string &name) const;
+public:
+  OpDesc(const proto::OpDesc &desc);
+  const std::vector<std::string> &Input(const std::string &name) const;
+  const std::vector<std::string> &Output(const std::string &name) const;
+  Attribute GetAttr(const std::string &name) const;

-    const VariableNameMap &GetInputs() { return inputs_; }
+  const VariableNameMap &GetInputs() { return inputs_; }

-    const VariableNameMap &GetOutputs() { return outputs_; }
+  const VariableNameMap &GetOutputs() { return outputs_; }

-    const AttributeMap &GetAttrMap() const;
+  const AttributeMap &GetAttrMap() const;

-    const std::string &Type() { return desc_.type(); };
+  const std::string &Type() { return desc_.type(); };

-  private:
-    proto::OpDesc desc_;
-    VariableNameMap inputs_;
-    VariableNameMap outputs_;
-    AttributeMap attrs_;
+private:
+  proto::OpDesc desc_;
+  VariableNameMap inputs_;
+  VariableNameMap outputs_;
+  AttributeMap attrs_;
 };

 } // namespace framework

--- a/src/framework/op_info.h
+++ b/src/framework/op_info.h
@@ -25,13 +25,13 @@ namespace paddle_mobile {
 namespace framework {

 template <typename Dtype> struct OpInfo {
-    OpCreator<Dtype> creator_;
-    const OpCreator<Dtype> &Creator() const {
-        //    PADDLE_ENFORCE_NOT_NULL(creator_,
-        //                            "Operator Creator has not been
-        //                            registered");
-        return creator_;
-    }
+  OpCreator<Dtype> creator_;
+  const OpCreator<Dtype> &Creator() const {
+    //    PADDLE_ENFORCE_NOT_NULL(creator_,
+    //                            "Operator Creator has not been
+    //                            registered");
+    return creator_;
+  }
 };

 template <typename Dtype> class OpInfoMap;
@@ -39,55 +39,55 @@ template <typename Dtype> class OpInfoMap;
 template <typename Dtype> static OpInfoMap<Dtype> *g_op_info_map = nullptr;

 template <typename Dtype> class OpInfoMap {
-  public:
-    static OpInfoMap &Instance() {
-        if (g_op_info_map<Dtype> == nullptr) {
-            g_op_info_map<Dtype> = new OpInfoMap();
-        }
-        return *g_op_info_map<Dtype>;
-    };
-
-    bool Has(const std::string &op_type) const {
-        return map_.find(op_type) != map_.end();
-    }
-
-    void Insert(const std::string &type, const OpInfo<Dtype> &info) {
-        //    PADDLE_ENFORCE(!Has(type), "Operator %s has been
-        //    registered", type);
-        map_.insert({type, info});
-    }
-
-    const OpInfo<Dtype> &Get(const std::string &type) const {
-        auto op_info_ptr = GetNullable(type);
-        //    PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not
-        //    been
-        //    registered",
-        //                            type);
-        return *op_info_ptr;
+public:
+  static OpInfoMap &Instance() {
+    if (g_op_info_map<Dtype> == nullptr) {
+      g_op_info_map<Dtype> = new OpInfoMap();
    }
-
-    const OpInfo<Dtype> *GetNullable(const std::string &type) const {
-        auto it = map_.find(type);
-        if (it == map_.end()) {
-            return nullptr;
-        } else {
-            return &it->second;
-        }
+    return *g_op_info_map<Dtype>;
+  };
+
+  bool Has(const std::string &op_type) const {
+    return map_.find(op_type) != map_.end();
+  }
+
+  void Insert(const std::string &type, const OpInfo<Dtype> &info) {
+    //    PADDLE_ENFORCE(!Has(type), "Operator %s has been
+    //    registered", type);
+    map_.insert({type, info});
+  }
+
+  const OpInfo<Dtype> &Get(const std::string &type) const {
+    auto op_info_ptr = GetNullable(type);
+    //    PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not
+    //    been
+    //    registered",
+    //                            type);
+    return *op_info_ptr;
+  }
+
+  const OpInfo<Dtype> *GetNullable(const std::string &type) const {
+    auto it = map_.find(type);
+    if (it == map_.end()) {
+      return nullptr;
+    } else {
+      return &it->second;
    }
+  }

-    const std::unordered_map<std::string, OpInfo<Dtype>> &map() const {
-        return map_;
-    }
+  const std::unordered_map<std::string, OpInfo<Dtype>> &map() const {
+    return map_;
+  }

-    std::unordered_map<std::string, OpInfo<Dtype>> *mutable_map() {
-        return &map_;
-    }
+  std::unordered_map<std::string, OpInfo<Dtype>> *mutable_map() {
+    return &map_;
+  }

-  private:
-    OpInfoMap() = default;
-    std::unordered_map<std::string, OpInfo<Dtype>> map_;
+private:
+  OpInfoMap() = default;
+  std::unordered_map<std::string, OpInfo<Dtype>> map_;

-    //  DISABLE_COPY_AND_ASSIGN(OpInfoMap);
+  //  DISABLE_COPY_AND_ASSIGN(OpInfoMap);
 };

 } // namespace framework

--- a/src/framework/op_kernel_type.h
+++ b/src/framework/op_kernel_type.h
@@ -24,41 +24,40 @@ SOFTWARE.
 namespace paddle_mobile {
 namespace framework {
 struct OpKernelType {
-    struct Hash {
-        size_t operator()(const OpKernelType &key) const {
-            int data_type = static_cast<int>(key.data_type_) << LEFT_SHIFT;
-            int data_layout = static_cast<int>(key.data_layout_)
-                              << (LEFT_SHIFT * 2);
+  struct Hash {
+    size_t operator()(const OpKernelType &key) const {
+      int data_type = static_cast<int>(key.data_type_) << LEFT_SHIFT;
+      int data_layout = static_cast<int>(key.data_layout_) << (LEFT_SHIFT * 2);

-            std::hash<int> hasher;
-            return hasher(data_type + data_layout);
-        }
-    };
+      std::hash<int> hasher;
+      return hasher(data_type + data_layout);
+    }
+  };

-    // place, data_type, library_type kinds less than 2^8
-    constexpr static int LEFT_SHIFT = 8;
+  // place, data_type, library_type kinds less than 2^8
+  constexpr static int LEFT_SHIFT = 8;

-    proto::VarType::Type data_type_;
-    DataLayout data_layout_;
+  proto::VarType::Type data_type_;
+  DataLayout data_layout_;

-    OpKernelType(proto::VarType::Type data_type,
-                 DataLayout data_layout = DataLayout::kAnyLayout)
-        : data_type_(data_type), data_layout_(data_layout) {}
+  OpKernelType(proto::VarType::Type data_type,
+               DataLayout data_layout = DataLayout::kAnyLayout)
+      : data_type_(data_type), data_layout_(data_layout) {}

-    bool operator==(const OpKernelType &o) const {
-        return data_type_ == o.data_type_ && data_layout_ == o.data_layout_;
-    }
+  bool operator==(const OpKernelType &o) const {
+    return data_type_ == o.data_type_ && data_layout_ == o.data_layout_;
+  }

-    bool operator!=(const OpKernelType &o) const { return !(*this == o); }
+  bool operator!=(const OpKernelType &o) const { return !(*this == o); }
 };

 inline bool NeedTransformLayout(const DataLayout &l, const DataLayout &r) {
-    return l != DataLayout::kAnyLayout && r != DataLayout::kAnyLayout && l != r;
+  return l != DataLayout::kAnyLayout && r != DataLayout::kAnyLayout && l != r;
 }

 inline bool TransFromNeeded(const OpKernelType &l, const OpKernelType &r) {
-    return (l.data_type_ != r.data_type_) ||
-           NeedTransformLayout(l.data_layout_, r.data_layout_);
+  return (l.data_type_ != r.data_type_) ||
+         NeedTransformLayout(l.data_layout_, r.data_layout_);
 }

 } // namespace framework

--- a/src/framework/operator.cpp
+++ b/src/framework/operator.cpp
@@ -30,7 +30,7 @@ OperatorBase<Dtype>::OperatorBase(const std::string &type,
                                  std::shared_ptr<Scope> scope)
    : type_(type), inputs_(inputs), outputs_(outputs), attrs_(attrs),
      scope_(scope) {
-    CheckAllInputOutputSet();
+  CheckAllInputOutputSet();
 }
 template <typename Dtype>
 void OperatorBase<Dtype>::CheckAllInputOutputSet() const {}

--- a/src/framework/operator.h
+++ b/src/framework/operator.h
@@ -49,50 +49,50 @@ static std::unordered_map<
                           {"fetch", {{"X"}, {"Out"}}}};

 template <typename Dtype> class OperatorBase : PaddleMobileObject {
-  public:
-    OperatorBase(const std::string &type, const VariableNameMap &inputs,
-                 const VariableNameMap &outputs, const AttributeMap &attrs,
-                 std::shared_ptr<Scope> scope);
-    virtual ~OperatorBase() {}
-    virtual void Run() const = 0;
+public:
+  OperatorBase(const std::string &type, const VariableNameMap &inputs,
+               const VariableNameMap &outputs, const AttributeMap &attrs,
+               std::shared_ptr<Scope> scope);
+  virtual ~OperatorBase() {}
+  virtual void Run() const = 0;

-    const VariableNameMap &Inputs() const { return inputs_; }
-    const VariableNameMap &Outputs() const { return outputs_; }
-    const std::string &Type() const { return type_; }
-    const AttributeMap &Attrs() const { return attrs_; }
-    void ClearVariables(const std::vector<std::string> &var_names) const {
-        if (this->scope_) {
-            this->scope_->EraseVars(var_names);
-        }
+  const VariableNameMap &Inputs() const { return inputs_; }
+  const VariableNameMap &Outputs() const { return outputs_; }
+  const std::string &Type() const { return type_; }
+  const AttributeMap &Attrs() const { return attrs_; }
+  void ClearVariables(const std::vector<std::string> &var_names) const {
+    if (this->scope_) {
+      this->scope_->EraseVars(var_names);
    }
+  }

-  protected:
-    std::shared_ptr<Scope> scope_;
-    std::string type_;
-    VariableNameMap inputs_;
-    VariableNameMap outputs_;
-    AttributeMap attrs_;
+protected:
+  std::shared_ptr<Scope> scope_;
+  std::string type_;
+  VariableNameMap inputs_;
+  VariableNameMap outputs_;
+  AttributeMap attrs_;

-  private:
-    void CheckAllInputOutputSet() const;
+private:
+  void CheckAllInputOutputSet() const;
 };

 template <typename Dtype>
 class OperatorWithKernel : public OperatorBase<Dtype> {
-  public:
-    OperatorWithKernel(const std::string &type, const VariableNameMap &inputs,
-                       const VariableNameMap &outputs,
-                       const AttributeMap &attrs, std::shared_ptr<Scope> scope)
-        : OperatorBase<Dtype>(type, inputs, outputs, attrs, scope) {}
-    virtual void InferShape() const = 0;
-    virtual void Run() const = 0;
+public:
+  OperatorWithKernel(const std::string &type, const VariableNameMap &inputs,
+                     const VariableNameMap &outputs, const AttributeMap &attrs,
+                     std::shared_ptr<Scope> scope)
+      : OperatorBase<Dtype>(type, inputs, outputs, attrs, scope) {}
+  virtual void InferShape() const = 0;
+  virtual void Run() const = 0;
 };

 template <typename Dtype, typename P> class OpKernelBase : PaddleMobileObject {
-  public:
-    virtual void Compute(const P &para) const = 0;
+public:
+  virtual void Compute(const P &para) const = 0;

-    virtual ~OpKernelBase() = default;
+  virtual ~OpKernelBase() = default;
 };

 } // namespace framework

--- a/src/framework/paddle_mobile_object.h
+++ b/src/framework/paddle_mobile_object.h
@@ -24,13 +24,13 @@ SOFTWARE.
 namespace paddle_mobile {

 class PaddleMobileObject {
-  public:
-    virtual std::string ToString() {
-        char address[128] = {0};
-        sprintf(address, "%p", this);
-        return std::string(address);
-    }
+public:
+  virtual std::string ToString() {
+    char address[128] = {0};
+    sprintf(address, "%p", this);
+    return std::string(address);
+  }

-  private:
+private:
 };
 } // namespace paddle_mobile
--- a/src/framework/program-optimize/node.cpp
+++ b/src/framework/program-optimize/node.cpp
@@ -25,71 +25,71 @@ namespace paddle_mobile {
 namespace framework {

 Node &Node::operator>(std::shared_ptr<Node> node) {
-    outputs_.push_back(node);
-    std::shared_ptr<Node> this_node;
-    node->inputs_.push_back(this);
-    return *node;
+  outputs_.push_back(node);
+  std::shared_ptr<Node> this_node;
+  node->inputs_.push_back(this);
+  return *node;
 }

 bool Node::operator==(const Node &in) {
-    if (in.type_ == this->type_) {
-        if (this->outputs_.size() == in.outputs_.size()) {
-            for (int i = 0; i < outputs_.size(); ++i) {
-                if (!(*outputs_[i] == *in.outputs_[i])) {
-                    return false;
-                }
-            }
-        } else {
-            return false;
+  if (in.type_ == this->type_) {
+    if (this->outputs_.size() == in.outputs_.size()) {
+      for (int i = 0; i < outputs_.size(); ++i) {
+        if (!(*outputs_[i] == *in.outputs_[i])) {
+          return false;
        }
+      }
    } else {
-        return false;
+      return false;
    }
-    return true;
+  } else {
+    return false;
+  }
+  return true;
 }

 std::string Node::ToString(std::string blank, const Node *node) const {
-    std::stringstream ss;
-    ss << type_ << "-> \n";
+  std::stringstream ss;
+  ss << type_ << "-> \n";

-    if (inputs_.size() > 1 && node != inputs_.back()) {
-        return ss.str();
-    } else if (inputs_.size() > 1 && node == inputs_.back()) {
-        ss << "\n" << blank << type_ << "\n";
-    }
-
-    for (int i = 0; i < outputs_.size(); ++i) {
-        ss << blank << outputs_[i]->ToString(blank + " ", this) << "";
-    }
+  if (inputs_.size() > 1 && node != inputs_.back()) {
    return ss.str();
+  } else if (inputs_.size() > 1 && node == inputs_.back()) {
+    ss << "\n" << blank << type_ << "\n";
+  }
+
+  for (int i = 0; i < outputs_.size(); ++i) {
+    ss << blank << outputs_[i]->ToString(blank + " ", this) << "";
+  }
+  return ss.str();
 }

 std::string Node::ToString() const { return this->ToString(" ", this); }

 Node &Node::To(int index) {
-    if (index == 0) {
-        this->outputs_.clear();
-    }
+  if (index == 0) {
+    this->outputs_.clear();
+  }

-    for (int j = 0; j < this->outputs_.size(); ++j) {
-        outputs_[j]->To(index - 1);
-    }
-    return *this;
+  for (int j = 0; j < this->outputs_.size(); ++j) {
+    outputs_[j]->To(index - 1);
+  }
+  return *this;
 }

 uint Node::depth(uint begin) {
-    uint depth = 0;
-    begin++;
-    for (int i = 0; i < outputs_.size(); ++i) {
-        uint output_depth = outputs_[i]->depth(begin);
-        depth = output_depth > depth ? output_depth : depth;
-    }
-    return begin > depth ? begin : depth;
+  uint depth = 0;
+  begin++;
+  for (int i = 0; i < outputs_.size(); ++i) {
+    uint output_depth = outputs_[i]->depth(begin);
+    depth = output_depth > depth ? output_depth : depth;
+  }
+  return begin > depth ? begin : depth;
 }

 Print &operator<<(Print &printer, const Node &node) {
-    printer << node.ToString();
-    return printer;
+  printer << node.ToString();
+  return printer;
 }

 } // namespace framework

--- a/src/framework/program-optimize/node.h
+++ b/src/framework/program-optimize/node.h
@@ -29,22 +29,22 @@ namespace paddle_mobile {
 namespace framework {

 class Node : PaddleMobileObject {
-  public:
-    Node(const std::string &type) : type_(type) {}
-    Node(std::shared_ptr<OpDesc> op_desc)
-        : op_desc_(op_desc), type_(op_desc->Type()){};
-    Node &operator>(std::shared_ptr<Node> node);
-    bool operator==(const Node &in);
-    std::string ToString() const;
-    Node &To(int index);
-    uint depth(uint begin = 0);
+public:
+  Node(const std::string &type) : type_(type) {}
+  Node(std::shared_ptr<OpDesc> op_desc)
+      : op_desc_(op_desc), type_(op_desc->Type()){};
+  Node &operator>(std::shared_ptr<Node> node);
+  bool operator==(const Node &in);
+  std::string ToString() const;
+  Node &To(int index);
+  uint depth(uint begin = 0);

-  private:
-    std::shared_ptr<OpDesc> op_desc_;
-    std::string ToString(std::string blank, const Node *node) const;
-    std::vector<std::shared_ptr<Node>> outputs_;
-    std::vector<Node *> inputs_;
-    std::string type_;
+private:
+  std::shared_ptr<OpDesc> op_desc_;
+  std::string ToString(std::string blank, const Node *node) const;
+  std::vector<std::shared_ptr<Node>> outputs_;
+  std::vector<Node *> inputs_;
+  std::string type_;
 };

 Print &operator<<(Print &printer, const Node &node);

--- a/src/framework/program-optimize/program_optimize.cpp
+++ b/src/framework/program-optimize/program_optimize.cpp
@@ -26,49 +26,48 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::Optimize() {}

 std::shared_ptr<ProgramDesc>
 ProgramOptimize::FushionOptimize(std::shared_ptr<ProgramDesc> ori_des) {
-    for (int i = 0; i < ori_des->Blocks().size(); ++i) {
-        std::unordered_map<std::string, std::shared_ptr<Node>> output_nodes;
-        std::shared_ptr<Node> begin_node;
-        auto block = ori_des->Block(i);
-        //        DLOG << " ops size: " << block->Ops().size();
-        for (int j = 0; j < block->Ops().size(); ++j) {
-            auto op = block->Ops()[j];
-            auto op_type = op->Type();
-            //            DLOG << "op type: " << op_type << " index: " << j;
-            if (op_input_output_key.find(op->Type()) ==
-                op_input_output_key.end()) {
-                return NULL;
-            }
+  for (int i = 0; i < ori_des->Blocks().size(); ++i) {
+    std::unordered_map<std::string, std::shared_ptr<Node>> output_nodes;
+    std::shared_ptr<Node> begin_node;
+    auto block = ori_des->Block(i);
+    //        DLOG << " ops size: " << block->Ops().size();
+    for (int j = 0; j < block->Ops().size(); ++j) {
+      auto op = block->Ops()[j];
+      auto op_type = op->Type();
+      //            DLOG << "op type: " << op_type << " index: " << j;
+      if (op_input_output_key.find(op->Type()) == op_input_output_key.end()) {
+        return NULL;
+      }

-            std::shared_ptr<Node> node = std::make_shared<Node>(op);
-            if (j == 0) {
-                begin_node = node;
-            }
+      std::shared_ptr<Node> node = std::make_shared<Node>(op);
+      if (j == 0) {
+        begin_node = node;
+      }

-            auto input_keys = op_input_output_key.at(op->Type()).first;
-            for (auto input_key : input_keys) {
-                auto op_inputs = op->Input(input_key);
-                for (int l = 0; l < op_inputs.size(); ++l) {
-                    std::string input_key = op_inputs[l];
-                    if (output_nodes.find(input_key) != output_nodes.end()) {
-                        auto input_node = output_nodes[input_key];
-                        *input_node > node;
-                    }
-                }
-            }
-
-            auto output_keys = op_input_output_key.at(op_type).second;
-            for (auto output_key : output_keys) {
-                auto op_outputs = op->Output(output_key);
-                for (int k = 0; k < op_outputs.size(); ++k) {
-                    output_nodes[op_outputs[k]] = node;
-                }
-            }
+      auto input_keys = op_input_output_key.at(op->Type()).first;
+      for (auto input_key : input_keys) {
+        auto op_inputs = op->Input(input_key);
+        for (int l = 0; l < op_inputs.size(); ++l) {
+          std::string input_key = op_inputs[l];
+          if (output_nodes.find(input_key) != output_nodes.end()) {
+            auto input_node = output_nodes[input_key];
+            *input_node > node;
+          }
        }
+      }

-        DLOG << "node: \n" << *begin_node;
+      auto output_keys = op_input_output_key.at(op_type).second;
+      for (auto output_key : output_keys) {
+        auto op_outputs = op->Output(output_key);
+        for (int k = 0; k < op_outputs.size(); ++k) {
+          output_nodes[op_outputs[k]] = node;
+        }
+      }
    }
-    return ori_des;
+
+    DLOG << "node: \n" << *begin_node;
+  }
+  return ori_des;
 }
 } // namespace framework
 } // namespace paddle_mobile
--- a/src/framework/program-optimize/program_optimize.h
+++ b/src/framework/program-optimize/program_optimize.h
@@ -26,16 +26,16 @@ namespace paddle_mobile {

 namespace framework {
 class ProgramOptimize {
-  public:
-    ProgramOptimize() {}
-    std::shared_ptr<ProgramDesc> Optimize();
-    std::shared_ptr<ProgramDesc>
-    FushionOptimize(std::shared_ptr<ProgramDesc> ori_des);
+public:
+  ProgramOptimize() {}
+  std::shared_ptr<ProgramDesc> Optimize();
+  std::shared_ptr<ProgramDesc>
+  FushionOptimize(std::shared_ptr<ProgramDesc> ori_des);

-  private:
-    //                std::shared_ptr<ProgramDesc> ori_desc_;
-    std::vector<std::unordered_map<std::string, std::shared_ptr<Node>>>
-        outputs_nodes_;
+private:
+  //                std::shared_ptr<ProgramDesc> ori_desc_;
+  std::vector<std::unordered_map<std::string, std::shared_ptr<Node>>>
+      outputs_nodes_;
 };
 } // namespace framework
 } // namespace paddle_mobile
--- a/src/framework/program.h
+++ b/src/framework/program.h
@@ -28,12 +28,12 @@ namespace framework {

 template <typename Dtype, Precision P = Precision::FP32>
 class Program : PaddleMobileObject {
-  public:
-    std::shared_ptr<ProgramDesc> originProgram;
-    std::shared_ptr<ProgramDesc> optimizeProgram;
-    std::shared_ptr<Scope> scope;
+public:
+  std::shared_ptr<ProgramDesc> originProgram;
+  std::shared_ptr<ProgramDesc> optimizeProgram;
+  std::shared_ptr<Scope> scope;

-  private:
+private:
 };

 } // namespace framework

--- a/src/framework/program_desc.cpp
+++ b/src/framework/program_desc.cpp
@@ -8,14 +8,14 @@ namespace paddle_mobile {
 namespace framework {

 ProgramDesc::ProgramDesc(const proto::ProgramDesc &desc) : desc_(desc) {
-    for (auto &block_desc : *desc_.mutable_blocks()) {
-        // new framework::BlockDesc(block_desc)
-        blocks_.emplace_back(std::make_shared<BlockDesc>(block_desc));
-    }
+  for (auto &block_desc : *desc_.mutable_blocks()) {
+    // new framework::BlockDesc(block_desc)
+    blocks_.emplace_back(std::make_shared<BlockDesc>(block_desc));
+  }
 }

 std::shared_ptr<BlockDesc> ProgramDesc::Block(size_t idx) {
-    return blocks_[idx];
+  return blocks_[idx];
 }

 } // namespace framework

--- a/src/framework/program_desc.h
+++ b/src/framework/program_desc.h
@@ -28,14 +28,14 @@ namespace paddle_mobile {
 namespace framework {

 class ProgramDesc : PaddleMobileObject {
-  public:
-    ProgramDesc(const proto::ProgramDesc &desc);
-    std::shared_ptr<BlockDesc> Block(size_t idx);
-    const std::vector<std::shared_ptr<BlockDesc>> &Blocks() { return blocks_; };
+public:
+  ProgramDesc(const proto::ProgramDesc &desc);
+  std::shared_ptr<BlockDesc> Block(size_t idx);
+  const std::vector<std::shared_ptr<BlockDesc>> &Blocks() { return blocks_; };

-  private:
-    std::vector<std::shared_ptr<BlockDesc>> blocks_;
-    proto::ProgramDesc desc_;
+private:
+  std::vector<std::shared_ptr<BlockDesc>> blocks_;
+  proto::ProgramDesc desc_;
 };

 } // namespace framework

--- a/src/framework/scope.cc
+++ b/src/framework/scope.cc
@@ -7,20 +7,20 @@ namespace paddle_mobile {
 namespace framework {

 Scope &Scope::NewScope() const {
-    std::unique_lock<std::mutex> lock(mutex_);
-    kids_.push_back(new Scope(this));
-    return *kids_.back();
+  std::unique_lock<std::mutex> lock(mutex_);
+  kids_.push_back(new Scope(this));
+  return *kids_.back();
 }

 Variable *Scope::Var(const std::string &name) {
-    auto *pvar = FindVarLocally(name);
-    if (pvar != nullptr) {
-        return pvar;
-    };
-    pvar = new Variable;
-    vars_[name] = pvar;
-    pvar->name_ = &(vars_.find(name)->first);
+  auto *pvar = FindVarLocally(name);
+  if (pvar != nullptr) {
    return pvar;
+  };
+  pvar = new Variable;
+  vars_[name] = pvar;
+  pvar->name_ = &(vars_.find(name)->first);
+  return pvar;
 }

 //            Variable* Scope::Var(std::string* name) {
@@ -33,70 +33,70 @@ Variable *Scope::Var(const std::string &name) {
 //            }

 Variable *Scope::FindVar(const std::string &name) const {
-    auto *pvar = FindVarLocally(name);
-    if (pvar != nullptr) {
-        return pvar;
-    }
-    return (parent_ == nullptr) ? nullptr : parent_->FindVar(name);
+  auto *pvar = FindVarLocally(name);
+  if (pvar != nullptr) {
+    return pvar;
+  }
+  return (parent_ == nullptr) ? nullptr : parent_->FindVar(name);
 }

 const Scope *Scope::FindScope(const Variable *var) const {
-    for (auto &name_var : vars_) {
-        if (name_var.second == var) {
-            return this;
-        }
+  for (auto &name_var : vars_) {
+    if (name_var.second == var) {
+      return this;
    }
-    return (parent_ == nullptr) ? nullptr : parent_->FindScope(var);
+  }
+  return (parent_ == nullptr) ? nullptr : parent_->FindScope(var);
 }

 void Scope::DropKids() {
-    for (Scope *s : kids_) {
-        delete s;
-    }
-    kids_.clear();
+  for (Scope *s : kids_) {
+    delete s;
+  }
+  kids_.clear();
 }

 std::vector<std::string> Scope::LocalVarNames() const {
-    std::vector<std::string> known_vars;
-    known_vars.reserve(vars_.size());
-    for (auto &name_var : vars_) {
-        known_vars.emplace_back(name_var.first);
-    }
-    return known_vars;
+  std::vector<std::string> known_vars;
+  known_vars.reserve(vars_.size());
+  for (auto &name_var : vars_) {
+    known_vars.emplace_back(name_var.first);
+  }
+  return known_vars;
 }

 void Scope::DeleteScope(Scope *scope) const {
-    std::unique_lock<std::mutex> lock(mutex_);
-    auto it = std::find(kids_.begin(), kids_.end(), scope);
-    kids_.erase(it);
-    delete scope;
-    // deferent
+  std::unique_lock<std::mutex> lock(mutex_);
+  auto it = std::find(kids_.begin(), kids_.end(), scope);
+  kids_.erase(it);
+  delete scope;
+  // deferent
 }

 void Scope::EraseVars(const std::vector<std::string> &var_names) {
-    std::set<std::string> var_set(var_names.begin(), var_names.end());
-    for (auto it = vars_.begin(); it != vars_.end();) {
-        if (var_set.find(it->first) != var_set.end()) {
-            delete it->second;
-            it = vars_.erase(it);
-        } else {
-            ++it;
-        }
+  std::set<std::string> var_set(var_names.begin(), var_names.end());
+  for (auto it = vars_.begin(); it != vars_.end();) {
+    if (var_set.find(it->first) != var_set.end()) {
+      delete it->second;
+      it = vars_.erase(it);
+    } else {
+      ++it;
    }
+  }
 }

 void Scope::Rename(const std::string &origin_name,
                   const std::string &new_name) const {
-    auto origin_it = vars_.find(origin_name);
-    if (origin_it == vars_.end()) {
-        return;
-    }
-    auto new_it = vars_.find(new_name);
-    if (new_it != vars_.end()) {
-        return;
-    }
-    vars_[new_name] = origin_it->second;
-    vars_.erase(origin_it);
+  auto origin_it = vars_.find(origin_name);
+  if (origin_it == vars_.end()) {
+    return;
+  }
+  auto new_it = vars_.find(new_name);
+  if (new_it != vars_.end()) {
+    return;
+  }
+  vars_[new_name] = origin_it->second;
+  vars_.erase(origin_it);
 }
 //
 //            std::string Scope::Rename(const std::string& origin_name)
@@ -108,11 +108,11 @@ void Scope::Rename(const std::string &origin_name,
 //            }

 Variable *Scope::FindVarLocally(const std::string &name) const {
-    auto it = vars_.find(name);
-    if (it != vars_.end()) {
-        return it->second;
-    }
-    return nullptr;
+  auto it = vars_.find(name);
+  if (it != vars_.end()) {
+    return it->second;
+  }
+  return nullptr;
 }

 } // namespace framework

--- a/src/framework/scope.h
+++ b/src/framework/scope.h
@@ -26,56 +26,56 @@ SOFTWARE.
 namespace paddle_mobile {
 namespace framework {
 class Scope {
-  public:
-    Scope() {}
-    ~Scope() {}
+public:
+  Scope() {}
+  ~Scope() {}

-    Scope &NewScope() const;
+  Scope &NewScope() const;

-    /// Create a variable with given name if it doesn't exist.
-    Variable *Var(const std::string &name);
+  /// Create a variable with given name if it doesn't exist.
+  Variable *Var(const std::string &name);

-    /// Create a variable with a scope-unique name.
-    Variable *Var(std::string *name = nullptr);
+  /// Create a variable with a scope-unique name.
+  Variable *Var(std::string *name = nullptr);

-    void EraseVars(const std::vector<std::string> &var_names);
+  void EraseVars(const std::vector<std::string> &var_names);

-    /// Find a variable in the scope or any of its ancestors.  Returns
-    /// nullptr if cannot find.
-    Variable *FindVar(const std::string &name) const;
+  /// Find a variable in the scope or any of its ancestors.  Returns
+  /// nullptr if cannot find.
+  Variable *FindVar(const std::string &name) const;

-    const Scope *parent() const { return parent_; }
+  const Scope *parent() const { return parent_; }

-    /// Find the scope or an ancestor scope that contains the given
-    /// variable.
-    const Scope *FindScope(const Variable *var) const;
+  /// Find the scope or an ancestor scope that contains the given
+  /// variable.
+  const Scope *FindScope(const Variable *var) const;

-    void DeleteScope(Scope *scope) const;
+  void DeleteScope(Scope *scope) const;

-    /// Drop all kids scopes belonged to this scope.
-    void DropKids();
+  /// Drop all kids scopes belonged to this scope.
+  void DropKids();

-    // enumerate all the variables current contains.
-    std::vector<std::string> LocalVarNames() const;
+  // enumerate all the variables current contains.
+  std::vector<std::string> LocalVarNames() const;

-    // Rename variable to a new name
-    void Rename(const std::string &origin_name,
-                const std::string &new_name) const;
+  // Rename variable to a new name
+  void Rename(const std::string &origin_name,
+              const std::string &new_name) const;

-    // Rename variable to a new name and return the new name
-    std::string Rename(const std::string &origin_name) const;
+  // Rename variable to a new name and return the new name
+  std::string Rename(const std::string &origin_name) const;

-    Variable *FindVarLocally(const std::string &name) const;
+  Variable *FindVarLocally(const std::string &name) const;

-  private:
-    // Call Scope::NewScope for a sub-scope.
-    explicit Scope(Scope const *parent) : parent_(parent) {}
+private:
+  // Call Scope::NewScope for a sub-scope.
+  explicit Scope(Scope const *parent) : parent_(parent) {}

-    mutable std::unordered_map<std::string, Variable *> vars_;
-    mutable std::list<Scope *> kids_;
-    Scope const *parent_{nullptr};
+  mutable std::unordered_map<std::string, Variable *> vars_;
+  mutable std::list<Scope *> kids_;
+  Scope const *parent_{nullptr};

-    mutable std::mutex mutex_;
+  mutable std::mutex mutex_;
 };
 } // namespace framework
 } // namespace paddle_mobile
--- a/src/framework/selected_rows.h
+++ b/src/framework/selected_rows.h
@@ -27,54 +27,54 @@ namespace paddle_mobile {
 namespace framework {

 class SelectedRows {
-  public:
-    SelectedRows(const std::vector<int64_t> &rows, const int64_t &height)
-        : rows_(rows), height_(height) {
-        value_.reset(new Tensor());
-    }
-
-    SelectedRows() {
-        height_ = 0;
-        value_.reset(new Tensor());
-    }
-
-    const Tensor &value() const { return *value_; }
-
-    Tensor *mutable_value() { return value_.get(); }
-
-    int64_t height() const { return height_; }
-
-    void set_height(int64_t height) { height_ = height; }
-
-    const std::vector<int64_t> &rows() const { return rows_; }
-
-    std::vector<int64_t> *mutable_rows() { return &rows_; }
-
-    void set_rows(const std::vector<int64_t> &rows) { rows_ = rows; }
-
-    /**
-     * get the index of id in rows
-     */
-    int64_t index(int64_t id) const {
-        auto it = std::find(rows_.begin(), rows_.end(), id);
-        //    PADDLE_ENFORCE(it != rows_.end(), "id should be in rows");
-        return static_cast<int64_t>(std::distance(rows_.begin(), it));
-    }
-
-    DDim GetCompleteDims() const {
-        std::vector<int64_t> dims = vectorize(value_->dims());
-        dims[0] = height_;
-        return make_ddim(dims);
-    }
-
-  private:
-    // Notice: rows can be duplicate. We can have {0, 4, 7, 0, 5, 7, 9}
-    // here.
-    // SelectedRows are simply concated when adding together. Until a
-    // SelectedRows add a Tensor, will the duplicate rows be handled.
-    std::vector<int64_t> rows_;
-    std::unique_ptr<Tensor> value_{nullptr};
-    int64_t height_;
+public:
+  SelectedRows(const std::vector<int64_t> &rows, const int64_t &height)
+      : rows_(rows), height_(height) {
+    value_.reset(new Tensor());
+  }
+
+  SelectedRows() {
+    height_ = 0;
+    value_.reset(new Tensor());
+  }
+
+  const Tensor &value() const { return *value_; }
+
+  Tensor *mutable_value() { return value_.get(); }
+
+  int64_t height() const { return height_; }
+
+  void set_height(int64_t height) { height_ = height; }
+
+  const std::vector<int64_t> &rows() const { return rows_; }
+
+  std::vector<int64_t> *mutable_rows() { return &rows_; }
+
+  void set_rows(const std::vector<int64_t> &rows) { rows_ = rows; }
+
+  /**
+   * get the index of id in rows
+   */
+  int64_t index(int64_t id) const {
+    auto it = std::find(rows_.begin(), rows_.end(), id);
+    //    PADDLE_ENFORCE(it != rows_.end(), "id should be in rows");
+    return static_cast<int64_t>(std::distance(rows_.begin(), it));
+  }
+
+  DDim GetCompleteDims() const {
+    std::vector<int64_t> dims = vectorize(value_->dims());
+    dims[0] = height_;
+    return make_ddim(dims);
+  }
+
+private:
+  // Notice: rows can be duplicate. We can have {0, 4, 7, 0, 5, 7, 9}
+  // here.
+  // SelectedRows are simply concated when adding together. Until a
+  // SelectedRows add a Tensor, will the duplicate rows be handled.
+  std::vector<int64_t> rows_;
+  std::unique_ptr<Tensor> value_{nullptr};
+  int64_t height_;
 };

 } // namespace framework

--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -29,305 +29,304 @@ namespace framework {
 template <typename... T> struct SizeOfTypeFunctor;

 template <typename T> struct SizeOfTypeFunctor<T> {
-    size_t operator()(std::type_index type) const {
-        if (typeid(T).hash_code() == type.hash_code()) {
-            return sizeof(T);
-        } else {
-            return 0UL;
-        }
+  size_t operator()(std::type_index type) const {
+    if (typeid(T).hash_code() == type.hash_code()) {
+      return sizeof(T);
+    } else {
+      return 0UL;
    }
+  }
 };

 template <> struct SizeOfTypeFunctor<> {
-    size_t operator()(std::type_index type) const { return 0UL; }
+  size_t operator()(std::type_index type) const { return 0UL; }
 };

 template <typename HEAD, typename... TAIL>
 struct SizeOfTypeFunctor<HEAD, TAIL...> {
-    size_t operator()(std::type_index type) const {
-        SizeOfTypeFunctor<HEAD> head;
-        size_t head_size = head(type);
-        if (head_size != 0) {
-            return head_size;
-        }
-        SizeOfTypeFunctor<TAIL...> tail;
-        return tail(type);
+  size_t operator()(std::type_index type) const {
+    SizeOfTypeFunctor<HEAD> head;
+    size_t head_size = head(type);
+    if (head_size != 0) {
+      return head_size;
    }
+    SizeOfTypeFunctor<TAIL...> tail;
+    return tail(type);
+  }
 };

 static inline size_t SizeOfType(std::type_index type) {
-    SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool, size_t>
-        functor;
-    size_t size = functor(type);
-    //  PADDLE_ENFORCE(size != 0UL, "Cannot get size of type %s",
-    //  type.name());
-    return size;
+  SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool, size_t> functor;
+  size_t size = functor(type);
+  //  PADDLE_ENFORCE(size != 0UL, "Cannot get size of type %s",
+  //  type.name());
+  return size;
 }

 class LoDTensor;

 class Tensor {
-  public:
-    Tensor() : offset_(0) {}
-
-    /*! Return a pointer to mutable memory block. */
-    template <typename T> inline T *data() {
-        check_memory_size();
-        //  PADDLE_ENFORCE(std::is_same<T, void>::value ||
-        //                     holder_->type().hash_code() ==
-        //                     typeid(T).hash_code(),
-        //                 "Tensor holds the wrong type, it holds %s",
-        //                 this->holder_->type().name());
-        return reinterpret_cast<T *>(
-            reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
+public:
+  Tensor() : offset_(0) {}
+
+  /*! Return a pointer to mutable memory block. */
+  template <typename T> inline T *data() {
+    check_memory_size();
+    //  PADDLE_ENFORCE(std::is_same<T, void>::value ||
+    //                     holder_->type().hash_code() ==
+    //                     typeid(T).hash_code(),
+    //                 "Tensor holds the wrong type, it holds %s",
+    //                 this->holder_->type().name());
+    return reinterpret_cast<T *>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
+                                 offset_);
+  }
+
+  /*! Return a pointer to constant memory block. */
+  template <typename T> inline const T *data() const {
+    check_memory_size();
+    //  PADDLE_ENFORCE(std::is_same<T, void>::value ||
+    //                     holder_->type().hash_code() ==
+    //                     typeid(T).hash_code(),
+    //                 "Tensor holds the wrong type, it holds %s",
+    //                 this->holder_->type().name());
+
+    return reinterpret_cast<const T *>(
+        reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
+  }
+
+  inline bool IsInitialized() const { return holder_ != nullptr; }
+
+  /**
+   * @brief   Return a pointer to mutable memory block.
+   * @note    If not exist, then allocation.
+   */
+  template <typename T> inline T *mutable_data() {
+    static_assert(std::is_pod<T>::value, "T must be POD");
+    return reinterpret_cast<T *>(mutable_data(typeid(T)));
+  }
+
+  inline void *mutable_data(std::type_index type) {
+    if (holder_ != nullptr) {
+      holder_->set_type(type);
    }
-
-    /*! Return a pointer to constant memory block. */
-    template <typename T> inline const T *data() const {
-        check_memory_size();
-        //  PADDLE_ENFORCE(std::is_same<T, void>::value ||
-        //                     holder_->type().hash_code() ==
-        //                     typeid(T).hash_code(),
-        //                 "Tensor holds the wrong type, it holds %s",
-        //                 this->holder_->type().name());
-
-        return reinterpret_cast<const T *>(
-            reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
+    //  PADDLE_ENFORCE_GE(numel(), 0,
+    //                    "When calling this method, the Tensor's
+    //                    numel must be
+    //                    " "equal or larger than zero. " "Please
+    //                    check
+    //                    Tensor::Resize has been called first.");
+    int64_t size = numel() * SizeOfType(type);
+    /* some versions of boost::variant don't have operator!= */
+    if (holder_ == nullptr || holder_->size() < size + offset_) {
+      holder_.reset(new PlaceholderImpl(size, type));
+
+      offset_ = 0;
    }
-
-    inline bool IsInitialized() const { return holder_ != nullptr; }
-
-    /**
-     * @brief   Return a pointer to mutable memory block.
-     * @note    If not exist, then allocation.
-     */
-    template <typename T> inline T *mutable_data() {
-        static_assert(std::is_pod<T>::value, "T must be POD");
-        return reinterpret_cast<T *>(mutable_data(typeid(T)));
+    return reinterpret_cast<void *>(
+        reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
+  }
+
+  inline void *mutable_data() {
+    //  PADDLE_ENFORCE(this->holder_ != nullptr,
+    //                 "Cannot invoke mutable data if current hold
+    //                 nothing.");
+    return mutable_data(holder_->type());
+  }
+
+  /**
+   * @brief     Return a pointer to mutable memory block.
+   *
+   * @param[in] dims    The dimensions of the memory block.
+   * @param[in] place   The place of the memory block.
+   *
+   * @note      If not exist, then allocation.
+   */
+  template <typename T> inline T *mutable_data(DDim dims) {
+    static_assert(std::is_pod<T>::value, "T must be POD");
+    Resize(dims);
+    return mutable_data<T>();
+  }
+
+  /*! Return the dimensions of the memory block. */
+  inline const DDim &dims() const { return dims_; }
+
+  /*! Return the numel of the memory block. */
+  inline int64_t numel() const { return product(dims_); }
+
+  /*! Resize the dimensions of the memory block. */
+  inline Tensor &Resize(const DDim &dims) {
+    dims_ = dims;
+    return *this;
+  }
+
+  /*! The internal of two tensors share the same memory block. */
+  inline Tensor &ShareDataWith(const Tensor &src) {
+    src.check_memory_size();
+    *this = src;
+    return *this;
+  }
+
+  /**
+   * @brief  Return a sub-tensor of the given tensor.
+   *
+   * @param[in] begin_idx   The index of the start row(inclusive) to
+   * slice.
+   *                        The index number begins from 0.
+   * @param[in] end_idx     The index of the end row(exclusive) to
+   * slice.
+   *                        The index number begins from 0.
+   */
+  inline Tensor Slice(int begin_idx, int end_idx) const {
+    check_memory_size();
+    //  PADDLE_ENFORCE_GE(begin_idx, 0,
+    //                    "The start row index must be greater than
+    //                    0.");
+    //  PADDLE_ENFORCE_LE(end_idx, dims_[0], "The end row index is
+    //  out of
+    //  bound."); PADDLE_ENFORCE_LT(
+    //      begin_idx, end_idx,
+    //      "The start row index must be lesser than the end row
+    //      index.");
+
+    if (dims_[0] == 1) {
+      return *this;
+    } else {
+      size_t base = numel() / dims_[0];
+      Tensor dst;
+      dst.holder_ = holder_;
+      dst.set_layout(layout_);
+      DDim dst_dims = dims_;
+      dst_dims[0] = end_idx - begin_idx;
+      dst.Resize(dst_dims);
+      dst.offset_ = offset_ + begin_idx * base * SizeOfType(type());
+      return dst;
    }
-
-    inline void *mutable_data(std::type_index type) {
-        if (holder_ != nullptr) {
-            holder_->set_type(type);
-        }
-        //  PADDLE_ENFORCE_GE(numel(), 0,
-        //                    "When calling this method, the Tensor's
-        //                    numel must be
-        //                    " "equal or larger than zero. " "Please
-        //                    check
-        //                    Tensor::Resize has been called first.");
-        int64_t size = numel() * SizeOfType(type);
-        /* some versions of boost::variant don't have operator!= */
-        if (holder_ == nullptr || holder_->size() < size + offset_) {
-            holder_.reset(new PlaceholderImpl(size, type));
-
-            offset_ = 0;
-        }
-        return reinterpret_cast<void *>(
-            reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
+  }
+
+  std::type_index type() const {
+    //                PADDLE_ENFORCE_NOT_NULL(
+    //                        holder_, "Tensor not initialized yet
+    //                        when
+    //                        Tensor::type() is called.");
+    return holder_->type();
+  }
+
+  // memory size returns the holding memory size in byte.
+  size_t memory_size() const {
+    return holder_ == nullptr ? 0UL : holder_->size() - offset_;
+  }
+
+  inline void check_memory_size() const {
+    //  PADDLE_ENFORCE_NOT_NULL(
+    //      holder_, "Tensor holds no memory. Call
+    //      Tensor::mutable_data
+    //      first.");
+    //  PADDLE_ENFORCE_LE(
+    //      numel() * SizeOfType(type()), memory_size(),
+    //      "Tensor's dims_ is out of bound. Call
+    //      Tensor::mutable_data "
+    //      "first to re-allocate memory.\n"
+    //      "or maybe the required data-type mismatches the data
+    //      already
+    //      stored.");
+  }
+
+  inline DataLayout layout() const { return layout_; }
+
+  inline void set_layout(const DataLayout layout) { layout_ = layout; }
+
+private:
+  /**
+   * @note    Placeholder hides type T, so it doesn't appear as a
+   * template
+   *          parameter of Variable.
+   */
+  struct Placeholder {
+    virtual ~Placeholder() = default;
+
+    virtual void *ptr() const = 0;
+
+    virtual size_t size() const = 0;
+
+    virtual std::type_index type() const = 0;
+
+    virtual void set_type(std::type_index type) = 0;
+  };
+
+  struct PlaceholderImpl : public Placeholder {
+    PlaceholderImpl(size_t size, std::type_index type)
+        : ptr_(static_cast<uint8_t *>(memory::Alloc(size)),
+               memory::PODDeleter<uint8_t>()),
+          size_(size), type_(type) {
+      //                    PADDLE_ENFORCE_NOT_NULL(ptr_,
+      //                    "Insufficient %s
+      //                    memory to allocation.",
+      //                                            (is_cpu_place(place_)
+      //                                            ?
+      //                                            "CPU" :
+      //                                            "GPU"));
    }

-    inline void *mutable_data() {
-        //  PADDLE_ENFORCE(this->holder_ != nullptr,
-        //                 "Cannot invoke mutable data if current hold
-        //                 nothing.");
-        return mutable_data(holder_->type());
-    }
+    virtual size_t size() const { return size_; }

-    /**
-     * @brief     Return a pointer to mutable memory block.
-     *
-     * @param[in] dims    The dimensions of the memory block.
-     * @param[in] place   The place of the memory block.
-     *
-     * @note      If not exist, then allocation.
-     */
-    template <typename T> inline T *mutable_data(DDim dims) {
-        static_assert(std::is_pod<T>::value, "T must be POD");
-        Resize(dims);
-        return mutable_data<T>();
-    }
+    virtual void *ptr() const { return static_cast<void *>(ptr_.get()); }

-    /*! Return the dimensions of the memory block. */
-    inline const DDim &dims() const { return dims_; }
+    virtual std::type_index type() const { return type_; }

-    /*! Return the numel of the memory block. */
-    inline int64_t numel() const { return product(dims_); }
+    virtual void set_type(std::type_index type) { type_ = type; }

-    /*! Resize the dimensions of the memory block. */
-    inline Tensor &Resize(const DDim &dims) {
-        dims_ = dims;
-        return *this;
-    }
+    /*! the pointer of memory block. */
+    std::unique_ptr<uint8_t, memory::PODDeleter<uint8_t>> ptr_;

-    /*! The internal of two tensors share the same memory block. */
-    inline Tensor &ShareDataWith(const Tensor &src) {
-        src.check_memory_size();
-        *this = src;
-        return *this;
-    }
+    /*! the size of memory block. */
+    size_t size_;

-    /**
-     * @brief  Return a sub-tensor of the given tensor.
-     *
-     * @param[in] begin_idx   The index of the start row(inclusive) to
-     * slice.
-     *                        The index number begins from 0.
-     * @param[in] end_idx     The index of the end row(exclusive) to
-     * slice.
-     *                        The index number begins from 0.
-     */
-    inline Tensor Slice(int begin_idx, int end_idx) const {
-        check_memory_size();
-        //  PADDLE_ENFORCE_GE(begin_idx, 0,
-        //                    "The start row index must be greater than
-        //                    0.");
-        //  PADDLE_ENFORCE_LE(end_idx, dims_[0], "The end row index is
-        //  out of
-        //  bound."); PADDLE_ENFORCE_LT(
-        //      begin_idx, end_idx,
-        //      "The start row index must be lesser than the end row
-        //      index.");
-
-        if (dims_[0] == 1) {
-            return *this;
-        } else {
-            size_t base = numel() / dims_[0];
-            Tensor dst;
-            dst.holder_ = holder_;
-            dst.set_layout(layout_);
-            DDim dst_dims = dims_;
-            dst_dims[0] = end_idx - begin_idx;
-            dst.Resize(dst_dims);
-            dst.offset_ = offset_ + begin_idx * base * SizeOfType(type());
-            return dst;
-        }
-    }
+    /* the current type of memory */
+    std::type_index type_;
+  };

-    std::type_index type() const {
-        //                PADDLE_ENFORCE_NOT_NULL(
-        //                        holder_, "Tensor not initialized yet
-        //                        when
-        //                        Tensor::type() is called.");
-        return holder_->type();
-    }
+  /*! holds the memory block if allocated. */
+  std::shared_ptr<Placeholder> holder_;

-    // memory size returns the holding memory size in byte.
-    size_t memory_size() const {
-        return holder_ == nullptr ? 0UL : holder_->size() - offset_;
-    }
+  /**
+   * @brief points to elements dimensions.
+   *
+   * @note dims_ do not indicate the memory block size.
+   */

-    inline void check_memory_size() const {
-        //  PADDLE_ENFORCE_NOT_NULL(
-        //      holder_, "Tensor holds no memory. Call
-        //      Tensor::mutable_data
-        //      first.");
-        //  PADDLE_ENFORCE_LE(
-        //      numel() * SizeOfType(type()), memory_size(),
-        //      "Tensor's dims_ is out of bound. Call
-        //      Tensor::mutable_data "
-        //      "first to re-allocate memory.\n"
-        //      "or maybe the required data-type mismatches the data
-        //      already
-        //      stored.");
-    }
+  DDim dims_;
+
+  /**
+   * @brief the layout of memory block, default is NHWC.
+   *
+   * @note the memory allocation order, describe how weight/data is
+   * stored
+   *       For example, in 4-D Tensor(rank=4), there are three
+   * commonly
+   *       used layout. They are
+   *            NCHW, NHWC, CHWN.
+   *       N,C,H,W for respectively the batch size, the number of
+   *       feature maps, the height, the width.
+   */
+
+  DataLayout layout_ = DataLayout::kNHWC;

-    inline DataLayout layout() const { return layout_; }
-
-    inline void set_layout(const DataLayout layout) { layout_ = layout; }
-
-  private:
-    /**
-     * @note    Placeholder hides type T, so it doesn't appear as a
-     * template
-     *          parameter of Variable.
-     */
-    struct Placeholder {
-        virtual ~Placeholder() = default;
-
-        virtual void *ptr() const = 0;
-
-        virtual size_t size() const = 0;
-
-        virtual std::type_index type() const = 0;
-
-        virtual void set_type(std::type_index type) = 0;
-    };
-
-    struct PlaceholderImpl : public Placeholder {
-        PlaceholderImpl(size_t size, std::type_index type)
-            : ptr_(static_cast<uint8_t *>(memory::Alloc(size)),
-                   memory::PODDeleter<uint8_t>()),
-              size_(size), type_(type) {
-            //                    PADDLE_ENFORCE_NOT_NULL(ptr_,
-            //                    "Insufficient %s
-            //                    memory to allocation.",
-            //                                            (is_cpu_place(place_)
-            //                                            ?
-            //                                            "CPU" :
-            //                                            "GPU"));
-        }
-
-        virtual size_t size() const { return size_; }
-
-        virtual void *ptr() const { return static_cast<void *>(ptr_.get()); }
-
-        virtual std::type_index type() const { return type_; }
-
-        virtual void set_type(std::type_index type) { type_ = type; }
-
-        /*! the pointer of memory block. */
-        std::unique_ptr<uint8_t, memory::PODDeleter<uint8_t>> ptr_;
-
-        /*! the size of memory block. */
-        size_t size_;
-
-        /* the current type of memory */
-        std::type_index type_;
-    };
-
-    /*! holds the memory block if allocated. */
-    std::shared_ptr<Placeholder> holder_;
-
-    /**
-     * @brief points to elements dimensions.
-     *
-     * @note dims_ do not indicate the memory block size.
-     */
-
-    DDim dims_;
-
-    /**
-     * @brief the layout of memory block, default is NHWC.
-     *
-     * @note the memory allocation order, describe how weight/data is
-     * stored
-     *       For example, in 4-D Tensor(rank=4), there are three
-     * commonly
-     *       used layout. They are
-     *            NCHW, NHWC, CHWN.
-     *       N,C,H,W for respectively the batch size, the number of
-     *       feature maps, the height, the width.
-     */
-
-    DataLayout layout_ = DataLayout::kNHWC;
-
-    /**
-     * @brief   A PlaceHolder may be shared by more than one tensor.
-     *
-     * @note    Some of them may be slices of the others. So the offset_
-     *          is introduced here to indicate the byte offset between
-     *          PlaceHolder::ptr_ and where the tensor data really
-     * begins.
-     */
-    size_t offset_;
+  /**
+   * @brief   A PlaceHolder may be shared by more than one tensor.
+   *
+   * @note    Some of them may be slices of the others. So the offset_
+   *          is introduced here to indicate the byte offset between
+   *          PlaceHolder::ptr_ and where the tensor data really
+   * begins.
+   */
+  size_t offset_;
 };

 inline Tensor ReshapeToMatrix(const Tensor &src, int num_col_dims) {
-    Tensor res;
-    res.ShareDataWith(src);
-    res.Resize(flatten_to_2d(src.dims(), num_col_dims));
-    return res;
+  Tensor res;
+  res.ShareDataWith(src);
+  res.Resize(flatten_to_2d(src.dims(), num_col_dims));
+  return res;
 }

 } // namespace framework

--- a/src/framework/tensor_util.cc
+++ b/src/framework/tensor_util.cc
@@ -21,183 +21,182 @@ namespace paddle_mobile {
 namespace framework {

 void TensorCopy(const Tensor &src, Tensor *dst) {
-    //  VLOG(3) << "TensorCopy " << src.dims() << " from " <<
-    //  src.place() << " to
-    //  "
-    //          << dst_place;
-    src.check_memory_size();
+  //  VLOG(3) << "TensorCopy " << src.dims() << " from " <<
+  //  src.place() << " to
+  //  "
+  //          << dst_place;
+  src.check_memory_size();

-    dst->Resize(src.dims());
-    dst->set_layout(src.layout());
-    auto src_ptr = src.data<void>();
+  dst->Resize(src.dims());
+  dst->set_layout(src.layout());
+  auto src_ptr = src.data<void>();

-    auto dst_ptr = dst->mutable_data(src.type());
+  auto dst_ptr = dst->mutable_data(src.type());

-    auto size = src.numel() * SizeOfType(src.type());
+  auto size = src.numel() * SizeOfType(src.type());

-    memory::Copy(dst_ptr, src_ptr, size);
+  memory::Copy(dst_ptr, src_ptr, size);
 }

 void TensorCopySync(const Tensor &src, Tensor *dst) {
-    //  VLOG(3) << "TensorCopySync " << src.dims() << " from " <<
-    //  src.place()
-    //          << " to " << dst_place;
-    src.check_memory_size();
-    dst->Resize(src.dims());
-    dst->set_layout(src.layout());
-    auto src_ptr = src.data<void>();
-    auto dst_ptr = dst->mutable_data(src.type());
-    auto size = src.numel() * SizeOfType(src.type());
-    memory::Copy(dst_ptr, src_ptr, size);
+  //  VLOG(3) << "TensorCopySync " << src.dims() << " from " <<
+  //  src.place()
+  //          << " to " << dst_place;
+  src.check_memory_size();
+  dst->Resize(src.dims());
+  dst->set_layout(src.layout());
+  auto src_ptr = src.data<void>();
+  auto dst_ptr = dst->mutable_data(src.type());
+  auto size = src.numel() * SizeOfType(src.type());
+  memory::Copy(dst_ptr, src_ptr, size);
 }

 template <typename Predicate> struct AnyDTypeVisitor {
-    Predicate predicate_;
-    const Tensor &tensor_;
-    Tensor *out_;
-
-    AnyDTypeVisitor(Predicate predicate, const Tensor &tensor, Tensor *out)
-        : predicate_(predicate), tensor_(tensor), out_(out) {}
-
-    template <typename T> void operator()() const {
-        //    auto t = EigenVector<T>::Flatten(tensor_);
-        //    auto o = EigenScalar<bool>::From(*out_);
-        // return any of predicate_(t) is true.
-        //    o.device(*ctx_.eigen_device()) = predicate_(t).any();
-    }
+  Predicate predicate_;
+  const Tensor &tensor_;
+  Tensor *out_;
+
+  AnyDTypeVisitor(Predicate predicate, const Tensor &tensor, Tensor *out)
+      : predicate_(predicate), tensor_(tensor), out_(out) {}
+
+  template <typename T> void operator()() const {
+    //    auto t = EigenVector<T>::Flatten(tensor_);
+    //    auto o = EigenScalar<bool>::From(*out_);
+    // return any of predicate_(t) is true.
+    //    o.device(*ctx_.eigen_device()) = predicate_(t).any();
+  }
 };

 template <typename Predicate>
 inline void AnyImpl(Predicate predicate, const Tensor &tensor,
                    framework::Tensor *out) {
-    VisitDataType(ToDataType(tensor.type()),
-                  AnyDTypeVisitor<Predicate>(predicate, tensor, out));
+  VisitDataType(ToDataType(tensor.type()),
+                AnyDTypeVisitor<Predicate>(predicate, tensor, out));
 }

 template <typename Predicate> struct AnyVisitor {
-    const framework::Tensor &tensor_;
-    Predicate predicate_;
-
-    AnyVisitor(const framework::Tensor &tensor, Predicate predicate)
-        : tensor_(tensor), predicate_(std::move(predicate)) {}
-
-    bool operator()(void) const {
-        framework::Tensor out;
-        out.Resize({1});
-        out.mutable_data<bool>();
-        AnyImpl(predicate_, tensor_, &out);
-        return this->GetResult(out);
-    }
-
-    bool GetResult(const framework::Tensor &out) const {
-        return *out.data<bool>();
-    }
+  const framework::Tensor &tensor_;
+  Predicate predicate_;
+
+  AnyVisitor(const framework::Tensor &tensor, Predicate predicate)
+      : tensor_(tensor), predicate_(std::move(predicate)) {}
+
+  bool operator()(void) const {
+    framework::Tensor out;
+    out.Resize({1});
+    out.mutable_data<bool>();
+    AnyImpl(predicate_, tensor_, &out);
+    return this->GetResult(out);
+  }
+
+  bool GetResult(const framework::Tensor &out) const {
+    return *out.data<bool>();
+  }
 };

 template <typename Predicate>
 inline bool Any(const framework::Tensor &tensor, Predicate predicate) {
-    AnyVisitor<Predicate> visitor(tensor, predicate);
-    //  return platform::VisitPlace(visitor);
-    return visitor();
+  AnyVisitor<Predicate> visitor(tensor, predicate);
+  //  return platform::VisitPlace(visitor);
+  return visitor();
 }

 struct ContainsNANPredicate {
-    template <typename T>
-    auto operator()(const T &eigen_vec) const
-        -> decltype(std::declval<T>().isnan()) {
-        // Cast eigen_vector to vector of bool. true if is inf.
-        return eigen_vec.isnan();
-    }
+  template <typename T>
+  auto operator()(const T &eigen_vec) const
+      -> decltype(std::declval<T>().isnan()) {
+    // Cast eigen_vector to vector of bool. true if is inf.
+    return eigen_vec.isnan();
+  }
 };

 bool TensorContainsNAN(const framework::Tensor &tensor) {
-    ContainsNANPredicate predicate;
-    return Any(tensor, predicate);
+  ContainsNANPredicate predicate;
+  return Any(tensor, predicate);
 }

 struct ContainsInfPredicate {
-    template <typename T>
-    auto operator()(const T &eigen_vec) const
-        -> decltype(std::declval<T>().isinf()) {
-        // Cast eigen_vector to vector of bool. true if is inf.
-        return eigen_vec.isinf();
-    }
+  template <typename T>
+  auto operator()(const T &eigen_vec) const
+      -> decltype(std::declval<T>().isinf()) {
+    // Cast eigen_vector to vector of bool. true if is inf.
+    return eigen_vec.isinf();
+  }
 };

 bool TensorContainsInf(const framework::Tensor &tensor) {
-    ContainsInfPredicate predicate;
-    return Any(tensor, predicate);
+  ContainsInfPredicate predicate;
+  return Any(tensor, predicate);
 }

 void TensorToStream(std::ostream &os, const Tensor &tensor) {
-    { // the 1st field, uint32_t version
-        constexpr uint32_t version = 0;
-        os.write(reinterpret_cast<const char *>(&version), sizeof(version));
-    }
-    { // the 2nd field, tensor description
-        // int32_t  size
-        // void*    protobuf message
-        proto::VarType::TensorDesc desc;
-        desc.set_data_type(framework::ToDataType(tensor.type()));
-        auto dims = framework::vectorize(tensor.dims());
-        auto *pb_dims = desc.mutable_dims();
-        pb_dims->Resize(static_cast<int>(dims.size()), 0);
-        std::copy(dims.begin(), dims.end(), pb_dims->begin());
-        int32_t size = desc.ByteSize();
-        os.write(reinterpret_cast<const char *>(&size), sizeof(size));
-        auto out = desc.SerializeAsString();
-        os.write(out.data(), size);
-    }
-    { // the 3rd field, tensor data
-        uint64_t size = tensor.memory_size();
-        auto *data_ptr = tensor.data<void>();
-        //    PADDLE_ENFORCE(size <
-        //    std::numeric_limits<std::streamsize>::max(),
-        //                   "Index overflow when writing tensor");
-
-        os.write(static_cast<const char *>(data_ptr),
-                 static_cast<std::streamsize>(size));
-    }
+  { // the 1st field, uint32_t version
+    constexpr uint32_t version = 0;
+    os.write(reinterpret_cast<const char *>(&version), sizeof(version));
+  }
+  { // the 2nd field, tensor description
+    // int32_t  size
+    // void*    protobuf message
+    proto::VarType::TensorDesc desc;
+    desc.set_data_type(framework::ToDataType(tensor.type()));
+    auto dims = framework::vectorize(tensor.dims());
+    auto *pb_dims = desc.mutable_dims();
+    pb_dims->Resize(static_cast<int>(dims.size()), 0);
+    std::copy(dims.begin(), dims.end(), pb_dims->begin());
+    int32_t size = desc.ByteSize();
+    os.write(reinterpret_cast<const char *>(&size), sizeof(size));
+    auto out = desc.SerializeAsString();
+    os.write(out.data(), size);
+  }
+  { // the 3rd field, tensor data
+    uint64_t size = tensor.memory_size();
+    auto *data_ptr = tensor.data<void>();
+    //    PADDLE_ENFORCE(size <
+    //    std::numeric_limits<std::streamsize>::max(),
+    //                   "Index overflow when writing tensor");
+
+    os.write(static_cast<const char *>(data_ptr),
+             static_cast<std::streamsize>(size));
+  }
 }

 struct DeserializedDataFunctor {
-    DeserializedDataFunctor(void **buf, Tensor *tensor)
-        : buf_(buf), tensor_(tensor) {}
+  DeserializedDataFunctor(void **buf, Tensor *tensor)
+      : buf_(buf), tensor_(tensor) {}

-    template <typename T> void operator()() {
-        *buf_ = tensor_->mutable_data<T>();
-    }
+  template <typename T> void operator()() {
+    *buf_ = tensor_->mutable_data<T>();
+  }

-    void **buf_;
-    Tensor *tensor_;
+  void **buf_;
+  Tensor *tensor_;
 };

 void TensorFromStream(std::istream &is, framework::Tensor *tensor) {
-    uint32_t version;
-    is.read(reinterpret_cast<char *>(&version), sizeof(version));
-    //  PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
-    proto::VarType::TensorDesc desc;
-    { // int32_t size
-        // proto buffer
-        int32_t size;
-        is.read(reinterpret_cast<char *>(&size), sizeof(size));
-        std::unique_ptr<char[]> buf(new char[size]);
-        is.read(reinterpret_cast<char *>(buf.get()), size);
-        //    PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size),
-        //                   "Cannot parse tensor desc");
-    }
-    { // read tensor
-        std::vector<int64_t> dims;
-        dims.reserve(static_cast<size_t>(desc.dims().size()));
-        std::copy(desc.dims().begin(), desc.dims().end(),
-                  std::back_inserter(dims));
-        tensor->Resize(framework::make_ddim(dims));
-        void *buf;
-
-        framework::VisitDataType(desc.data_type(),
-                                 DeserializedDataFunctor(&buf, tensor));
-        is.read(static_cast<char *>(buf), tensor->memory_size());
-    }
+  uint32_t version;
+  is.read(reinterpret_cast<char *>(&version), sizeof(version));
+  //  PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
+  proto::VarType::TensorDesc desc;
+  { // int32_t size
+    // proto buffer
+    int32_t size;
+    is.read(reinterpret_cast<char *>(&size), sizeof(size));
+    std::unique_ptr<char[]> buf(new char[size]);
+    is.read(reinterpret_cast<char *>(buf.get()), size);
+    //    PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size),
+    //                   "Cannot parse tensor desc");
+  }
+  { // read tensor
+    std::vector<int64_t> dims;
+    dims.reserve(static_cast<size_t>(desc.dims().size()));
+    std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
+    tensor->Resize(framework::make_ddim(dims));
+    void *buf;
+
+    framework::VisitDataType(desc.data_type(),
+                             DeserializedDataFunctor(&buf, tensor));
+    is.read(static_cast<char *>(buf), tensor->memory_size());
+  }
 }

 } // namespace framework

--- a/src/framework/tensor_util.h
+++ b/src/framework/tensor_util.h
@@ -43,23 +43,23 @@ void TensorFromStream(std::istream &is, Tensor *tensor);

 template <typename T>
 void TensorFromVector(const std::vector<T> &src, Tensor *dst) {
-    auto src_ptr = static_cast<const void *>(src.data());
-    dst->Resize({static_cast<int64_t>(src.size())});
-    auto dst_ptr = static_cast<void *>(dst->mutable_data<T>());
-    auto size = src.size() * sizeof(T);
+  auto src_ptr = static_cast<const void *>(src.data());
+  dst->Resize({static_cast<int64_t>(src.size())});
+  auto dst_ptr = static_cast<void *>(dst->mutable_data<T>());
+  auto size = src.size() * sizeof(T);

-    memory::Copy(dst_ptr, src_ptr, size);
+  memory::Copy(dst_ptr, src_ptr, size);
 }

 template <typename T>
 void TensorToVector(const Tensor &src, std::vector<T> *dst) {
-    auto src_ptr = static_cast<const void *>(src.data<T>());
-    auto size = src.numel() * sizeof(T);
+  auto src_ptr = static_cast<const void *>(src.data<T>());
+  auto size = src.numel() * sizeof(T);

-    dst->resize(src.numel());
-    auto dst_ptr = static_cast<void *>(dst->data());
+  dst->resize(src.numel());
+  auto dst_ptr = static_cast<void *>(dst->data());

-    memory::Copy(dst_ptr, src_ptr, size);
+  memory::Copy(dst_ptr, src_ptr, size);
 }

 } // namespace framework

--- a/src/framework/var_desc.h
+++ b/src/framework/var_desc.h
@@ -25,63 +25,63 @@ namespace paddle_mobile {
 namespace framework {

 class VarDesc {
-  public:
-    VarDesc(const proto::VarDesc &desc);
+public:
+  VarDesc(const proto::VarDesc &desc);

-    std::string Name() const { return desc_.name(); }
+  std::string Name() const { return desc_.name(); }

-    proto::VarType::Type GetType() const { return desc_.type().type(); }
+  proto::VarType::Type GetType() const { return desc_.type().type(); }

-    bool Persistable() const { return desc_.persistable(); }
+  bool Persistable() const { return desc_.persistable(); }

-    const proto::VarType::ChannelDesc &channel_desc() const {
-        switch (desc_.type().type()) {
-        case proto::VarType::CHANNEL:
-            return desc_.type().channel();
-        default:
-            break;
-        }
+  const proto::VarType::ChannelDesc &channel_desc() const {
+    switch (desc_.type().type()) {
+    case proto::VarType::CHANNEL:
+      return desc_.type().channel();
+    default:
+      break;
    }
+  }

-    const proto::VarType::TensorDesc &tensor_desc() const {
-        switch (desc_.type().type()) {
-        case proto::VarType::SELECTED_ROWS:
-            return desc_.type().selected_rows();
-        case proto::VarType::LOD_TENSOR:
-            return desc_.type().lod_tensor().tensor();
-        case proto::VarType::LOD_TENSOR_ARRAY:
-            return desc_.type().tensor_array().tensor();
-        default:
-            break;
-        }
+  const proto::VarType::TensorDesc &tensor_desc() const {
+    switch (desc_.type().type()) {
+    case proto::VarType::SELECTED_ROWS:
+      return desc_.type().selected_rows();
+    case proto::VarType::LOD_TENSOR:
+      return desc_.type().lod_tensor().tensor();
+    case proto::VarType::LOD_TENSOR_ARRAY:
+      return desc_.type().tensor_array().tensor();
+    default:
+      break;
    }
+  }

-    proto::VarType::Type GetDataType() const {
-        switch (desc_.type().type()) {
-        case proto::VarType::CHANNEL:
-            return channel_desc().data_type();
-            break;
-        default:
-            return tensor_desc().data_type();
-        }
+  proto::VarType::Type GetDataType() const {
+    switch (desc_.type().type()) {
+    case proto::VarType::CHANNEL:
+      return channel_desc().data_type();
+      break;
+    default:
+      return tensor_desc().data_type();
    }
+  }

-    template <typename T>
-    std::vector<T> RepeatedToVector(
-        const google::protobuf::RepeatedField<T> &repeated_field) const {
-        std::vector<T> ret;
-        ret.reserve(repeated_field.size());
-        std::copy(repeated_field.begin(), repeated_field.end(),
-                  std::back_inserter(ret));
-        return ret;
-    }
+  template <typename T>
+  std::vector<T> RepeatedToVector(
+      const google::protobuf::RepeatedField<T> &repeated_field) const {
+    std::vector<T> ret;
+    ret.reserve(repeated_field.size());
+    std::copy(repeated_field.begin(), repeated_field.end(),
+              std::back_inserter(ret));
+    return ret;
+  }

-    std::vector<int64_t> GetShape() const {
-        return this->RepeatedToVector(tensor_desc().dims());
-    }
+  std::vector<int64_t> GetShape() const {
+    return this->RepeatedToVector(tensor_desc().dims());
+  }

-  private:
-    proto::VarDesc desc_;
+private:
+  proto::VarDesc desc_;
 };

 } // namespace framework

--- a/src/framework/var_type.h
+++ b/src/framework/var_type.h
@@ -25,14 +25,14 @@ SOFTWARE.
 namespace paddle_mobile {
 namespace framework {
 inline proto::VarType::Type ToVarType(std::type_index type) {
-    if (type.hash_code() == typeid(LoDTensor).hash_code()) {
-        return proto::VarType_Type_LOD_TENSOR;
-    } else if (type.hash_code() == typeid(SelectedRows).hash_code()) {
-        return proto::VarType_Type_SELECTED_ROWS;
-    } else {
-        //    PADDLE_THROW("ToVarType:Unsupported type %s",
-        //    type.name());
-    }
+  if (type.hash_code() == typeid(LoDTensor).hash_code()) {
+    return proto::VarType_Type_LOD_TENSOR;
+  } else if (type.hash_code() == typeid(SelectedRows).hash_code()) {
+    return proto::VarType_Type_SELECTED_ROWS;
+  } else {
+    //    PADDLE_THROW("ToVarType:Unsupported type %s",
+    //    type.name());
+  }
 }

 } // namespace framework

--- a/src/framework/variable.h
+++ b/src/framework/variable.h
@@ -28,69 +28,69 @@ SOFTWARE.
 namespace paddle_mobile {
 namespace framework {
 class Variable : public PaddleMobileObject {
-  public:
-    template <typename T> const T *Get() const {
-        return static_cast<const T *>(holder_->Ptr());
+public:
+  template <typename T> const T *Get() const {
+    return static_cast<const T *>(holder_->Ptr());
+  }
+
+  bool IsInitialized() const { return holder_ != nullptr; }
+
+  const std::string *Name() { return name_; }
+
+  template <typename T> T *GetMutable() {
+    if (!IsType<T>()) {
+      if (*Name() == "pixel") {
+        //        std::cout << " reset " << *Name() <<
+        //        std::endl;
+      }
+      holder_.reset(new PlaceholderImp<T>(new T()));
    }
-
-    bool IsInitialized() const { return holder_ != nullptr; }
-
-    const std::string *Name() { return name_; }
-
-    template <typename T> T *GetMutable() {
-        if (!IsType<T>()) {
-            if (*Name() == "pixel") {
-                //        std::cout << " reset " << *Name() <<
-                //        std::endl;
-            }
-            holder_.reset(new PlaceholderImp<T>(new T()));
-        }
-        return static_cast<T *>(holder_->Ptr());
+    return static_cast<T *>(holder_->Ptr());
+  }
+
+  template <typename T> bool IsType() const {
+    if (holder_) {
+      //                printf("not null \n");
+      printf(" holder type : %s, this type %s \n", holder_->Type().name(),
+             typeid(T).name());
    }

-    template <typename T> bool IsType() const {
-        if (holder_) {
-            //                printf("not null \n");
-            printf(" holder type : %s, this type %s \n", holder_->Type().name(),
-                   typeid(T).name());
-        }
-
-        //              std::cout << " " << holder_->Type() << " " <<
-        //              typeid(T) <<
-        //              std::endl;
-        return holder_ != nullptr && holder_->Type() == typeid(T);
-    }
+    //              std::cout << " " << holder_->Type() << " " <<
+    //              typeid(T) <<
+    //              std::endl;
+    return holder_ != nullptr && holder_->Type() == typeid(T);
+  }

-    void Clear() { holder_.reset(); }
+  void Clear() { holder_.reset(); }

-    std::type_index Type() const { return holder_->Type(); }
+  std::type_index Type() const { return holder_->Type(); }

-    void SetName(const std::string *name) { name_ = name; }
+  void SetName(const std::string *name) { name_ = name; }

-  private:
-    struct Placeholder {
-        Placeholder() = default;
-        virtual ~Placeholder() = default;
+private:
+  struct Placeholder {
+    Placeholder() = default;
+    virtual ~Placeholder() = default;

-        virtual const std::type_info &Type() const = 0;
-        virtual void *Ptr() const = 0;
-    };
+    virtual const std::type_info &Type() const = 0;
+    virtual void *Ptr() const = 0;
+  };

-    template <typename T> struct PlaceholderImp : public Placeholder {
-        explicit PlaceholderImp(T *ptr) : ptr_(ptr), type_(typeid(T)) {}
+  template <typename T> struct PlaceholderImp : public Placeholder {
+    explicit PlaceholderImp(T *ptr) : ptr_(ptr), type_(typeid(T)) {}

-        virtual const std::type_info &Type() const { return type_; }
-        virtual void *Ptr() const override {
-            return static_cast<void *>(ptr_.get());
-        }
+    virtual const std::type_info &Type() const { return type_; }
+    virtual void *Ptr() const override {
+      return static_cast<void *>(ptr_.get());
+    }

-        std::unique_ptr<T> ptr_;
-        const std::type_info &type_;
-    };
+    std::unique_ptr<T> ptr_;
+    const std::type_info &type_;
+  };

-    std::unique_ptr<Placeholder> holder_;
-    friend class Scope;
-    const std::string *name_;
+  std::unique_ptr<Placeholder> holder_;
+  friend class Scope;
+  const std::string *name_;
 };
 } // namespace framework
 } // namespace paddle_mobile
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -29,361 +29,357 @@ SOFTWARE.
 namespace paddle_mobile {

 void ReadBinaryFile(const std::string &filename, std::string *contents) {
-    std::ifstream fin(filename, std::ios::in | std::ios::binary);
-    fin.seekg(0, std::ios::end);
-    contents->clear();
-    contents->resize(fin.tellg());
-    fin.seekg(0, std::ios::beg);
-    fin.read(&(contents->at(0)), contents->size());
-    fin.close();
+  std::ifstream fin(filename, std::ios::in | std::ios::binary);
+  fin.seekg(0, std::ios::end);
+  contents->clear();
+  contents->resize(fin.tellg());
+  fin.seekg(0, std::ios::beg);
+  fin.read(&(contents->at(0)), contents->size());
+  fin.close();
 }

 template <typename Dtype, Precision P>
 void Loader<Dtype, P>::LoadVar(framework::LoDTensor *tensor,
                               const std::string &file_path) {
-    //        LOG(kLOG_DEBUG) << "  to load " << file_path;
-    //  Log(kLOG_DEBUG) << "123";
-
-    std::ifstream is(file_path);
-
-    std::streampos pos = is.tellg(); //   save   current   position
-    is.seekg(0, std::ios::end);
-    //        LOG(kLOG_DEBUG) << "  file length = " << is.tellg();
-    is.seekg(pos); //   restore   saved   position
-
-    // 1. version
-    uint32_t version;
-    is.read(reinterpret_cast<char *>(&version), sizeof(version));
-    //        LOG(kLOG_INFO) << "   version: " << version;
-
-    // 2 Lod information
-    uint64_t lod_level;
-    is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
-    //        LOG(kLOG_DEBUG) << "   load level: " << lod_level;
-    //        LOG(kLOG_DEBUG) << "   lod info: ";
-    auto &lod = *tensor->mutable_lod();
-    lod.resize(lod_level);
-    for (uint64_t i = 0; i < lod_level; ++i) {
-        uint64_t size;
-        is.read(reinterpret_cast<char *>(&size), sizeof(size));
-        std::vector<size_t> tmp(size / sizeof(size_t));
-        is.read(reinterpret_cast<char *>(tmp.data()),
-                static_cast<std::streamsize>(size));
-        for (int j = 0; j < tmp.size(); ++j) {
-            LOG(kLOG_DEBUG1) << "    lod - " << tmp[j];
-        }
-        lod[i] = tmp;
-    }
-
-    // 3. tensor version
-    uint32_t tensor_version;
-    is.read(reinterpret_cast<char *>(&tensor_version), sizeof(tensor_version));
-    //  std::cout << "   tensor_version: " << tensor_version << std::endl;
-
-    // 4. tensor desc
-    int32_t size;
+  //        LOG(kLOG_DEBUG) << "  to load " << file_path;
+  //  Log(kLOG_DEBUG) << "123";
+
+  std::ifstream is(file_path);
+
+  std::streampos pos = is.tellg(); //   save   current   position
+  is.seekg(0, std::ios::end);
+  //        LOG(kLOG_DEBUG) << "  file length = " << is.tellg();
+  is.seekg(pos); //   restore   saved   position
+
+  // 1. version
+  uint32_t version;
+  is.read(reinterpret_cast<char *>(&version), sizeof(version));
+  //        LOG(kLOG_INFO) << "   version: " << version;
+
+  // 2 Lod information
+  uint64_t lod_level;
+  is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
+  //        LOG(kLOG_DEBUG) << "   load level: " << lod_level;
+  //        LOG(kLOG_DEBUG) << "   lod info: ";
+  auto &lod = *tensor->mutable_lod();
+  lod.resize(lod_level);
+  for (uint64_t i = 0; i < lod_level; ++i) {
+    uint64_t size;
    is.read(reinterpret_cast<char *>(&size), sizeof(size));
-    //  std::cout << "   tensor desc size: " << size << std::endl;
-    std::unique_ptr<char[]> buf(new char[size]);
-    is.read(reinterpret_cast<char *>(buf.get()), size);
-
-    framework::proto::VarType::TensorDesc desc;
-    desc.ParseFromArray(buf.get(), size);
-
-    //  std::cout << "   desc dims size " << desc.dims().size() <<
-    //  std::endl;
-    int memory_size = 1;
-    for (int l = 0; l < desc.dims().size(); ++l) {
-        //    std::cout << "    dim " << l << " value: " << desc.dims()[l]
-        //    <<
-        //    std::endl;
-        memory_size *= desc.dims()[l];
-    }
-
-    std::vector<int64_t> dims;
-    dims.reserve(static_cast<size_t>(desc.dims().size()));
-    std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
-    tensor->Resize(framework::make_ddim(dims));
-
-    void *memory;
-    int type_size = 0;
-    //  std::cout << "    desc pre type: ";
-    switch (desc.data_type()) {
-    case framework::proto::VarType::FP16:
-        //      std::cout << "FP16" << std::endl;
-        type_size = 2;
-        break;
-    case framework::proto::VarType::FP32:
-        type_size = 4;
-        memory = tensor->mutable_data<float>();
-        //      std::cout << "FP32" << std::endl;
-        break;
-    case framework::proto::VarType::FP64:
-        type_size = 8;
-        //      std::cout << "FP64" << std::endl;
-        break;
-    case framework::proto::VarType::INT32:
-        type_size = 4;
-        //      std::cout << "INT32" << std::endl;
-        break;
-    case framework::proto::VarType::INT64:
-        type_size = 8;
-        //      std::cout << "INT64" << std::endl;
-        break;
-    case framework::proto::VarType::BOOL:
-        type_size = 1;
-        //      std::cout << "BOOL" << std::endl;
-        break;
-    default:
-        break;
-        //      std::cout << "    not support" << std::endl;
+    std::vector<size_t> tmp(size / sizeof(size_t));
+    is.read(reinterpret_cast<char *>(tmp.data()),
+            static_cast<std::streamsize>(size));
+    for (int j = 0; j < tmp.size(); ++j) {
+      LOG(kLOG_DEBUG1) << "    lod - " << tmp[j];
    }
-
-    //  std::cout << "    malloc size: " << memory_size * type_size <<
-    //  std::endl;
-    is.read(static_cast<char *>(memory), memory_size * type_size);
-    //  std::cout << "    memory: " << memory << std::endl;
-    is.close();
+    lod[i] = tmp;
+  }
+
+  // 3. tensor version
+  uint32_t tensor_version;
+  is.read(reinterpret_cast<char *>(&tensor_version), sizeof(tensor_version));
+  //  std::cout << "   tensor_version: " << tensor_version << std::endl;
+
+  // 4. tensor desc
+  int32_t size;
+  is.read(reinterpret_cast<char *>(&size), sizeof(size));
+  //  std::cout << "   tensor desc size: " << size << std::endl;
+  std::unique_ptr<char[]> buf(new char[size]);
+  is.read(reinterpret_cast<char *>(buf.get()), size);
+
+  framework::proto::VarType::TensorDesc desc;
+  desc.ParseFromArray(buf.get(), size);
+
+  //  std::cout << "   desc dims size " << desc.dims().size() <<
+  //  std::endl;
+  int memory_size = 1;
+  for (int l = 0; l < desc.dims().size(); ++l) {
+    //    std::cout << "    dim " << l << " value: " << desc.dims()[l]
+    //    <<
+    //    std::endl;
+    memory_size *= desc.dims()[l];
+  }
+
+  std::vector<int64_t> dims;
+  dims.reserve(static_cast<size_t>(desc.dims().size()));
+  std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
+  tensor->Resize(framework::make_ddim(dims));
+
+  void *memory;
+  int type_size = 0;
+  //  std::cout << "    desc pre type: ";
+  switch (desc.data_type()) {
+  case framework::proto::VarType::FP16:
+    //      std::cout << "FP16" << std::endl;
+    type_size = 2;
+    break;
+  case framework::proto::VarType::FP32:
+    type_size = 4;
+    memory = tensor->mutable_data<float>();
+    //      std::cout << "FP32" << std::endl;
+    break;
+  case framework::proto::VarType::FP64:
+    type_size = 8;
+    //      std::cout << "FP64" << std::endl;
+    break;
+  case framework::proto::VarType::INT32:
+    type_size = 4;
+    //      std::cout << "INT32" << std::endl;
+    break;
+  case framework::proto::VarType::INT64:
+    type_size = 8;
+    //      std::cout << "INT64" << std::endl;
+    break;
+  case framework::proto::VarType::BOOL:
+    type_size = 1;
+    //      std::cout << "BOOL" << std::endl;
+    break;
+  default:
+    break;
+    //      std::cout << "    not support" << std::endl;
+  }
+
+  //  std::cout << "    malloc size: " << memory_size * type_size <<
+  //  std::endl;
+  is.read(static_cast<char *>(memory), memory_size * type_size);
+  //  std::cout << "    memory: " << memory << std::endl;
+  is.close();
 };

 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P>
 Loader<Dtype, P>::Load(const std::string &dirname) {
-    std::string model_filename = dirname + "/__model__";
-    std::string program_desc_str;
-    ReadBinaryFile(model_filename, &program_desc_str);
-    framework::proto::ProgramDesc program_desc_proto;
-    program_desc_proto.ParseFromString(program_desc_str);
-
-    std::shared_ptr<framework::ProgramDesc> originProgramDesc =
-        std::make_shared<framework::ProgramDesc>(program_desc_proto);
-
-    framework::Program<Dtype, P> program;
-    program.originProgram = originProgramDesc;
-
-    std::shared_ptr<framework::Scope> scope =
-        std::make_shared<framework::Scope>();
-    program.scope = scope;
-
-    auto block = originProgramDesc->Block(0);
-
-    for (auto block : originProgramDesc->Blocks()) {
-        //    std::cout << "for block" << std::endl;
-        for (int i = 0; i < block->Vars().size(); ++i) {
-            std::shared_ptr<framework::VarDesc> var_desc = block->Vars()[i];
-            auto var = scope->Var(var_desc->Name());
-            if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
-                if (var_desc->Persistable() &&
-                    var_desc->GetType() !=
-                        framework::proto::VarType::FEED_MINIBATCH &&
-                    var_desc->GetType() !=
-                        framework::proto::VarType::FETCH_LIST) {
-                    framework::LoDTensor *tensor =
-                        var->GetMutable<framework::LoDTensor>();
-                    // to load
-                    LoadVar(tensor, dirname + "/" + var_desc->Name());
-                }
-            } else {
-                //        std::cout << "非 lod" << std::endl;
-            }
+  std::string model_filename = dirname + "/__model__";
+  std::string program_desc_str;
+  ReadBinaryFile(model_filename, &program_desc_str);
+  framework::proto::ProgramDesc program_desc_proto;
+  program_desc_proto.ParseFromString(program_desc_str);
+
+  std::shared_ptr<framework::ProgramDesc> originProgramDesc =
+      std::make_shared<framework::ProgramDesc>(program_desc_proto);
+
+  framework::Program<Dtype, P> program;
+  program.originProgram = originProgramDesc;
+
+  std::shared_ptr<framework::Scope> scope =
+      std::make_shared<framework::Scope>();
+  program.scope = scope;
+
+  auto block = originProgramDesc->Block(0);
+
+  for (auto block : originProgramDesc->Blocks()) {
+    //    std::cout << "for block" << std::endl;
+    for (int i = 0; i < block->Vars().size(); ++i) {
+      std::shared_ptr<framework::VarDesc> var_desc = block->Vars()[i];
+      auto var = scope->Var(var_desc->Name());
+      if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
+        if (var_desc->Persistable() &&
+            var_desc->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
+            var_desc->GetType() != framework::proto::VarType::FETCH_LIST) {
+          framework::LoDTensor *tensor =
+              var->GetMutable<framework::LoDTensor>();
+          // to load
+          LoadVar(tensor, dirname + "/" + var_desc->Name());
        }
+      } else {
+        //        std::cout << "非 lod" << std::endl;
+      }
    }
+  }

 #ifdef PADDLE_MOBILE_DEBUG
-    for (int i = 0; i < program_desc_proto.blocks().size(); ++i) {
-        framework::proto::BlockDesc block = program_desc_proto.blocks()[i];
-        LOG(kLOG_DEBUG) << "block: " << block.idx();
-        for (int j = 0; j < block.ops().size(); ++j) {
-            if (j == 2) {
-                break;
-            }
-            framework::proto::OpDesc op = block.ops()[j];
-            LOG(kLOG_DEBUG1) << "op: " << op.type();
-            for (int m = 0; m < op.inputs_size(); ++m) {
-                const framework::proto::OpDesc::Var &var = op.inputs(m);
-                LOG(kLOG_DEBUG2) << "input parameter: " << var.parameter();
-                for (int n = 0; n < var.arguments().size(); ++n) {
-                    LOG(kLOG_DEBUG3) << "argument - " << var.arguments()[n];
-                }
-            }
-
-            for (int y = 0; y < op.outputs_size(); ++y) {
-                const framework::proto::OpDesc::Var &var = op.outputs(y);
-                LOG(kLOG_DEBUG2) << "out parameter: " << var.parameter();
-                for (int z = 0; z < var.arguments().size(); ++z) {
-                    LOG(kLOG_DEBUG3) << "argument - " << var.arguments()[z];
-                }
-            }
-
-            for (int x = 0; x < op.attrs().size(); ++x) {
-                const framework::proto::OpDesc_Attr attr = op.attrs()[x];
-                LOG(kLOG_DEBUG2) << "attr name: " << attr.name();
-
-                switch (attr.type()) {
-                case framework::proto::AttrType::BOOLEAN:
-                    LOG(kLOG_DEBUG3) << "boolen: " << attr.b();
-                    break;
-                case framework::proto::AttrType::INT:
-                    LOG(kLOG_DEBUG3) << "int: " << attr.i();
-                    break;
-                case framework::proto::AttrType::FLOAT:
-                    LOG(kLOG_DEBUG3) << "float: " << attr.f();
-                case framework::proto::AttrType::STRING:
-                    LOG(kLOG_DEBUG3) << "string: " << attr.s();
-                case framework::proto::AttrType::BOOLEANS:
-                    for (int y = 0; y < attr.bools_size(); ++y) {
-                        LOG(kLOG_DEBUG3) << "bools: " << attr.bools(y);
-                    }
-                case framework::proto::AttrType::LONG:
-                    LOG(kLOG_DEBUG3) << "long: " << attr.l();
-                case framework::proto::AttrType::FLOATS:
-                    for (int y = 0; y < attr.floats_size(); ++y) {
-                        LOG(kLOG_DEBUG3) << "floats: " << attr.floats(y);
-                    }
-                case framework::proto::AttrType::INTS:
-                    for (int y = 0; y < attr.ints_size(); ++y) {
-                        LOG(kLOG_DEBUG3) << "ints: " << attr.ints(y);
-                    }
-                case framework::proto::AttrType::STRINGS:
-                    for (int y = 0; y < attr.strings_size(); ++y) {
-                        LOG(kLOG_DEBUG3) << "strings: " << attr.strings(y);
-                    }
-                }
-            }
+  for (int i = 0; i < program_desc_proto.blocks().size(); ++i) {
+    framework::proto::BlockDesc block = program_desc_proto.blocks()[i];
+    LOG(kLOG_DEBUG) << "block: " << block.idx();
+    for (int j = 0; j < block.ops().size(); ++j) {
+      if (j == 2) {
+        break;
+      }
+      framework::proto::OpDesc op = block.ops()[j];
+      LOG(kLOG_DEBUG1) << "op: " << op.type();
+      for (int m = 0; m < op.inputs_size(); ++m) {
+        const framework::proto::OpDesc::Var &var = op.inputs(m);
+        LOG(kLOG_DEBUG2) << "input parameter: " << var.parameter();
+        for (int n = 0; n < var.arguments().size(); ++n) {
+          LOG(kLOG_DEBUG3) << "argument - " << var.arguments()[n];
+        }
+      }
+
+      for (int y = 0; y < op.outputs_size(); ++y) {
+        const framework::proto::OpDesc::Var &var = op.outputs(y);
+        LOG(kLOG_DEBUG2) << "out parameter: " << var.parameter();
+        for (int z = 0; z < var.arguments().size(); ++z) {
+          LOG(kLOG_DEBUG3) << "argument - " << var.arguments()[z];
+        }
+      }
+
+      for (int x = 0; x < op.attrs().size(); ++x) {
+        const framework::proto::OpDesc_Attr attr = op.attrs()[x];
+        LOG(kLOG_DEBUG2) << "attr name: " << attr.name();
+
+        switch (attr.type()) {
+        case framework::proto::AttrType::BOOLEAN:
+          LOG(kLOG_DEBUG3) << "boolen: " << attr.b();
+          break;
+        case framework::proto::AttrType::INT:
+          LOG(kLOG_DEBUG3) << "int: " << attr.i();
+          break;
+        case framework::proto::AttrType::FLOAT:
+          LOG(kLOG_DEBUG3) << "float: " << attr.f();
+        case framework::proto::AttrType::STRING:
+          LOG(kLOG_DEBUG3) << "string: " << attr.s();
+        case framework::proto::AttrType::BOOLEANS:
+          for (int y = 0; y < attr.bools_size(); ++y) {
+            LOG(kLOG_DEBUG3) << "bools: " << attr.bools(y);
+          }
+        case framework::proto::AttrType::LONG:
+          LOG(kLOG_DEBUG3) << "long: " << attr.l();
+        case framework::proto::AttrType::FLOATS:
+          for (int y = 0; y < attr.floats_size(); ++y) {
+            LOG(kLOG_DEBUG3) << "floats: " << attr.floats(y);
+          }
+        case framework::proto::AttrType::INTS:
+          for (int y = 0; y < attr.ints_size(); ++y) {
+            LOG(kLOG_DEBUG3) << "ints: " << attr.ints(y);
+          }
+        case framework::proto::AttrType::STRINGS:
+          for (int y = 0; y < attr.strings_size(); ++y) {
+            LOG(kLOG_DEBUG3) << "strings: " << attr.strings(y);
+          }
+        }
+      }
+    }
+
+    for (int k = 0; k < block.vars().size(); ++k) {
+      framework::proto::VarDesc var = block.vars()[k];
+      if (var.type().type() == framework::proto::VarType::LOD_TENSOR) {
+        LOG(kLOG_DEBUG1) << "var name: " << var.name();
+        const framework::proto::VarType::TensorDesc &tensor_desc =
+            var.type().lod_tensor().tensor();
+        LOG(kLOG_DEBUG2) << "in var tensor desc dims size: "
+                         << tensor_desc.dims().size();
+        int memory_size = 1;
+        for (int l = 0; l < tensor_desc.dims().size(); ++l) {
+          LOG(kLOG_DEBUG3) << "var tensor desc dim " << l
+                           << " value: " << tensor_desc.dims()[l];
+        }
+      }
+
+      if (var.persistable() &&
+          var.type().type() != framework::proto::VarType::FEED_MINIBATCH &&
+          var.type().type() != framework::proto::VarType::FETCH_LIST) {
+        //        std::cout << "  to load " << var.name() <<
+        //        std::endl;
+        std::string file_path = dirname + "/" + var.name();
+        std::ifstream is(file_path);
+        std::streampos pos = is.tellg(); //   save   current   position
+        is.seekg(0, std::ios::end);
+        //        std::cout << "  file length = " << is.tellg() <<
+        //        std::endl;
+        is.seekg(pos); //   restore   saved   position
+
+        // 1. version
+        uint32_t version;
+        is.read(reinterpret_cast<char *>(&version), sizeof(version));
+        //        std::cout << "   version: " << version <<
+        //        std::endl;
+
+        // 2 Lod information
+        uint64_t lod_level;
+        is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
+        //        std::cout << "   load level: " << lod_level <<
+        //        std::endl;
+        //        std::cout << "   lod info: " << std::endl;
+        for (uint64_t i = 0; i < lod_level; ++i) {
+          uint64_t size;
+          is.read(reinterpret_cast<char *>(&size), sizeof(size));
+          std::vector<size_t> tmp(size / sizeof(size_t));
+          is.read(reinterpret_cast<char *>(tmp.data()),
+                  static_cast<std::streamsize>(size));
+          for (int j = 0; j < tmp.size(); ++j) {
+            //            std::cout << "    lod - " << tmp[j] <<
+            //            std::endl;
+          }
        }

-        for (int k = 0; k < block.vars().size(); ++k) {
-            framework::proto::VarDesc var = block.vars()[k];
-            if (var.type().type() == framework::proto::VarType::LOD_TENSOR) {
-                LOG(kLOG_DEBUG1) << "var name: " << var.name();
-                const framework::proto::VarType::TensorDesc &tensor_desc =
-                    var.type().lod_tensor().tensor();
-                LOG(kLOG_DEBUG2) << "in var tensor desc dims size: "
-                                 << tensor_desc.dims().size();
-                int memory_size = 1;
-                for (int l = 0; l < tensor_desc.dims().size(); ++l) {
-                    LOG(kLOG_DEBUG3) << "var tensor desc dim " << l
-                                     << " value: " << tensor_desc.dims()[l];
-                }
-            }
-
-            if (var.persistable() &&
-                var.type().type() !=
-                    framework::proto::VarType::FEED_MINIBATCH &&
-                var.type().type() != framework::proto::VarType::FETCH_LIST) {
-                //        std::cout << "  to load " << var.name() <<
-                //        std::endl;
-                std::string file_path = dirname + "/" + var.name();
-                std::ifstream is(file_path);
-                std::streampos pos = is.tellg(); //   save   current   position
-                is.seekg(0, std::ios::end);
-                //        std::cout << "  file length = " << is.tellg() <<
-                //        std::endl;
-                is.seekg(pos); //   restore   saved   position
-
-                // 1. version
-                uint32_t version;
-                is.read(reinterpret_cast<char *>(&version), sizeof(version));
-                //        std::cout << "   version: " << version <<
-                //        std::endl;
-
-                // 2 Lod information
-                uint64_t lod_level;
-                is.read(reinterpret_cast<char *>(&lod_level),
-                        sizeof(lod_level));
-                //        std::cout << "   load level: " << lod_level <<
-                //        std::endl;
-                //        std::cout << "   lod info: " << std::endl;
-                for (uint64_t i = 0; i < lod_level; ++i) {
-                    uint64_t size;
-                    is.read(reinterpret_cast<char *>(&size), sizeof(size));
-                    std::vector<size_t> tmp(size / sizeof(size_t));
-                    is.read(reinterpret_cast<char *>(tmp.data()),
-                            static_cast<std::streamsize>(size));
-                    for (int j = 0; j < tmp.size(); ++j) {
-                        //            std::cout << "    lod - " << tmp[j] <<
-                        //            std::endl;
-                    }
-                }
-
-                uint32_t tensor_version;
-                is.read(reinterpret_cast<char *>(&version), sizeof(version));
-                //        std::cout << "   tensor_version: " <<
-                //        tensor_version <<
-                //        std::endl;
-
-                int32_t size;
-                is.read(reinterpret_cast<char *>(&size), sizeof(size));
-                //        std::cout << "   tensor desc size: " << size <<
-                //        std::endl;
-                std::unique_ptr<char[]> buf(new char[size]);
-                is.read(reinterpret_cast<char *>(buf.get()), size);
-
-                framework::proto::VarType::TensorDesc desc;
-                desc.ParseFromArray(buf.get(), size);
-
-                //        std::cout << "   desc dims size " <<
-                //        desc.dims().size() <<
-                //        std::endl;
-                int memory_size = 1;
-                for (int l = 0; l < desc.dims().size(); ++l) {
-                    //          std::cout << "    dim " << l << " value: "
-                    //          <<
-                    //          desc.dims()[l]
-                    //                    << std::endl;
-                    memory_size *= desc.dims()[l];
-                }
-
-                int type_size = 0;
-                //        std::cout << "    desc pre type: ";
-                switch (desc.data_type()) {
-                case framework::proto::VarType::FP16:
-                    //            std::cout << "FP16" << std::endl;
-                    type_size = 2;
-                    break;
-                case framework::proto::VarType::FP32:
-                    type_size = 4;
-                    //            std::cout << "FP32" << std::endl;
-                    break;
-                case framework::proto::VarType::FP64:
-                    type_size = 8;
-                    //            std::cout << "FP64" << std::endl;
-                    break;
-                case framework::proto::VarType::INT32:
-                    type_size = 4;
-                    //            std::cout << "INT32" << std::endl;
-                    break;
-                case framework::proto::VarType::INT64:
-                    type_size = 8;
-                    //            std::cout << "INT64" << std::endl;
-                    break;
-                case framework::proto::VarType::BOOL:
-                    type_size = 1;
-                    //            std::cout << "BOOL" << std::endl;
-                    break;
-                default:
-                    break;
-                    //            std::cout << "    not support" <<
-                    //            std::endl;
-                }
-
-                //        std::cout << "    malloc size: " << memory_size *
-                //        type_size
-                //                  << std::endl;
-                void *memory = malloc(memory_size * type_size);
-                is.read(static_cast<char *>(memory), memory_size * type_size);
-                //        std::cout << "    memory: " << memory <<
-                //        std::endl;
-                is.close();
-            } else {
-                //        std::cout << "  *not load "
-                //                  << " var : " << var.name() << std::endl;
-            }
+        uint32_t tensor_version;
+        is.read(reinterpret_cast<char *>(&version), sizeof(version));
+        //        std::cout << "   tensor_version: " <<
+        //        tensor_version <<
+        //        std::endl;
+
+        int32_t size;
+        is.read(reinterpret_cast<char *>(&size), sizeof(size));
+        //        std::cout << "   tensor desc size: " << size <<
+        //        std::endl;
+        std::unique_ptr<char[]> buf(new char[size]);
+        is.read(reinterpret_cast<char *>(buf.get()), size);
+
+        framework::proto::VarType::TensorDesc desc;
+        desc.ParseFromArray(buf.get(), size);
+
+        //        std::cout << "   desc dims size " <<
+        //        desc.dims().size() <<
+        //        std::endl;
+        int memory_size = 1;
+        for (int l = 0; l < desc.dims().size(); ++l) {
+          //          std::cout << "    dim " << l << " value: "
+          //          <<
+          //          desc.dims()[l]
+          //                    << std::endl;
+          memory_size *= desc.dims()[l];
        }
+
+        int type_size = 0;
+        //        std::cout << "    desc pre type: ";
+        switch (desc.data_type()) {
+        case framework::proto::VarType::FP16:
+          //            std::cout << "FP16" << std::endl;
+          type_size = 2;
+          break;
+        case framework::proto::VarType::FP32:
+          type_size = 4;
+          //            std::cout << "FP32" << std::endl;
+          break;
+        case framework::proto::VarType::FP64:
+          type_size = 8;
+          //            std::cout << "FP64" << std::endl;
+          break;
+        case framework::proto::VarType::INT32:
+          type_size = 4;
+          //            std::cout << "INT32" << std::endl;
+          break;
+        case framework::proto::VarType::INT64:
+          type_size = 8;
+          //            std::cout << "INT64" << std::endl;
+          break;
+        case framework::proto::VarType::BOOL:
+          type_size = 1;
+          //            std::cout << "BOOL" << std::endl;
+          break;
+        default:
+          break;
+          //            std::cout << "    not support" <<
+          //            std::endl;
+        }
+
+        //        std::cout << "    malloc size: " << memory_size *
+        //        type_size
+        //                  << std::endl;
+        void *memory = malloc(memory_size * type_size);
+        is.read(static_cast<char *>(memory), memory_size * type_size);
+        //        std::cout << "    memory: " << memory <<
+        //        std::endl;
+        is.close();
+      } else {
+        //        std::cout << "  *not load "
+        //                  << " var : " << var.name() << std::endl;
+      }
    }
+  }

 #endif
-    return program;
+  return program;
 }

 template class Loader<CPU, Precision::FP32>;

--- a/src/io.h
+++ b/src/io.h
@@ -29,11 +29,11 @@ namespace paddle_mobile {

 template <typename Dtype, Precision P = Precision::FP32>
 class Loader : PaddleMobileObject {
-  public:
-    const framework::Program<Dtype, P> Load(const std::string &dirname);
+public:
+  const framework::Program<Dtype, P> Load(const std::string &dirname);

-  private:
-    void LoadVar(framework::LoDTensor *tensor, const std::string &file_path);
+private:
+  void LoadVar(framework::LoDTensor *tensor, const std::string &file_path);
 };

 } // namespace paddle_mobile
--- a/src/memory/t_malloc.cc
+++ b/src/memory/t_malloc.cc
@@ -26,25 +26,25 @@ namespace memory {
 const int MALLOC_ALIGN = 16;

 void Copy(void *dst, const void *src, size_t num) {
-    std::memcpy(dst, src, num);
+  std::memcpy(dst, src, num);
 };

 void *Alloc(size_t size) {
-    size_t offset = sizeof(void *) + MALLOC_ALIGN - 1;
-    char *p = static_cast<char *>(malloc(offset + size));
-    if (!p) {
-        return nullptr;
-    }
-    void *r = reinterpret_cast<void *>(reinterpret_cast<size_t>(p + offset) &
-                                       (~(MALLOC_ALIGN - 1)));
-    static_cast<void **>(r)[-1] = p;
-    return r;
+  size_t offset = sizeof(void *) + MALLOC_ALIGN - 1;
+  char *p = static_cast<char *>(malloc(offset + size));
+  if (!p) {
+    return nullptr;
+  }
+  void *r = reinterpret_cast<void *>(reinterpret_cast<size_t>(p + offset) &
+                                     (~(MALLOC_ALIGN - 1)));
+  static_cast<void **>(r)[-1] = p;
+  return r;
 }

 void Free(void *ptr) {
-    if (ptr) {
-        free(static_cast<void **>(ptr)[-1]);
-    }
+  if (ptr) {
+    free(static_cast<void **>(ptr)[-1]);
+  }
 }

 } // namespace memory

--- a/src/memory/t_malloc.h
+++ b/src/memory/t_malloc.h
@@ -38,12 +38,12 @@ void Free(void *ptr);
 *          static_cast
 */
 template <typename T> class PODDeleter {
-    static_assert(std::is_pod<T>::value, "T must be POD");
+  static_assert(std::is_pod<T>::value, "T must be POD");

-  public:
-    explicit PODDeleter(){};
+public:
+  explicit PODDeleter(){};

-    void operator()(T *ptr) { Free(static_cast<void *>(ptr)); }
+  void operator()(T *ptr) { Free(static_cast<void *>(ptr)); }
 };

 /**
@@ -55,10 +55,10 @@ template <typename T> class PODDeleter {
 *          reinterpret_cast
 */
 template <typename T> class PlainDeleter {
-  public:
-    explicit PlainDeleter(){};
+public:
+  explicit PlainDeleter(){};

-    void operator()(T *ptr) { Free(reinterpret_cast<void *>(ptr)); }
+  void operator()(T *ptr) { Free(reinterpret_cast<void *>(ptr)); }
 };
 } // namespace memory
 } // namespace paddle_mobile
--- a/src/operators/batchnorm_op.cpp
+++ b/src/operators/batchnorm_op.cpp
@@ -23,8 +23,8 @@ namespace operators {

 template <typename Dtype, typename T>
 void BatchNormOp<Dtype, T>::InferShape() const {
-    auto x_dims = param_.InputX()->dims();
-    param_.OutputY()->Resize(x_dims);
+  auto x_dims = param_.InputX()->dims();
+  param_.OutputY()->Resize(x_dims);
 }
 template class BatchNormOp<CPU, float>;
 } // namespace operators

--- a/src/operators/batchnorm_op.h
+++ b/src/operators/batchnorm_op.h
@@ -27,25 +27,25 @@ using namespace framework;

 template <typename DeviceType, typename T>
 class BatchNormOp : public framework::OperatorWithKernel<DeviceType> {
-  public:
-    BatchNormOp(const std::string &type, const VariableNameMap &inputs,
-                const VariableNameMap &outputs,
-                const framework::AttributeMap attrs,
-                std::shared_ptr<framework::Scope> scope)
-        : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs,
-                                                    attrs, scope),
-          param_(inputs, outputs, attrs, *scope) {}
-
-    void Run() const {
-        operators::BatchNormKernel<DeviceType, T> kernel;
-        kernel.Compute(param_);
-    }
-
-    using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
-    void InferShape() const override;
-
-  protected:
-    BatchNormParam param_;
+public:
+  BatchNormOp(const std::string &type, const VariableNameMap &inputs,
+              const VariableNameMap &outputs,
+              const framework::AttributeMap attrs,
+              std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
+                                                  scope),
+        param_(inputs, outputs, attrs, *scope) {}
+
+  void Run() const {
+    operators::BatchNormKernel<DeviceType, T> kernel;
+    kernel.Compute(param_);
+  }
+
+  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
+  void InferShape() const override;
+
+protected:
+  BatchNormParam param_;
 };

 } // namespace operators

--- a/src/operators/concat_op.cpp
+++ b/src/operators/concat_op.cpp
@@ -23,40 +23,40 @@ namespace operators {

 template <typename Dtype, typename T>
 void ConcatOp<Dtype, T>::InferShape() const {
-    auto inputs = param_.Inputs();
-    const size_t n = inputs.size();
+  auto inputs = param_.Inputs();
+  const size_t n = inputs.size();

-    std::vector<DDim> inputs_dims;
-    inputs_dims.reserve(n);
-    for (int i = 0; i < n; i++) {
-        inputs_dims.push_back(inputs[i]->dims());
-    }
+  std::vector<DDim> inputs_dims;
+  inputs_dims.reserve(n);
+  for (int i = 0; i < n; i++) {
+    inputs_dims.push_back(inputs[i]->dims());
+  }

-    auto axis = static_cast<size_t>(param_.Axis());
+  auto axis = static_cast<size_t>(param_.Axis());

-    if (n == 1) {
-        DLOG << "Warning: concat op have only one input, "
-                "may waste memory";
-    }
+  if (n == 1) {
+    DLOG << "Warning: concat op have only one input, "
+            "may waste memory";
+  }

-    /// add all dim[axis] and check other dims if equal.
-    auto out_dims = inputs_dims[0];
-    int in_zero_dims_size = out_dims.size();
-    for (size_t i = 1; i < n; i++) {
-        for (size_t j = 0; j < in_zero_dims_size; j++) {
-            if (j == axis) {
-                out_dims[axis] += inputs_dims[i][j];
-            } else {
-                assert(out_dims[j] == inputs_dims[i][j]);
-            }
-        }
+  /// add all dim[axis] and check other dims if equal.
+  auto out_dims = inputs_dims[0];
+  int in_zero_dims_size = out_dims.size();
+  for (size_t i = 1; i < n; i++) {
+    for (size_t j = 0; j < in_zero_dims_size; j++) {
+      if (j == axis) {
+        out_dims[axis] += inputs_dims[i][j];
+      } else {
+        assert(out_dims[j] == inputs_dims[i][j]);
+      }
    }
+  }

-    if (out_dims[axis] < 0) {
-        out_dims[axis] = -1;
-    }
+  if (out_dims[axis] < 0) {
+    out_dims[axis] = -1;
+  }

-    param_.Out()->Resize(out_dims);
+  param_.Out()->Resize(out_dims);
 }
 template class ConcatOp<CPU, float>;


--- a/src/operators/concat_op.h
+++ b/src/operators/concat_op.h
@@ -26,25 +26,24 @@ using namespace framework;

 template <typename DeviceType, typename T>
 class ConcatOp : public framework::OperatorWithKernel<DeviceType> {
-  public:
-    ConcatOp(const std::string &type, const VariableNameMap &inputs,
-             const VariableNameMap &outputs,
-             const framework::AttributeMap attrs,
-             std::shared_ptr<framework::Scope> scope)
-        : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs,
-                                                    attrs, scope),
-          param_(inputs, outputs, attrs, *scope) {}
+public:
+  ConcatOp(const std::string &type, const VariableNameMap &inputs,
+           const VariableNameMap &outputs, const framework::AttributeMap attrs,
+           std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
+                                                  scope),
+        param_(inputs, outputs, attrs, *scope) {}

-    void Run() const {
-        operators::ConcatKernel<DeviceType, T> kernel;
-        kernel.Compute(param_);
-    }
+  void Run() const {
+    operators::ConcatKernel<DeviceType, T> kernel;
+    kernel.Compute(param_);
+  }

-    using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
-    void InferShape() const override;
+  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
+  void InferShape() const override;

-  protected:
-    ConcatParam param_;
+protected:
+  ConcatParam param_;
 };

 } // namespace operators

--- a/src/operators/conv_op.cpp
+++ b/src/operators/conv_op.cpp
@@ -25,48 +25,48 @@ namespace operators {

 int ConvOutputSize(int input_size, int filter_size, int dilation, int padding,
                   int stride) {
-    const int dkernel = dilation * (filter_size - 1) + 1;
-    int output_size = (input_size + 2 * padding - dkernel) / stride + 1;
-    return output_size;
+  const int dkernel = dilation * (filter_size - 1) + 1;
+  int output_size = (input_size + 2 * padding - dkernel) / stride + 1;
+  return output_size;
 }

 template <typename Dtype, typename T>
 void ConvOp<Dtype, T>::InferShape() const {
-    //  std::cout << " begin get dims: " << std::endl;
+  //  std::cout << " begin get dims: " << std::endl;

-    auto in_dims = param_.Input()->dims();
+  auto in_dims = param_.Input()->dims();

-    //  std::cout << " end get in dims: " << std::endl;
+  //  std::cout << " end get in dims: " << std::endl;

-    //  std::cout << " in_dims: " << in_dims << std::endl;
+  //  std::cout << " in_dims: " << in_dims << std::endl;

-    //  std::cout << " begin get Filter " << std::endl;
+  //  std::cout << " begin get Filter " << std::endl;

-    auto filter_dims = param_.Filter()->dims();
+  auto filter_dims = param_.Filter()->dims();

-    //  std::cout << " end get Filter " << std::endl;
+  //  std::cout << " end get Filter " << std::endl;

-    //  std::cout << " begin get Attrs " << std::endl;
+  //  std::cout << " begin get Attrs " << std::endl;

-    const std::vector<int> &strides = param_.Strides();
+  const std::vector<int> &strides = param_.Strides();

-    //  std::cout << " end get Attrs " << strides[0] << std::endl;
+  //  std::cout << " end get Attrs " << strides[0] << std::endl;

-    std::vector<int> paddings = param_.Paddings();
+  std::vector<int> paddings = param_.Paddings();

-    int groups = param_.Groups();
+  int groups = param_.Groups();

-    std::vector<int> dilations = param_.Dilations();
+  std::vector<int> dilations = param_.Dilations();

-    std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
-    for (size_t i = 0; i < strides.size(); ++i) {
-        output_shape.push_back(ConvOutputSize(in_dims[i + 2],
-                                              filter_dims[i + 2], dilations[i],
-                                              paddings[i], strides[i]));
-    }
+  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
+  for (size_t i = 0; i < strides.size(); ++i) {
+    output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
+                                          dilations[i], paddings[i],
+                                          strides[i]));
+  }

-    framework::DDim ddim = framework::make_ddim(output_shape);
-    param_.Output()->Resize(ddim);
+  framework::DDim ddim = framework::make_ddim(output_shape);
+  param_.Output()->Resize(ddim);
 }

 template class ConvOp<CPU, float>;

--- a/src/operators/conv_op.h
+++ b/src/operators/conv_op.h
@@ -28,25 +28,25 @@ using namespace framework;

 template <typename DeviceType, typename T>
 class ConvOp : public framework::OperatorWithKernel<DeviceType> {
-  public:
-    ConvOp(const std::string &type, const VariableNameMap &inputs,
-           const VariableNameMap &outputs, const framework::AttributeMap &attrs,
-           std::shared_ptr<framework::Scope> scope)
-        : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs,
-                                                    attrs, scope),
-          param_(inputs, outputs, attrs, *scope) {}
-
-    using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
-    void InferShape() const override;
-
-    void Run() const {
-        operators::ConvKernel<DeviceType, T> kernel;
-        kernel.Compute(param_);
-        this->ClearVariables({"Filter", "Input"});
-    }
-
-  private:
-    ConvParam param_;
+public:
+  ConvOp(const std::string &type, const VariableNameMap &inputs,
+         const VariableNameMap &outputs, const framework::AttributeMap &attrs,
+         std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
+                                                  scope),
+        param_(inputs, outputs, attrs, *scope) {}
+
+  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
+  void InferShape() const override;
+
+  void Run() const {
+    operators::ConvKernel<DeviceType, T> kernel;
+    kernel.Compute(param_);
+    this->ClearVariables({"Filter", "Input"});
+  }
+
+private:
+  ConvParam param_;
 };

 } // namespace operators

--- a/src/operators/elementwise_add_op.cpp
+++ b/src/operators/elementwise_add_op.cpp
@@ -23,8 +23,8 @@ namespace operators {

 template <typename Dtype, typename T>
 void ElementwiseAddOp<Dtype, T>::InferShape() const {
-    auto x_dim = param_.InputX()->dims();
-    param_.Out()->Resize(x_dim);
+  auto x_dim = param_.InputX()->dims();
+  param_.Out()->Resize(x_dim);
 }
 template class ElementwiseAddOp<CPU, float>;
 } // namespace operators

--- a/src/operators/elementwise_add_op.h
+++ b/src/operators/elementwise_add_op.h
@@ -27,25 +27,25 @@ using namespace framework;

 template <typename DeviceType, typename T>
 class ElementwiseAddOp : public framework::OperatorWithKernel<DeviceType> {
-  public:
-    ElementwiseAddOp(const std::string &type, const VariableNameMap &inputs,
-                     const VariableNameMap &outputs,
-                     const framework::AttributeMap attrs,
-                     std::shared_ptr<framework::Scope> scope)
-        : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs,
-                                                    attrs, scope),
-          param_(inputs, outputs, attrs, *scope) {}
+public:
+  ElementwiseAddOp(const std::string &type, const VariableNameMap &inputs,
+                   const VariableNameMap &outputs,
+                   const framework::AttributeMap attrs,
+                   std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
+                                                  scope),
+        param_(inputs, outputs, attrs, *scope) {}

-    void Run() const {
-        operators::ElementwiseAddKernel<DeviceType, T> kernel;
-        kernel.Compute(param_);
-    }
+  void Run() const {
+    operators::ElementwiseAddKernel<DeviceType, T> kernel;
+    kernel.Compute(param_);
+  }

-    using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
-    void InferShape() const override;
+  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
+  void InferShape() const override;

-  protected:
-    ElementwiseAddParam param_;
+protected:
+  ElementwiseAddParam param_;
 };
 } // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/kernel/arm/batchnorm_kernel.cpp
+++ b/src/operators/kernel/arm/batchnorm_kernel.cpp
@@ -21,73 +21,72 @@ namespace operators {

 template <>
 void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
-    /// todo: test.
-    const Tensor *input_x = param.InputX();
-    auto input_x_ptr = input_x->data<float>();
-    const auto &x_dims = input_x->dims();
-    const int N = x_dims[0];
-    const int C = x_dims[1];
-    const int H = x_dims[2];
-    const int W = x_dims[3];
-    const int stride0 = C * H * W;
-    const int stride1 = H * W;
-    const int stride2 = W;
-    Tensor *out = param.OutputY();
-    auto out_ptr = out->mutable_data<float>();
-    const float epsilon = param.Epsilon();
-    const Tensor *mean = param.InputMean();
-    const Tensor *variance = param.InputVariance();
-    const Tensor *scale = param.InputScale();
-    const Tensor *bias = param.InputBias();
-    auto mean_ptr = mean->data<float>();
-    auto variance_ptr = variance->data<float>();
-    auto scale_ptr = scale->data<float>();
-    auto bias_ptr = bias->data<float>();
+  /// todo: test.
+  const Tensor *input_x = param.InputX();
+  auto input_x_ptr = input_x->data<float>();
+  const auto &x_dims = input_x->dims();
+  const int N = x_dims[0];
+  const int C = x_dims[1];
+  const int H = x_dims[2];
+  const int W = x_dims[3];
+  const int stride0 = C * H * W;
+  const int stride1 = H * W;
+  const int stride2 = W;
+  Tensor *out = param.OutputY();
+  auto out_ptr = out->mutable_data<float>();
+  const float epsilon = param.Epsilon();
+  const Tensor *mean = param.InputMean();
+  const Tensor *variance = param.InputVariance();
+  const Tensor *scale = param.InputScale();
+  const Tensor *bias = param.InputBias();
+  auto mean_ptr = mean->data<float>();
+  auto variance_ptr = variance->data<float>();
+  auto scale_ptr = scale->data<float>();
+  auto bias_ptr = bias->data<float>();

-    Tensor inv_std;
-    auto inv_std_ptr = inv_std.mutable_data<float>(make_ddim({C}));
-    if (C != variance->numel()) {
-        std::cout << "C must equal to variance.numel()" << std::endl;
-    }
-    assert(C == variance->numel());
+  Tensor inv_std;
+  auto inv_std_ptr = inv_std.mutable_data<float>(make_ddim({C}));
+  if (C != variance->numel()) {
+    std::cout << "C must equal to variance.numel()" << std::endl;
+  }
+  assert(C == variance->numel());

-    /// std = (var + epsilon).sqrt();
-    /// inv_std = 1 / std;
-    for (int i = 0; i < C; i++) {
-        inv_std_ptr[i] =
-            1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
-    }
+  /// std = (var + epsilon).sqrt();
+  /// inv_std = 1 / std;
+  for (int i = 0; i < C; i++) {
+    inv_std_ptr[i] =
+        1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
+  }

-    Tensor new_scale;
-    auto new_scale_ptr = new_scale.mutable_data<float>(make_ddim({C}));
-    Tensor new_bias;
-    auto new_bias_ptr = new_bias.mutable_data<float>(make_ddim({C}));
+  Tensor new_scale;
+  auto new_scale_ptr = new_scale.mutable_data<float>(make_ddim({C}));
+  Tensor new_bias;
+  auto new_bias_ptr = new_bias.mutable_data<float>(make_ddim({C}));

-    /// ((x - est_mean) * (inv_var) * scale + bias equal to
-    /// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
-    for (int i = 0; i < C; i++) {
-        new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
-        new_bias_ptr[i] =
-            bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
-        {
-            for (int n = 0; n < N; n++) {
-                for (int h = 0; h < H; h++) {
-                    for (int w = 0; w < W; w++) {
-                        int index = n * stride0 + i * stride1 + h * stride2 + w;
-                        out_ptr[index] = input_x_ptr[index] * new_scale_ptr[i] +
-                                         new_bias_ptr[i];
-                    }
-                }
-            }
+  /// ((x - est_mean) * (inv_var) * scale + bias equal to
+  /// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
+  for (int i = 0; i < C; i++) {
+    new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
+    new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
+    {
+      for (int n = 0; n < N; n++) {
+        for (int h = 0; h < H; h++) {
+          for (int w = 0; w < W; w++) {
+            int index = n * stride0 + i * stride1 + h * stride2 + w;
+            out_ptr[index] =
+                input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i];
+          }
        }
+      }
    }
-    DLOG << "input[2,5,1,0](input[102]) ,channel 5 :";
-    DLOG << "input_x_ptr : " << input_x_ptr[102];
-    DLOG << "variance : " << variance_ptr[5];
-    DLOG << "inv_std_ptr : " << inv_std_ptr[5];
-    DLOG << "new_scale_ptr : " << new_scale_ptr[5];
-    DLOG << "new_bias_ptr : " << new_bias_ptr[5];
-    DLOG << "out_ptr : " << out_ptr[102];
+  }
+  DLOG << "input[2,5,1,0](input[102]) ,channel 5 :";
+  DLOG << "input_x_ptr : " << input_x_ptr[102];
+  DLOG << "variance : " << variance_ptr[5];
+  DLOG << "inv_std_ptr : " << inv_std_ptr[5];
+  DLOG << "new_scale_ptr : " << new_scale_ptr[5];
+  DLOG << "new_bias_ptr : " << new_bias_ptr[5];
+  DLOG << "out_ptr : " << out_ptr[102];
 }
 } // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/kernel/arm/concat_kernel.cpp
+++ b/src/operators/kernel/arm/concat_kernel.cpp
@@ -19,36 +19,36 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
 template <typename T> class ConcatFunctor {
-  public:
-    void operator()(const std::vector<framework::Tensor> &input, const int axis,
-                    framework::Tensor *output) {
-        size_t num = input.size();
-        int rows = 1;
-        auto dim_0 = input[0].dims();
-        for (int i = 0; i < axis; ++i) {
-            rows *= dim_0[i];
-        }
-        int out_rows = rows, out_cols = 0;
+public:
+  void operator()(const std::vector<framework::Tensor> &input, const int axis,
+                  framework::Tensor *output) {
+    size_t num = input.size();
+    int rows = 1;
+    auto dim_0 = input[0].dims();
+    for (int i = 0; i < axis; ++i) {
+      rows *= dim_0[i];
+    }
+    int out_rows = rows, out_cols = 0;

-        std::vector<int64_t> input_cols(input.size());
-        for (int i = 0; i < num; ++i) {
-            int t_cols = input[i].numel() / rows;
-            out_cols += t_cols;
-            input_cols[i] = t_cols;
-        }
+    std::vector<int64_t> input_cols(input.size());
+    for (int i = 0; i < num; ++i) {
+      int t_cols = input[i].numel() / rows;
+      out_cols += t_cols;
+      input_cols[i] = t_cols;
+    }

-        // computation
-        for (int k = 0; k < out_rows; ++k) {
-            T *dst_ptr = output->data<T>() + k * out_cols;
-            int col_idx = 0;
-            for (int j = 0; j < num; ++j) {
-                int col_len = input_cols[j];
-                const T *src_prt = input[j].data<T>() + k * col_len;
-                memory::Copy(dst_ptr + col_idx, src_prt, sizeof(T) * col_len);
-                col_idx += col_len;
-            }
-        }
+    // computation
+    for (int k = 0; k < out_rows; ++k) {
+      T *dst_ptr = output->data<T>() + k * out_cols;
+      int col_idx = 0;
+      for (int j = 0; j < num; ++j) {
+        int col_len = input_cols[j];
+        const T *src_prt = input[j].data<T>() + k * col_len;
+        memory::Copy(dst_ptr + col_idx, src_prt, sizeof(T) * col_len);
+        col_idx += col_len;
+      }
    }
+  }
 };
 template <typename T>
 void StridedNumelCopyWithAxis(int64_t axis, T *dst,
@@ -56,61 +56,60 @@ void StridedNumelCopyWithAxis(int64_t axis, T *dst,
                              const T *src,
                              const framework::DDim &src_stride_numel,
                              int64_t size) {
-    int64_t before = dst_stride_numel[0] / dst_stride_numel[axis];
-    int64_t src_after = src_stride_numel[axis];
-    int64_t dst_after = dst_stride_numel[axis];
+  int64_t before = dst_stride_numel[0] / dst_stride_numel[axis];
+  int64_t src_after = src_stride_numel[axis];
+  int64_t dst_after = dst_stride_numel[axis];

-    ///"src and dst tensor should have the same dims size."
-    assert(src_stride_numel.size() == dst_stride_numel.size());
+  ///"src and dst tensor should have the same dims size."
+  assert(src_stride_numel.size() == dst_stride_numel.size());

-    for (int64_t i = 0; i < axis; ++i) {
-        if (i < axis) {
-            /// src and dst should have the same elements
-            /// except the specified axis.
-            assert(src_stride_numel[i] / src_stride_numel[axis] ==
-                   dst_stride_numel[i] / dst_stride_numel[axis]);
+  for (int64_t i = 0; i < axis; ++i) {
+    if (i < axis) {
+      /// src and dst should have the same elements
+      /// except the specified axis.
+      assert(src_stride_numel[i] / src_stride_numel[axis] ==
+             dst_stride_numel[i] / dst_stride_numel[axis]);

-        } else if (i == axis) {
-            continue;
-        } else {
-            /// "src and dst should have the same elements "
-            ///         "except the specified axis."
-            assert(src_stride_numel[i] == dst_stride_numel[i]);
-        }
+    } else if (i == axis) {
+      continue;
+    } else {
+      /// "src and dst should have the same elements "
+      ///         "except the specified axis."
+      assert(src_stride_numel[i] == dst_stride_numel[i]);
    }
+  }

-    for (int64_t i = 0; i < before; ++i) {
-        memory::Copy(dst + i * dst_after, src + i * src_after,
-                     sizeof(T) * size);
-    }
+  for (int64_t i = 0; i < before; ++i) {
+    memory::Copy(dst + i * dst_after, src + i * src_after, sizeof(T) * size);
+  }
 }

 template <>
 void ConcatKernel<CPU, float>::Compute(const ConcatParam &param) const {
-    auto inputs = param.Inputs();
-    auto *out = param.Out();
-    int64_t axis = param.Axis();
-    out->mutable_data<float>();
+  auto inputs = param.Inputs();
+  auto *out = param.Out();
+  int64_t axis = param.Axis();
+  out->mutable_data<float>();

-    /// Sometimes direct copies will be faster, this maybe need deeply analysis.
-    if (axis == 0 && inputs.size() < 10) {
-        size_t output_offset = 0;
-        for (auto *in : inputs) {
-            auto in_stride = framework::stride_numel(in->dims());
-            auto out_stride = framework::stride_numel(out->dims());
-            StridedNumelCopyWithAxis<float>(
-                axis, out->data<float>() + output_offset, out_stride,
-                in->data<float>(), in_stride, in_stride[axis]);
-            output_offset += in_stride[axis];
-        }
-    } else {
-        std::vector<framework::Tensor> inputs_concat(inputs.size());
-        for (int j = 0; j < inputs.size(); ++j) {
-            inputs_concat[j] = *inputs[j];
-        }
-        ConcatFunctor<float> concat_functor;
-        concat_functor(inputs_concat, static_cast<int>(axis), out);
+  /// Sometimes direct copies will be faster, this maybe need deeply analysis.
+  if (axis == 0 && inputs.size() < 10) {
+    size_t output_offset = 0;
+    for (auto *in : inputs) {
+      auto in_stride = framework::stride_numel(in->dims());
+      auto out_stride = framework::stride_numel(out->dims());
+      StridedNumelCopyWithAxis<float>(axis, out->data<float>() + output_offset,
+                                      out_stride, in->data<float>(), in_stride,
+                                      in_stride[axis]);
+      output_offset += in_stride[axis];
+    }
+  } else {
+    std::vector<framework::Tensor> inputs_concat(inputs.size());
+    for (int j = 0; j < inputs.size(); ++j) {
+      inputs_concat[j] = *inputs[j];
    }
+    ConcatFunctor<float> concat_functor;
+    concat_functor(inputs_concat, static_cast<int>(axis), out);
+  }
 }

 } // namespace operators

--- a/src/operators/kernel/arm/conv_kernel.cpp
+++ b/src/operators/kernel/arm/conv_kernel.cpp
@@ -24,128 +24,126 @@ namespace operators {
 bool IsExpand(const std::vector<int64_t> &filter_dim,
              const std::vector<int> &strides, const std::vector<int> &paddings,
              const std::vector<int> &dilations) {
-    bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true;
-    for (size_t j = 0; j < strides.size(); ++j) {
-        filter_1 = filter_1 && (static_cast<int>(filter_dim[j + 2]) == 1);
-        strides_1 = strides_1 && (strides[j] == 1);
-        padding_0 = padding_0 && (paddings[j] == 0);
-        dilation_1 = dilation_1 && (dilations[j] == 1);
-    }
-    return !(filter_1 && strides_1 && padding_0 && dilation_1);
+  bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true;
+  for (size_t j = 0; j < strides.size(); ++j) {
+    filter_1 = filter_1 && (static_cast<int>(filter_dim[j + 2]) == 1);
+    strides_1 = strides_1 && (strides[j] == 1);
+    padding_0 = padding_0 && (paddings[j] == 0);
+    dilation_1 = dilation_1 && (dilations[j] == 1);
+  }
+  return !(filter_1 && strides_1 && padding_0 && dilation_1);
 }

 template <> void ConvKernel<CPU, float>::Compute(const ConvParam &param) const {
-    LOG(kLOG_DEBUG) << param;
-
-    const Tensor *input = param.Input();
-
-    // The filter will be reshaped in the calculations,
-    // so here use an assignment operation,
-    // that avoids modifying the variable in the Scope.
-    Tensor filter = *param.Filter();
-
-    Tensor *output = param.Output();
-    //            output->mutable_data<T>(context.GetPlace());
-
-    int groups = param.Groups();
-    std::vector<int> strides = param.Strides();
-    std::vector<int> paddings = param.Paddings();
-    std::vector<int> dilations = param.Dilations();
-
-    DLOG << " compute end get Attrs " << strides[0];
-
-    const int batch_size = static_cast<int>(input->dims()[0]);
-
-    // filter_shape_vec: {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h,
-    // k_w}
-    std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
-    // output_shape_vec: {o_n, o_c, o_h, o_w} or {o_n, o_c, o_d, o_h,
-    // o_w}
-    std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
-
-    // use col_shape in the im2col calculation
-    // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h,
-    // k_w, o_d,
-    // o_h, o_w}
-    size_t data_dim = filter_shape_vec.size() - 2;
-    std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
-    col_shape_vec[0] = input->dims()[1] / groups;
-    for (size_t j = 0; j < data_dim; ++j) {
-        col_shape_vec[j + 1] = filter_shape_vec[j + 2];
-        col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
-    }
-    framework::DDim col_shape(framework::make_ddim(col_shape_vec));
-
-    // use col_matrix_shape in the gemm calculation
-    // size: (i_c/g * k_h * k_w, o_h * o_w) or (i_c/g * k_d * k_h * k_w,
-    // o_d *
-    // o_h * o_w)
-    framework::DDim col_matrix_shape =
-        framework::flatten_to_2d(col_shape, data_dim + 1);
-
-    bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
-    Tensor col;
-    // col_matrix shares the same piece of data with col,
-    // but will be reshaped into a two-dimensional matrix shape
-    // to call the matrix multiplication interface.
-    Tensor col_matrix;
-    if (is_expand) {
-        col.mutable_data<float>(col_shape);
+  LOG(kLOG_DEBUG) << param;
+
+  const Tensor *input = param.Input();
+
+  // The filter will be reshaped in the calculations,
+  // so here use an assignment operation,
+  // that avoids modifying the variable in the Scope.
+  Tensor filter = *param.Filter();
+
+  Tensor *output = param.Output();
+  //            output->mutable_data<T>(context.GetPlace());
+
+  int groups = param.Groups();
+  std::vector<int> strides = param.Strides();
+  std::vector<int> paddings = param.Paddings();
+  std::vector<int> dilations = param.Dilations();
+
+  DLOG << " compute end get Attrs " << strides[0];
+
+  const int batch_size = static_cast<int>(input->dims()[0]);
+
+  // filter_shape_vec: {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h,
+  // k_w}
+  std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
+  // output_shape_vec: {o_n, o_c, o_h, o_w} or {o_n, o_c, o_d, o_h,
+  // o_w}
+  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
+
+  // use col_shape in the im2col calculation
+  // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h,
+  // k_w, o_d,
+  // o_h, o_w}
+  size_t data_dim = filter_shape_vec.size() - 2;
+  std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
+  col_shape_vec[0] = input->dims()[1] / groups;
+  for (size_t j = 0; j < data_dim; ++j) {
+    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
+    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
+  }
+  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
+
+  // use col_matrix_shape in the gemm calculation
+  // size: (i_c/g * k_h * k_w, o_h * o_w) or (i_c/g * k_d * k_h * k_w,
+  // o_d *
+  // o_h * o_w)
+  framework::DDim col_matrix_shape =
+      framework::flatten_to_2d(col_shape, data_dim + 1);
+
+  bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
+  Tensor col;
+  // col_matrix shares the same piece of data with col,
+  // but will be reshaped into a two-dimensional matrix shape
+  // to call the matrix multiplication interface.
+  Tensor col_matrix;
+  if (is_expand) {
+    col.mutable_data<float>(col_shape);
+    col_matrix.ShareDataWith(col);
+    col_matrix.Resize(col_matrix_shape);
+  }
+
+  framework::DDim input_shape = framework::slice_ddim(
+      input->dims(), 1, static_cast<int>(input->dims().size()));
+
+  framework::DDim filter_matrix_shape = {filter.dims()[0],
+                                         filter.numel() / filter.dims()[0]};
+  filter.Resize(filter_matrix_shape);
+
+  framework::DDim output_matrix_shape = {
+      output->dims()[1],
+      output->numel() / (output->dims()[0] * output->dims()[1])};
+
+  // convolution operator: im2col(or vol2col) + gemm
+  int in_step = static_cast<int>(input->dims()[1]) / groups;
+  int out_step = static_cast<int>(output->dims()[1]) / groups;
+
+  math::Vol2ColFunctor<CPU, float> vol2col;
+  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
+
+  //            auto& dev_ctx = context.template
+  //            device_context<DeviceContext>();
+  for (int i = 0; i < batch_size; i++) {
+    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
+    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
+
+    for (int g = 0; g < groups; g++) {
+      Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
+
+      if (!is_expand) {
+        col.ShareDataWith(in_slice);
        col_matrix.ShareDataWith(col);
        col_matrix.Resize(col_matrix_shape);
+      } else if (data_dim == 2U) {
+        // im2col
+        im2col(in_slice, dilations, strides,
+               std::vector<int>{paddings[0], paddings[1], paddings[0],
+                                paddings[1]},
+               &col);
+      } else if (data_dim == 3U) {
+        // vol2col
+        vol2col(in_slice, dilations, strides, paddings, &col);
+      }
+
+      // gemm
+      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
+      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
+      math::matmul<float>(filter_slice, false, col_matrix, false, float(1.0),
+                          &out_slice, float(0.0));
    }
-
-    framework::DDim input_shape = framework::slice_ddim(
-        input->dims(), 1, static_cast<int>(input->dims().size()));
-
-    framework::DDim filter_matrix_shape = {filter.dims()[0],
-                                           filter.numel() / filter.dims()[0]};
-    filter.Resize(filter_matrix_shape);
-
-    framework::DDim output_matrix_shape = {
-        output->dims()[1],
-        output->numel() / (output->dims()[0] * output->dims()[1])};
-
-    // convolution operator: im2col(or vol2col) + gemm
-    int in_step = static_cast<int>(input->dims()[1]) / groups;
-    int out_step = static_cast<int>(output->dims()[1]) / groups;
-
-    math::Vol2ColFunctor<CPU, float> vol2col;
-    math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
-
-    //            auto& dev_ctx = context.template
-    //            device_context<DeviceContext>();
-    for (int i = 0; i < batch_size; i++) {
-        Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
-        Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
-
-        for (int g = 0; g < groups; g++) {
-            Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
-
-            if (!is_expand) {
-                col.ShareDataWith(in_slice);
-                col_matrix.ShareDataWith(col);
-                col_matrix.Resize(col_matrix_shape);
-            } else if (data_dim == 2U) {
-                // im2col
-                im2col(in_slice, dilations, strides,
-                       std::vector<int>{paddings[0], paddings[1], paddings[0],
-                                        paddings[1]},
-                       &col);
-            } else if (data_dim == 3U) {
-                // vol2col
-                vol2col(in_slice, dilations, strides, paddings, &col);
-            }
-
-            // gemm
-            Tensor out_slice =
-                out_batch.Slice(g * out_step, (g + 1) * out_step);
-            Tensor filter_slice =
-                filter.Slice(g * out_step, (g + 1) * out_step);
-            math::matmul<float>(filter_slice, false, col_matrix, false,
-                                float(1.0), &out_slice, float(0.0));
-        }
-    }
+  }
 }

 template class ConvKernel<CPU, float>;

--- a/src/operators/kernel/arm/elementwise_add_kernel.cpp
+++ b/src/operators/kernel/arm/elementwise_add_kernel.cpp
@@ -20,19 +20,19 @@ namespace paddle_mobile {
 namespace operators {

 template <typename T> struct AddFunctor {
-    inline T operator()(T a, T b) const { return a + b; }
+  inline T operator()(T a, T b) const { return a + b; }
 };

 template <>
 void ElementwiseAddKernel<CPU, float>::Compute(
    const ElementwiseAddParam &param) const {
-    const Tensor *input_x = param.InputX();
-    const Tensor *input_y = param.InputY();
-    Tensor *Out = param.Out();
-    Out->mutable_data<float>();
-    const int axis = param.Axis();
-    ElementwiseComputeEx<AddFunctor<float>, float>(input_x, input_y, axis,
-                                                   AddFunctor<float>(), Out);
+  const Tensor *input_x = param.InputX();
+  const Tensor *input_y = param.InputY();
+  Tensor *Out = param.Out();
+  Out->mutable_data<float>();
+  const int axis = param.Axis();
+  ElementwiseComputeEx<AddFunctor<float>, float>(input_x, input_y, axis,
+                                                 AddFunctor<float>(), Out);
 }

 template class ElementwiseAddKernel<CPU, float>;

--- a/src/operators/kernel/arm/lrn_kernel.cpp
+++ b/src/operators/kernel/arm/lrn_kernel.cpp
@@ -24,21 +24,21 @@ namespace paddle_mobile {
 namespace operators {

 template <> void LrnKernel<CPU, float>::Compute(const LrnParam &param) const {
-    const Tensor *input_x = param.InputX();
-    auto x_dims = input_x->dims();
-    /// data_format = NCHW
-    const int N = x_dims[0];
-    const int C = x_dims[1];
-    const int H = x_dims[2];
-    const int W = x_dims[3];
-    Tensor *out = param.Out();
-    out->mutable_data<float>();
-    const int n = param.N();
-    const float alpha = param.Alpha();
-    const float beta = param.Beta();
-    const float k = param.K();
-    LRNFunctor<float> lrnFunctor;
-    lrnFunctor(*input_x, out, N, C, H, W, n, k, alpha, beta);
+  const Tensor *input_x = param.InputX();
+  auto x_dims = input_x->dims();
+  /// data_format = NCHW
+  const int N = x_dims[0];
+  const int C = x_dims[1];
+  const int H = x_dims[2];
+  const int W = x_dims[3];
+  Tensor *out = param.Out();
+  out->mutable_data<float>();
+  const int n = param.N();
+  const float alpha = param.Alpha();
+  const float beta = param.Beta();
+  const float k = param.K();
+  LRNFunctor<float> lrnFunctor;
+  lrnFunctor(*input_x, out, N, C, H, W, n, k, alpha, beta);
 }

 template class LrnKernel<CPU, float>;

--- a/src/operators/kernel/arm/mul_kernel.cpp
+++ b/src/operators/kernel/arm/mul_kernel.cpp
@@ -24,27 +24,27 @@ namespace paddle_mobile {
 namespace operators {

 template <> void MulKernel<CPU, float>::Compute(const MulParam &param) const {
-    const Tensor *input_x = param.InputX();
-    const Tensor *input_y = param.InputY();
-    Tensor *out = param.Out();
-    out->mutable_data<float>();
-    const Tensor x_matrix =
-        input_x->dims().size() > 2
-            ? framework::ReshapeToMatrix(*input_x, param.XNumColDims())
-            : *input_x;
-    const Tensor y_matrix =
-        input_y->dims().size() > 2
-            ? framework::ReshapeToMatrix(*input_y, param.YNumColDims())
-            : *input_y;
-    auto out_dim = out->dims();
-    if (out_dim.size() != 2) {
-        out->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
-    }
-    math::matmul<float>(x_matrix, false, y_matrix, false, static_cast<float>(1),
-                        out, static_cast<float>(0));
-    if (out_dim.size() != 2) {
-        out->Resize(out_dim);
-    }
+  const Tensor *input_x = param.InputX();
+  const Tensor *input_y = param.InputY();
+  Tensor *out = param.Out();
+  out->mutable_data<float>();
+  const Tensor x_matrix =
+      input_x->dims().size() > 2
+          ? framework::ReshapeToMatrix(*input_x, param.XNumColDims())
+          : *input_x;
+  const Tensor y_matrix =
+      input_y->dims().size() > 2
+          ? framework::ReshapeToMatrix(*input_y, param.YNumColDims())
+          : *input_y;
+  auto out_dim = out->dims();
+  if (out_dim.size() != 2) {
+    out->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
+  }
+  math::matmul<float>(x_matrix, false, y_matrix, false, static_cast<float>(1),
+                      out, static_cast<float>(0));
+  if (out_dim.size() != 2) {
+    out->Resize(out_dim);
+  }
 }

 template class MulKernel<CPU, float>;

--- a/src/operators/kernel/arm/pool_kernel.cpp
+++ b/src/operators/kernel/arm/pool_kernel.cpp
@@ -24,54 +24,54 @@ namespace operators {
 inline void PoolBasic(std::string pooling_type, std::vector<int> ksize,
                      std::vector<int> strides, std::vector<int> paddings,
                      const Tensor *in_x, Tensor *out) {
-    if (pooling_type == "max") {
-        math::PoolFunctor<CPU, math::MaxPool<float>, float> pool2d_forward;
-        math::MaxPool<float> pool_process;
-        pool2d_forward(*in_x, ksize, strides, paddings, pool_process, out);
+  if (pooling_type == "max") {
+    math::PoolFunctor<CPU, math::MaxPool<float>, float> pool2d_forward;
+    math::MaxPool<float> pool_process;
+    pool2d_forward(*in_x, ksize, strides, paddings, pool_process, out);

-    } else if (pooling_type == "avg") {
-        math::PoolFunctor<CPU, math::AvgPool<float>, float> pool2d_forward;
-        math::AvgPool<float> pool_process;
-        pool2d_forward(*in_x, ksize, strides, paddings, pool_process, out);
-    }
+  } else if (pooling_type == "avg") {
+    math::PoolFunctor<CPU, math::AvgPool<float>, float> pool2d_forward;
+    math::AvgPool<float> pool_process;
+    pool2d_forward(*in_x, ksize, strides, paddings, pool_process, out);
+  }
 }

 template <> void PoolKernel<CPU, float>::Compute(const PoolParam &param) const {
-    const Tensor *in_x = param.Input();
-    Tensor *out = param.Output();
-    std::string pooling_type = param.PoolingType();
+  const Tensor *in_x = param.Input();
+  Tensor *out = param.Output();
+  std::string pooling_type = param.PoolingType();

-    std::vector<int> ksize = param.Ksize();
+  std::vector<int> ksize = param.Ksize();

-    std::vector<int> strides = param.Strides();
+  std::vector<int> strides = param.Strides();

-    std::vector<int> paddings = param.Paddings();
-    if (ksize.size() != 2) {
-        LOG(paddle_mobile::LogLevel::kLOG_ERROR)
-            << "Pool op only supports 2D and 3D input.";
-    }
+  std::vector<int> paddings = param.Paddings();
+  if (ksize.size() != 2) {
+    LOG(paddle_mobile::LogLevel::kLOG_ERROR)
+        << "Pool op only supports 2D and 3D input.";
+  }

-    if (param.isGlobalPooling()) {
-        for (size_t i = 0; i < ksize.size(); ++i) {
-            paddings[i] = 0;
-            ksize[i] = static_cast<int>(in_x->dims()[i + 2]);
-        }
+  if (param.isGlobalPooling()) {
+    for (size_t i = 0; i < ksize.size(); ++i) {
+      paddings[i] = 0;
+      ksize[i] = static_cast<int>(in_x->dims()[i + 2]);
    }
+  }

-    PoolBasic(pooling_type, ksize, strides, paddings, in_x, out);
+  PoolBasic(pooling_type, ksize, strides, paddings, in_x, out);

-    //    if (param.isGlobalPooling() || ksize[0] != ksize[1] ||
-    //        strides[0] != strides[1] || strides[1] != 2 ||
-    //        paddings[0] != paddings[1] || paddings[1] > 1) {
-    //        PoolBasic(pooling_type, ksize, strides, paddings, in_x, out);
-    //
-    //    } else if (ksize[0] == 2) {
-    //
-    //    } else if (ksize[0] == 3) {
-    //
-    //    } else {
-    //        PoolBasic(pooling_type, ksize, strides, paddings, in_x, out);
-    //    }
+  //    if (param.isGlobalPooling() || ksize[0] != ksize[1] ||
+  //        strides[0] != strides[1] || strides[1] != 2 ||
+  //        paddings[0] != paddings[1] || paddings[1] > 1) {
+  //        PoolBasic(pooling_type, ksize, strides, paddings, in_x, out);
+  //
+  //    } else if (ksize[0] == 2) {
+  //
+  //    } else if (ksize[0] == 3) {
+  //
+  //    } else {
+  //        PoolBasic(pooling_type, ksize, strides, paddings, in_x, out);
+  //    }
 }
 } // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/kernel/batchnorm_kernel.h
+++ b/src/operators/kernel/batchnorm_kernel.h
@@ -28,8 +28,8 @@ using namespace framework;
 template <typename DeviceType, typename T>
 class BatchNormKernel
    : public framework::OpKernelBase<DeviceType, BatchNormParam> {
-  public:
-    void Compute(const BatchNormParam &param) const;
+public:
+  void Compute(const BatchNormParam &param) const;
 };

 } // namespace operators

--- a/src/operators/kernel/concat_kernel.h
+++ b/src/operators/kernel/concat_kernel.h
@@ -26,8 +26,8 @@ using namespace framework;

 template <typename DeviceType, typename T>
 class ConcatKernel : public framework::OpKernelBase<DeviceType, ConcatParam> {
-  public:
-    void Compute(const ConcatParam &param) const;
+public:
+  void Compute(const ConcatParam &param) const;
 };

 } // namespace operators

--- a/src/operators/kernel/conv_kernel.h
+++ b/src/operators/kernel/conv_kernel.h
@@ -31,8 +31,8 @@ using namespace framework;

 template <typename DeviceType, typename T>
 class ConvKernel : public framework::OpKernelBase<DeviceType, ConvParam> {
-  public:
-    void Compute(const ConvParam &param) const;
+public:
+  void Compute(const ConvParam &param) const;
 };
 } // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/kernel/elementwise_add_kernel.h
+++ b/src/operators/kernel/elementwise_add_kernel.h
@@ -29,8 +29,8 @@ using namespace framework;
 template <typename DeviceType, typename T>
 class ElementwiseAddKernel
    : public framework::OpKernelBase<DeviceType, ElementwiseAddParam> {
-  public:
-    void Compute(const ElementwiseAddParam &param) const;
+public:
+  void Compute(const ElementwiseAddParam &param) const;
 };
 } // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/kernel/lrn_kernel.h
+++ b/src/operators/kernel/lrn_kernel.h
@@ -26,52 +26,49 @@ namespace operators {
 using namespace framework;

 template <typename T> struct LRNFunctor {
-    void operator()(const framework::Tensor &input, framework::Tensor *out,
-                    int N, int C, int H, int W, int n, T k, T alpha, T beta) {
-        auto input_ptr = input.data<T>();
-        const int start = -(n - 1) / 2;
-        const int end = start + n;
+  void operator()(const framework::Tensor &input, framework::Tensor *out, int N,
+                  int C, int H, int W, int n, T k, T alpha, T beta) {
+    auto input_ptr = input.data<T>();
+    const int start = -(n - 1) / 2;
+    const int end = start + n;

-        const int stride0 = C * H * W;
-        const int stride1 = H * W;
-        const int stride2 = W;
-        const int stride3 = 1;
+    const int stride0 = C * H * W;
+    const int stride1 = H * W;
+    const int stride2 = W;
+    const int stride3 = 1;

-        framework::Tensor sqr_buffer;
-        auto sqr_buffer_ptr = sqr_buffer.mutable_data<T>(input.dims());
-        std::fill(sqr_buffer_ptr, sqr_buffer_ptr + sqr_buffer.numel(), k);
-        for (int a = 0; a < N; a++) {
-            for (int b = 0; b < C; b++) {
-                for (int index = start; index < end; index++) {
-                    int channel = b + index;
-                    if (channel >= 0 && channel < C) {
-                        for (int c = 0; c < H; c++) {
-                            for (int d = 0; d < W; d++) {
-                                int u =
-                                    a * stride0 + b * stride1 + c * stride2 + d;
+    framework::Tensor sqr_buffer;
+    auto sqr_buffer_ptr = sqr_buffer.mutable_data<T>(input.dims());
+    std::fill(sqr_buffer_ptr, sqr_buffer_ptr + sqr_buffer.numel(), k);
+    for (int a = 0; a < N; a++) {
+      for (int b = 0; b < C; b++) {
+        for (int index = start; index < end; index++) {
+          int channel = b + index;
+          if (channel >= 0 && channel < C) {
+            for (int c = 0; c < H; c++) {
+              for (int d = 0; d < W; d++) {
+                int u = a * stride0 + b * stride1 + c * stride2 + d;

-                                int i = a * stride0 + channel * stride1 +
-                                        c * stride2 + d;
+                int i = a * stride0 + channel * stride1 + c * stride2 + d;

-                                sqr_buffer_ptr[u] +=
-                                    alpha * input_ptr[i] * input_ptr[i];
-                            }
-                        }
-                    }
-                }
+                sqr_buffer_ptr[u] += alpha * input_ptr[i] * input_ptr[i];
+              }
            }
+          }
        }
-        auto out_ptr = out->data<T>();
-        for (int i = 0; i < input.numel(); i++) {
-            out_ptr[i] = input_ptr[i] / pow(sqr_buffer_ptr[i], beta);
-        }
+      }
+    }
+    auto out_ptr = out->data<T>();
+    for (int i = 0; i < input.numel(); i++) {
+      out_ptr[i] = input_ptr[i] / pow(sqr_buffer_ptr[i], beta);
    }
+  }
 };

 template <typename DeviceType, typename T>
 class LrnKernel : public framework::OpKernelBase<DeviceType, LrnParam> {
-  public:
-    void Compute(const LrnParam &param) const;
+public:
+  void Compute(const LrnParam &param) const;
 };
 } // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/kernel/mul_kernel.h
+++ b/src/operators/kernel/mul_kernel.h
@@ -28,8 +28,8 @@ using namespace framework;

 template <typename DeviceType, typename T>
 class MulKernel : public framework::OpKernelBase<DeviceType, MulParam> {
-  public:
-    void Compute(const MulParam &param) const;
+public:
+  void Compute(const MulParam &param) const;
 };
 } // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/kernel/pool_kernel.h
+++ b/src/operators/kernel/pool_kernel.h
@@ -28,8 +28,8 @@ using namespace framework;

 template <typename DeviceType, typename T>
 class PoolKernel : public framework::OpKernelBase<DeviceType, PoolParam> {
-  public:
-    void Compute(const PoolParam &param) const;
+public:
+  void Compute(const PoolParam &param) const;
 };
 } // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/lrn_op.cpp
+++ b/src/operators/lrn_op.cpp
@@ -22,8 +22,8 @@ namespace paddle_mobile {
 namespace operators {

 template <typename Dtype, typename T> void LrnOp<Dtype, T>::InferShape() const {
-    auto x_dims = param_.InputX()->dims();
-    param_.Out()->Resize(x_dims);
+  auto x_dims = param_.InputX()->dims();
+  param_.Out()->Resize(x_dims);
 }
 template class LrnOp<CPU, float>;
 } // namespace operators

--- a/src/operators/lrn_op.h
+++ b/src/operators/lrn_op.h
@@ -27,24 +27,24 @@ using namespace framework;

 template <typename DeviceType, typename T>
 class LrnOp : public framework::OperatorWithKernel<DeviceType> {
-  public:
-    LrnOp(const std::string &type, const VariableNameMap &inputs,
-          const VariableNameMap &outputs, const framework::AttributeMap attrs,
-          std::shared_ptr<framework::Scope> scope)
-        : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs,
-                                                    attrs, scope),
-          param_(inputs, outputs, attrs, *scope) {}
-
-    void Run() const {
-        operators::LrnKernel<DeviceType, T> kernel;
-        kernel.Compute(param_);
-    }
-
-    using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
-    void InferShape() const override;
-
-  protected:
-    LrnParam param_;
+public:
+  LrnOp(const std::string &type, const VariableNameMap &inputs,
+        const VariableNameMap &outputs, const framework::AttributeMap attrs,
+        std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
+                                                  scope),
+        param_(inputs, outputs, attrs, *scope) {}
+
+  void Run() const {
+    operators::LrnKernel<DeviceType, T> kernel;
+    kernel.Compute(param_);
+  }
+
+  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
+  void InferShape() const override;
+
+protected:
+  LrnParam param_;
 };

 } // namespace operators

--- a/src/operators/math/elementwise_op_function.h
+++ b/src/operators/math/elementwise_op_function.h
@@ -34,66 +34,66 @@ namespace operators {
 inline void get_mid_dims(const framework::DDim &x_dims,
                         const framework::DDim &y_dims, const int axis,
                         int *pre, int *n, int *post) {
-    *pre = 1;
-    *n = 1;
-    *post = 1;
-    // compute pre
-    for (int i = 0; i < axis; ++i) {
-        (*pre) *= x_dims[i];
-    }
-
-    for (int i = 0; i < y_dims.size(); ++i) {
-        assert(x_dims[i + axis] == y_dims[i]);
-        /// "Broadcast dimension mismatch.");
-        (*n) *= y_dims[i];
-    }
-
-    for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) {
-        (*post) *= x_dims[i];
-    }
+  *pre = 1;
+  *n = 1;
+  *post = 1;
+  // compute pre
+  for (int i = 0; i < axis; ++i) {
+    (*pre) *= x_dims[i];
+  }
+
+  for (int i = 0; i < y_dims.size(); ++i) {
+    assert(x_dims[i + axis] == y_dims[i]);
+    /// "Broadcast dimension mismatch.");
+    (*n) *= y_dims[i];
+  }
+
+  for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) {
+    (*post) *= x_dims[i];
+  }
 }

 /// remove dims tail 1. (4,20,1,1) -> (4,20)
 inline void trim_trailing_singular_dims(framework::DDim *dims) {
-    // Remove trailing dimensions of size 1 for y
-    auto actual_dims_size = dims->size();
-    for (; actual_dims_size != 0; --actual_dims_size) {
-        if ((*dims)[actual_dims_size - 1] != 1)
-            break;
-    }
-    if (actual_dims_size != dims->size()) {
-        auto actual_dims = framework::vectorize(*dims);
-        actual_dims.resize(actual_dims_size);
-        *dims = framework::make_ddim(actual_dims);
-    }
+  // Remove trailing dimensions of size 1 for y
+  auto actual_dims_size = dims->size();
+  for (; actual_dims_size != 0; --actual_dims_size) {
+    if ((*dims)[actual_dims_size - 1] != 1)
+      break;
+  }
+  if (actual_dims_size != dims->size()) {
+    auto actual_dims = framework::vectorize(*dims);
+    actual_dims.resize(actual_dims_size);
+    *dims = framework::make_ddim(actual_dims);
+  }
 }

 template <typename T> class RowwiseTransformIterator {
-  public:
-    RowwiseTransformIterator(const T *ptr, int n) : ptr_(ptr), i_(0), n_(n) {}
-
-    RowwiseTransformIterator<T> &operator++() {
-        ++i_;
-        if (UNLIKELY(i_ == n_)) {
-            i_ = 0;
-        }
-        return *this;
-    }
+public:
+  RowwiseTransformIterator(const T *ptr, int n) : ptr_(ptr), i_(0), n_(n) {}

-    bool operator==(const RowwiseTransformIterator<T> &rhs) const {
-        return (ptr_ + i_) == &(*rhs);
+  RowwiseTransformIterator<T> &operator++() {
+    ++i_;
+    if (UNLIKELY(i_ == n_)) {
+      i_ = 0;
    }
+    return *this;
+  }

-    bool operator!=(const RowwiseTransformIterator<T> &rhs) const {
-        return (ptr_ + i_) != &(*rhs);
-    }
+  bool operator==(const RowwiseTransformIterator<T> &rhs) const {
+    return (ptr_ + i_) == &(*rhs);
+  }

-    const T &operator*() { return ptr_[i_]; }
+  bool operator!=(const RowwiseTransformIterator<T> &rhs) const {
+    return (ptr_ + i_) != &(*rhs);
+  }

-  private:
-    const T *ptr_;
-    int i_;
-    int64_t n_;
+  const T &operator*() { return ptr_[i_]; }
+
+private:
+  const T *ptr_;
+  int i_;
+  int64_t n_;
 };

 /// (4,20,2)+(20,): (20,) just as (20,1), when move 2 strides in last
@@ -101,106 +101,105 @@ template <typename T> class RowwiseTransformIterator {
 /// in (4,20,2) is 2 ,
 /// (20,1) move 1 stride , to fill(add) 2 element with the same number.
 template <typename T> class MidWiseTransformIterator {
-  public:
-    MidWiseTransformIterator(const T *ptr, int n, int post)
-        : ptr_(ptr), i_(0), j_(0), n_(n), post_(post) {}
-
-    MidWiseTransformIterator<T> &operator++() {
-        ++j_;
-        if (UNLIKELY(j_ == post_)) {
-            ++i_;
-            j_ = 0;
-            if (UNLIKELY(i_ == n_)) {
-                i_ = 0;
-            }
-        }
-        return *this;
-    }
-
-    bool operator==(const MidWiseTransformIterator<T> &rhs) const {
-        return (ptr_ + i_) == &(*rhs);
-    }
-
-    bool operator!=(const MidWiseTransformIterator<T> &rhs) const {
-        return (ptr_ + i_) != &(*rhs);
-    }
-
-    const T &operator*() { return ptr_[i_]; }
-
-  private:
-    const T *ptr_;
-    int64_t i_;
-    int64_t j_;
-    int64_t n_;
-    int64_t post_;
+public:
+  MidWiseTransformIterator(const T *ptr, int n, int post)
+      : ptr_(ptr), i_(0), j_(0), n_(n), post_(post) {}
+
+  MidWiseTransformIterator<T> &operator++() {
+    ++j_;
+    if (UNLIKELY(j_ == post_)) {
+      ++i_;
+      j_ = 0;
+      if (UNLIKELY(i_ == n_)) {
+        i_ = 0;
+      }
+    }
+    return *this;
+  }
+
+  bool operator==(const MidWiseTransformIterator<T> &rhs) const {
+    return (ptr_ + i_) == &(*rhs);
+  }
+
+  bool operator!=(const MidWiseTransformIterator<T> &rhs) const {
+    return (ptr_ + i_) != &(*rhs);
+  }
+
+  const T &operator*() { return ptr_[i_]; }
+
+private:
+  const T *ptr_;
+  int64_t i_;
+  int64_t j_;
+  int64_t n_;
+  int64_t post_;
 };

 template <typename Functor, typename T, typename OutType = T>
 class TransformFunctor {
-  public:
-    TransformFunctor(const framework::Tensor *x, const framework::Tensor *y,
-                     framework::Tensor *z, Functor func)
-        : x_(x->data<T>()), y_(y->data<T>()), z_(z->mutable_data<OutType>()),
-          nx_(x->numel()), func_(func) {}
-
-    inline void Run() const {
-        math::Transform trans;
-        // 同时执行func(x_, y_)传入z_。
-        trans(x_, x_ + nx_, y_, z_, func_);
-    }
-
-    inline void RunRowWise(int n, int pre) const {
-        math::Transform trans;
-        trans(x_, x_ + nx_, RowwiseTransformIterator<T>(y_, n), z_, func_);
-    }
-
-    inline void RunMidWise(int n, int pre, int post) const {
-        math::Transform trans;
-        trans(x_, x_ + nx_, MidWiseTransformIterator<T>(y_, n, post), z_,
-              func_);
-    }
-
-  private:
-    const T *x_;
-    const T *y_;
-    OutType *z_;
-    int64_t nx_;
-    Functor func_;
+public:
+  TransformFunctor(const framework::Tensor *x, const framework::Tensor *y,
+                   framework::Tensor *z, Functor func)
+      : x_(x->data<T>()), y_(y->data<T>()), z_(z->mutable_data<OutType>()),
+        nx_(x->numel()), func_(func) {}
+
+  inline void Run() const {
+    math::Transform trans;
+    // 同时执行func(x_, y_)传入z_。
+    trans(x_, x_ + nx_, y_, z_, func_);
+  }
+
+  inline void RunRowWise(int n, int pre) const {
+    math::Transform trans;
+    trans(x_, x_ + nx_, RowwiseTransformIterator<T>(y_, n), z_, func_);
+  }
+
+  inline void RunMidWise(int n, int pre, int post) const {
+    math::Transform trans;
+    trans(x_, x_ + nx_, MidWiseTransformIterator<T>(y_, n, post), z_, func_);
+  }
+
+private:
+  const T *x_;
+  const T *y_;
+  OutType *z_;
+  int64_t nx_;
+  Functor func_;
 };

 template <typename Functor, typename T, typename OutType = T>
 void ElementwiseComputeEx(const framework::Tensor *x,
                          const framework::Tensor *y, int axis, Functor func,
                          framework::Tensor *z) {
-    TransformFunctor<Functor, T, OutType> functor(x, y, z, func);
-
-    auto x_dims = x->dims();
-    auto y_dims = y->dims();
-    // PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
-    //                  "Rank of first input must >= rank of second
-    //                  input.");
-
-    if (x_dims == y_dims) {
-        functor.Run();
-        return;
-    }
-
-    /// axis = -1 represent the last dimension.
-    axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
-    // PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
-    //               "Axis should be in range [0, x_dims)");
-    trim_trailing_singular_dims(&y_dims);
-    axis = (y_dims.size() == 0) ? x_dims.size() : axis;
-
-    int pre, n, post;
-    get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);
-    if (post == 1) {
-        functor.RunRowWise(n, pre);
-        return;
-    } else {
-        functor.RunMidWise(n, pre, post);
-        return;
-    }
+  TransformFunctor<Functor, T, OutType> functor(x, y, z, func);
+
+  auto x_dims = x->dims();
+  auto y_dims = y->dims();
+  // PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
+  //                  "Rank of first input must >= rank of second
+  //                  input.");
+
+  if (x_dims == y_dims) {
+    functor.Run();
+    return;
+  }
+
+  /// axis = -1 represent the last dimension.
+  axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
+  // PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
+  //               "Axis should be in range [0, x_dims)");
+  trim_trailing_singular_dims(&y_dims);
+  axis = (y_dims.size() == 0) ? x_dims.size() : axis;
+
+  int pre, n, post;
+  get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);
+  if (post == 1) {
+    functor.RunRowWise(n, pre);
+    return;
+  } else {
+    functor.RunMidWise(n, pre, post);
+    return;
+  }
 }

 } // namespace operators

--- a/src/operators/math/im2col.cc
+++ b/src/operators/math/im2col.cc
@@ -26,70 +26,65 @@ namespace math {
 * output_width]
 */
 template <class T> class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
-  public:
-    void operator()(const framework::Tensor &im,
-                    const std::vector<int> &dilation,
-                    const std::vector<int> &stride,
-                    const std::vector<int> &padding, framework::Tensor *col) {
-        //    PADDLE_ENFORCE(im.dims().size() == 3);
-        //    PADDLE_ENFORCE(col->dims().size() == 5);
+public:
+  void operator()(const framework::Tensor &im, const std::vector<int> &dilation,
+                  const std::vector<int> &stride,
+                  const std::vector<int> &padding, framework::Tensor *col) {
+    //    PADDLE_ENFORCE(im.dims().size() == 3);
+    //    PADDLE_ENFORCE(col->dims().size() == 5);

-        int im_channels = im.dims()[0];
-        int im_height = im.dims()[1];
-        int im_width = im.dims()[2];
-        int filter_height = col->dims()[1];
-        int filter_width = col->dims()[2];
-        int col_height = col->dims()[3];
-        int col_width = col->dims()[4];
+    int im_channels = im.dims()[0];
+    int im_height = im.dims()[1];
+    int im_width = im.dims()[2];
+    int filter_height = col->dims()[1];
+    int filter_width = col->dims()[2];
+    int col_height = col->dims()[3];
+    int col_width = col->dims()[4];

-        //    PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2]
-        //    -
-        //                       ((dilation[0] * (filter_height - 1)
-        //                       + 1))) /
-        //                              stride[0] +
-        //                          1,
-        //                      col_height,
-        //                      "Output_height and
-        //                      padding(padding_up, padding_down)
-        //                      are " "inconsistent.");
-        //    PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3]
-        //    -
-        //                       ((dilation[1] * (filter_width - 1)
-        //                       + 1))) /
-        //                              stride[1] +
-        //                          1,
-        //                      col_width,
-        //                      "Output_height and
-        //                      padding(padding_up, padding_down)
-        //                      are " "inconsistent.");
+    //    PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2]
+    //    -
+    //                       ((dilation[0] * (filter_height - 1)
+    //                       + 1))) /
+    //                              stride[0] +
+    //                          1,
+    //                      col_height,
+    //                      "Output_height and
+    //                      padding(padding_up, padding_down)
+    //                      are " "inconsistent.");
+    //    PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3]
+    //    -
+    //                       ((dilation[1] * (filter_width - 1)
+    //                       + 1))) /
+    //                              stride[1] +
+    //                          1,
+    //                      col_width,
+    //                      "Output_height and
+    //                      padding(padding_up, padding_down)
+    //                      are " "inconsistent.");

-        int channels_col = im_channels * filter_height * filter_width;
+    int channels_col = im_channels * filter_height * filter_width;

-        const T *im_data = im.data<T>();
-        T *col_data = col->data<T>();
-        for (int c = 0; c < channels_col; ++c) {
-            int w_offset = c % filter_width;
-            int h_offset = (c / filter_width) % filter_height;
-            int c_im = c / (filter_width * filter_height);
-            for (int h = 0; h < col_height; ++h) {
-                int im_row_idx =
-                    h * stride[0] - padding[0] + h_offset * dilation[0];
-                for (int w = 0; w < col_width; ++w) {
-                    int im_col_idx =
-                        w * stride[1] - padding[1] + w_offset * dilation[1];
-                    int col_idx = (c * col_height + h) * col_width + w;
-                    int im_idx =
-                        (im_row_idx + c_im * im_height) * im_width + im_col_idx;
+    const T *im_data = im.data<T>();
+    T *col_data = col->data<T>();
+    for (int c = 0; c < channels_col; ++c) {
+      int w_offset = c % filter_width;
+      int h_offset = (c / filter_width) % filter_height;
+      int c_im = c / (filter_width * filter_height);
+      for (int h = 0; h < col_height; ++h) {
+        int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
+        for (int w = 0; w < col_width; ++w) {
+          int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
+          int col_idx = (c * col_height + h) * col_width + w;
+          int im_idx = (im_row_idx + c_im * im_height) * im_width + im_col_idx;

-                    col_data[col_idx] =
-                        (im_row_idx < 0 || im_row_idx >= im_height ||
-                         im_col_idx < 0 || im_col_idx >= im_width)
-                            ? static_cast<T>(0)
-                            : im_data[im_idx];
-                }
-            }
+          col_data[col_idx] = (im_row_idx < 0 || im_row_idx >= im_height ||
+                               im_col_idx < 0 || im_col_idx >= im_width)
+                                  ? static_cast<T>(0)
+                                  : im_data[im_idx];
        }
+      }
    }
+  }
 };

 /*
@@ -99,67 +94,64 @@ template <class T> class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
 * output_width]
 */
 template <class T> class Col2ImFunctor<ColFormat::kCFO, CPU, T> {
-  public:
-    void operator()(const framework::Tensor &col,
-                    const std::vector<int> &dilation,
-                    const std::vector<int> &stride,
-                    const std::vector<int> &padding, framework::Tensor *im) {
-        //    PADDLE_ENFORCE(im->dims().size() == 3);
-        //    PADDLE_ENFORCE(col.dims().size() == 5);
-        int im_channels = im->dims()[0];
-        int im_height = im->dims()[1];
-        int im_width = im->dims()[2];
-        int filter_height = col.dims()[1];
-        int filter_width = col.dims()[2];
-        int col_height = col.dims()[3];
-        int col_width = col.dims()[4];
+public:
+  void operator()(const framework::Tensor &col,
+                  const std::vector<int> &dilation,
+                  const std::vector<int> &stride,
+                  const std::vector<int> &padding, framework::Tensor *im) {
+    //    PADDLE_ENFORCE(im->dims().size() == 3);
+    //    PADDLE_ENFORCE(col.dims().size() == 5);
+    int im_channels = im->dims()[0];
+    int im_height = im->dims()[1];
+    int im_width = im->dims()[2];
+    int filter_height = col.dims()[1];
+    int filter_width = col.dims()[2];
+    int col_height = col.dims()[3];
+    int col_width = col.dims()[4];

-        //    PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2]
-        //    -
-        //                       ((dilation[0] * (filter_height - 1)
-        //                       + 1))) /
-        //                              stride[0] +
-        //                          1,
-        //                      col_height,
-        //                      "Output_height and
-        //                      padding(padding_up, padding_down)
-        //                      are " "inconsistent.");
-        //    PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3]
-        //    -
-        //                       ((dilation[1] * (filter_width - 1)
-        //                       + 1))) /
-        //                              stride[1] +
-        //                          1,
-        //                      col_width,
-        //                      "Output_height and
-        //                      padding(padding_up, padding_down)
-        //                      are " "inconsistent.");
+    //    PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2]
+    //    -
+    //                       ((dilation[0] * (filter_height - 1)
+    //                       + 1))) /
+    //                              stride[0] +
+    //                          1,
+    //                      col_height,
+    //                      "Output_height and
+    //                      padding(padding_up, padding_down)
+    //                      are " "inconsistent.");
+    //    PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3]
+    //    -
+    //                       ((dilation[1] * (filter_width - 1)
+    //                       + 1))) /
+    //                              stride[1] +
+    //                          1,
+    //                      col_width,
+    //                      "Output_height and
+    //                      padding(padding_up, padding_down)
+    //                      are " "inconsistent.");

-        int channels_col = im_channels * filter_height * filter_width;
+    int channels_col = im_channels * filter_height * filter_width;

-        T *im_data = im->data<T>();
-        const T *col_data = col.data<T>();
+    T *im_data = im->data<T>();
+    const T *col_data = col.data<T>();

-        for (int c = 0; c < channels_col; ++c) {
-            int w_offset = c % filter_width;
-            int h_offset = (c / filter_width) % filter_height;
-            int c_im = c / (filter_width * filter_height);
-            for (int h = 0; h < col_height; ++h) {
-                int im_row_idx =
-                    h * stride[0] - padding[0] + h_offset * dilation[0];
-                for (int w = 0; w < col_width; ++w) {
-                    int im_col_idx =
-                        w * stride[1] - padding[1] + w_offset * dilation[1];
-                    if ((im_row_idx) >= 0 && (im_row_idx) < im_height &&
-                        (im_col_idx) >= 0 && (im_col_idx) < im_width) {
-                        im_data[(im_row_idx + c_im * im_height) * im_width +
-                                im_col_idx] +=
-                            col_data[(c * col_height + h) * col_width + w];
-                    }
-                }
-            }
+    for (int c = 0; c < channels_col; ++c) {
+      int w_offset = c % filter_width;
+      int h_offset = (c / filter_width) % filter_height;
+      int c_im = c / (filter_width * filter_height);
+      for (int h = 0; h < col_height; ++h) {
+        int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
+        for (int w = 0; w < col_width; ++w) {
+          int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
+          if ((im_row_idx) >= 0 && (im_row_idx) < im_height &&
+              (im_col_idx) >= 0 && (im_col_idx) < im_width) {
+            im_data[(im_row_idx + c_im * im_height) * im_width + im_col_idx] +=
+                col_data[(c * col_height + h) * col_width + w];
+          }
        }
+      }
    }
+  }
 };

 template class Im2ColFunctor<ColFormat::kCFO, CPU, float>;
@@ -174,74 +166,69 @@ template class Col2ImFunctor<ColFormat::kCFO, CPU, double>;
 * filter_width]
 */
 template <class T> class Im2ColFunctor<ColFormat::kOCF, CPU, T> {
-  public:
-    void operator()(const framework::Tensor &im,
-                    const std::vector<int> &dilation,
-                    const std::vector<int> &stride,
-                    const std::vector<int> &padding, framework::Tensor *col) {
-        //    PADDLE_ENFORCE(im.dims().size() == 3);
-        //    PADDLE_ENFORCE(col->dims().size() == 5);
-        int im_channels = im.dims()[0];
-        int im_height = im.dims()[1];
-        int im_width = im.dims()[2];
-        int filter_height = col->dims()[3];
-        int filter_width = col->dims()[4];
-        int col_height = col->dims()[0];
-        int col_width = col->dims()[1];
+public:
+  void operator()(const framework::Tensor &im, const std::vector<int> &dilation,
+                  const std::vector<int> &stride,
+                  const std::vector<int> &padding, framework::Tensor *col) {
+    //    PADDLE_ENFORCE(im.dims().size() == 3);
+    //    PADDLE_ENFORCE(col->dims().size() == 5);
+    int im_channels = im.dims()[0];
+    int im_height = im.dims()[1];
+    int im_width = im.dims()[2];
+    int filter_height = col->dims()[3];
+    int filter_width = col->dims()[4];
+    int col_height = col->dims()[0];
+    int col_width = col->dims()[1];

-        //    PADDLE_ENFORCE_EQ(
-        //        (im_height + padding[0] + padding[2] -
-        //        filter_height) / stride[0]
-        //        + 1, col_height, "Output_height and
-        //        padding(padding_up,
-        //        padding_down) are " "inconsistent.");
-        //    PADDLE_ENFORCE_EQ(
-        //        (im_width + padding[1] + padding[3] -
-        //        filter_width) / stride[1] +
-        //        1, col_width, "col_width and padding(padding_left,
-        //        padding_right)
-        //        are " "inconsistent.");
+    //    PADDLE_ENFORCE_EQ(
+    //        (im_height + padding[0] + padding[2] -
+    //        filter_height) / stride[0]
+    //        + 1, col_height, "Output_height and
+    //        padding(padding_up,
+    //        padding_down) are " "inconsistent.");
+    //    PADDLE_ENFORCE_EQ(
+    //        (im_width + padding[1] + padding[3] -
+    //        filter_width) / stride[1] +
+    //        1, col_width, "col_width and padding(padding_left,
+    //        padding_right)
+    //        are " "inconsistent.");

-        const T *im_data = im.data<T>();
-        T *col_data = col->data<T>();
+    const T *im_data = im.data<T>();
+    T *col_data = col->data<T>();

-        for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
-            for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
-                for (int channel = 0; channel < im_channels; ++channel) {
-                    for (int filter_row_idx = 0; filter_row_idx < filter_height;
-                         ++filter_row_idx) {
-                        int im_row_offset = col_row_idx * stride[0] +
-                                            filter_row_idx - padding[0];
-                        for (int filter_col_idx = 0;
-                             filter_col_idx < filter_width; ++filter_col_idx) {
-                            int im_col_offset = col_col_idx * stride[1] +
-                                                filter_col_idx - padding[1];
+    for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
+      for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
+        for (int channel = 0; channel < im_channels; ++channel) {
+          for (int filter_row_idx = 0; filter_row_idx < filter_height;
+               ++filter_row_idx) {
+            int im_row_offset =
+                col_row_idx * stride[0] + filter_row_idx - padding[0];
+            for (int filter_col_idx = 0; filter_col_idx < filter_width;
+                 ++filter_col_idx) {
+              int im_col_offset =
+                  col_col_idx * stride[1] + filter_col_idx - padding[1];

-                            int col_offset =
-                                ((((col_row_idx)*col_width + col_col_idx) *
-                                      im_channels +
-                                  channel) *
-                                     filter_height +
-                                 filter_row_idx) *
-                                    filter_width +
-                                filter_col_idx;
+              int col_offset =
+                  ((((col_row_idx)*col_width + col_col_idx) * im_channels +
+                    channel) *
+                       filter_height +
+                   filter_row_idx) *
+                      filter_width +
+                  filter_col_idx;

-                            int im_offset =
-                                (channel * im_height + im_row_offset) *
-                                    im_width +
-                                im_col_offset;
-                            col_data[col_offset] =
-                                (im_row_offset < 0 ||
-                                 im_row_offset >= im_height ||
-                                 im_col_offset < 0 || im_col_offset >= im_width)
-                                    ? static_cast<T>(0)
-                                    : im_data[im_offset];
-                        }
-                    }
-                }
+              int im_offset = (channel * im_height + im_row_offset) * im_width +
+                              im_col_offset;
+              col_data[col_offset] =
+                  (im_row_offset < 0 || im_row_offset >= im_height ||
+                   im_col_offset < 0 || im_col_offset >= im_width)
+                      ? static_cast<T>(0)
+                      : im_data[im_offset];
            }
+          }
        }
+      }
    }
+  }
 };

 /*
@@ -251,74 +238,70 @@ template <class T> class Im2ColFunctor<ColFormat::kOCF, CPU, T> {
 * filter_width]
 */
 template <class T> class Col2ImFunctor<ColFormat::kOCF, CPU, T> {
-  public:
-    void operator()(const framework::Tensor &col,
-                    const std::vector<int> &dilation,
-                    const std::vector<int> &stride,
-                    const std::vector<int> &padding, framework::Tensor *im) {
-        //    PADDLE_ENFORCE(im->dims().size() == 3);
-        //    PADDLE_ENFORCE(col.dims().size() == 5);
-        int im_channels = im->dims()[0];
-        int im_height = im->dims()[1];
-        int im_width = im->dims()[2];
-        int filter_height = col.dims()[3];
-        int filter_width = col.dims()[4];
-        int col_height = col.dims()[0];
-        int col_width = col.dims()[1];
+public:
+  void operator()(const framework::Tensor &col,
+                  const std::vector<int> &dilation,
+                  const std::vector<int> &stride,
+                  const std::vector<int> &padding, framework::Tensor *im) {
+    //    PADDLE_ENFORCE(im->dims().size() == 3);
+    //    PADDLE_ENFORCE(col.dims().size() == 5);
+    int im_channels = im->dims()[0];
+    int im_height = im->dims()[1];
+    int im_width = im->dims()[2];
+    int filter_height = col.dims()[3];
+    int filter_width = col.dims()[4];
+    int col_height = col.dims()[0];
+    int col_width = col.dims()[1];

-        //    PADDLE_ENFORCE_EQ(
-        //        (im_height + padding[0] + padding[2] -
-        //        filter_height) / stride[0]
-        //        + 1, col_height, "Output_height and
-        //        padding(padding_up,
-        //        padding_down) are " "inconsistent.");
-        //    PADDLE_ENFORCE_EQ(
-        //        (im_width + padding[1] + padding[3] -
-        //        filter_width) / stride[1] +
-        //        1, col_width, "col_width and padding(padding_left,
-        //        padding_right)
-        //        are " "inconsistent.");
+    //    PADDLE_ENFORCE_EQ(
+    //        (im_height + padding[0] + padding[2] -
+    //        filter_height) / stride[0]
+    //        + 1, col_height, "Output_height and
+    //        padding(padding_up,
+    //        padding_down) are " "inconsistent.");
+    //    PADDLE_ENFORCE_EQ(
+    //        (im_width + padding[1] + padding[3] -
+    //        filter_width) / stride[1] +
+    //        1, col_width, "col_width and padding(padding_left,
+    //        padding_right)
+    //        are " "inconsistent.");

-        T *im_data = im->data<T>();
-        const T *col_data = col.data<T>();
+    T *im_data = im->data<T>();
+    const T *col_data = col.data<T>();

-        for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
-            for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
-                for (int channel = 0; channel < im_channels; ++channel) {
-                    for (int filter_row_idx = 0; filter_row_idx < filter_height;
-                         ++filter_row_idx) {
-                        int im_row_offset = col_row_idx * stride[0] +
-                                            filter_row_idx - padding[0];
-                        for (int filter_col_idx = 0;
-                             filter_col_idx < filter_width; ++filter_col_idx) {
-                            int im_col_offset = col_col_idx * stride[1] +
-                                                filter_col_idx - padding[1];
+    for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
+      for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
+        for (int channel = 0; channel < im_channels; ++channel) {
+          for (int filter_row_idx = 0; filter_row_idx < filter_height;
+               ++filter_row_idx) {
+            int im_row_offset =
+                col_row_idx * stride[0] + filter_row_idx - padding[0];
+            for (int filter_col_idx = 0; filter_col_idx < filter_width;
+                 ++filter_col_idx) {
+              int im_col_offset =
+                  col_col_idx * stride[1] + filter_col_idx - padding[1];

-                            int col_offset =
-                                (((col_row_idx * col_width + col_col_idx) *
-                                      im_channels +
-                                  channel) *
-                                     filter_height +
-                                 filter_row_idx) *
-                                    filter_width +
-                                filter_col_idx;
+              int col_offset =
+                  (((col_row_idx * col_width + col_col_idx) * im_channels +
+                    channel) *
+                       filter_height +
+                   filter_row_idx) *
+                      filter_width +
+                  filter_col_idx;

-                            if (im_row_offset >= 0 &&
-                                im_row_offset < im_height &&
-                                im_col_offset >= 0 &&
-                                im_col_offset < im_width) {
-                                int im_offset =
-                                    (channel * im_height + im_row_offset) *
-                                        im_width +
-                                    im_col_offset;
-                                im_data[im_offset] += col_data[col_offset];
-                            }
-                        }
-                    }
-                }
+              if (im_row_offset >= 0 && im_row_offset < im_height &&
+                  im_col_offset >= 0 && im_col_offset < im_width) {
+                int im_offset =
+                    (channel * im_height + im_row_offset) * im_width +
+                    im_col_offset;
+                im_data[im_offset] += col_data[col_offset];
+              }
            }
+          }
        }
+      }
    }
+  }
 };

 template class Im2ColFunctor<ColFormat::kOCF, CPU, float>;

--- a/src/operators/math/im2col.h
+++ b/src/operators/math/im2col.h
@@ -89,20 +89,19 @@ enum class ColFormat { kCFO = 0, kOCF = 1 };
 */
 template <ColFormat Format, typename DeviceType, typename T>
 class Im2ColFunctor {
-  public:
-    void operator()(const framework::Tensor &im,
-                    const std::vector<int> &dilation,
-                    const std::vector<int> &stride,
-                    const std::vector<int> &padding, framework::Tensor *col);
+public:
+  void operator()(const framework::Tensor &im, const std::vector<int> &dilation,
+                  const std::vector<int> &stride,
+                  const std::vector<int> &padding, framework::Tensor *col);
 };

 template <ColFormat Format, typename DeviceType, typename T>
 class Col2ImFunctor {
-  public:
-    void operator()(const framework::Tensor &col,
-                    const std::vector<int> &dilation,
-                    const std::vector<int> &stride,
-                    const std::vector<int> &padding, framework::Tensor *im);
+public:
+  void operator()(const framework::Tensor &col,
+                  const std::vector<int> &dilation,
+                  const std::vector<int> &stride,
+                  const std::vector<int> &padding, framework::Tensor *im);
 };

 } // namespace math

--- a/src/operators/math/math_function.cc
+++ b/src/operators/math/math_function.cc
@@ -22,11 +22,11 @@ template <>
 void gemm<float>(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB,
                 const int M, const int N, const int K, const float alpha,
                 const float *A, const float *B, const float beta, float *C) {
-    int lda = (transA == CblasNoTrans) ? K : M;
-    int ldb = (transB == CblasNoTrans) ? N : K;
-    int ldc = N;
-    cblas_sgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A, lda, B, ldb,
-                beta, C, ldc);
+  int lda = (transA == CblasNoTrans) ? K : M;
+  int ldb = (transB == CblasNoTrans) ? N : K;
+  int ldc = N;
+  cblas_sgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A, lda, B, ldb,
+              beta, C, ldc);
 }

 template <>
@@ -34,11 +34,11 @@ void gemm<double>(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB,
                  const int M, const int N, const int K, const double alpha,
                  const double *A, const double *B, const double beta,
                  double *C) {
-    int lda = (transA == CblasNoTrans) ? K : M;
-    int ldb = (transB == CblasNoTrans) ? N : K;
-    int ldc = N;
-    cblas_dgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A, lda, B, ldb,
-                beta, C, ldc);
+  int lda = (transA == CblasNoTrans) ? K : M;
+  int ldb = (transB == CblasNoTrans) ? N : K;
+  int ldc = N;
+  cblas_dgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A, lda, B, ldb,
+              beta, C, ldc);
 }

 template <>
@@ -46,9 +46,9 @@ void gemm<float>(const bool transA, const bool transB, const int M, const int N,
                 const int K, const float alpha, const float *A, const int lda,
                 const float *B, const int ldb, const float beta, float *C,
                 const int ldc) {
-    cblas_sgemm(CblasRowMajor, transA == false ? CblasNoTrans : CblasTrans,
-                transB == false ? CblasNoTrans : CblasTrans, M, N, K, alpha, A,
-                lda, B, ldb, beta, C, ldc);
+  cblas_sgemm(CblasRowMajor, transA == false ? CblasNoTrans : CblasTrans,
+              transB == false ? CblasNoTrans : CblasTrans, M, N, K, alpha, A,
+              lda, B, ldb, beta, C, ldc);
 }

 template <>
@@ -56,67 +56,67 @@ void gemm<double>(const bool transA, const bool transB, const int M,
                  const int N, const int K, const double alpha, const double *A,
                  const int lda, const double *B, const int ldb,
                  const double beta, double *C, const int ldc) {
-    cblas_dgemm(CblasRowMajor, transA == false ? CblasNoTrans : CblasTrans,
-                transB == false ? CblasNoTrans : CblasTrans, M, N, K, alpha, A,
-                lda, B, ldb, beta, C, ldc);
+  cblas_dgemm(CblasRowMajor, transA == false ? CblasNoTrans : CblasTrans,
+              transB == false ? CblasNoTrans : CblasTrans, M, N, K, alpha, A,
+              lda, B, ldb, beta, C, ldc);
 }

 template <>
 void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
                   const framework::Tensor &matrix_b, bool trans_b, float alpha,
                   framework::Tensor *matrix_out, float beta) {
-    auto dim_a = matrix_a.dims();
-    auto dim_b = matrix_b.dims();
-    auto dim_out = matrix_out->dims();
-    //  PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 &&
-    //  dim_out.size() ==
-    //  2,
-    //                 "The input and output of matmul be matrix");
-    //
-    //  PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
-    //                     platform::is_cpu_place(matrix_b.place())
-    //                     &&
-    //                     platform::is_cpu_place(matrix_out->place()),
-    //                 "Matrix must all be in CPUPlace");
-
-    int M = dim_out[0];
-    int N = dim_out[1];
-    int K = (trans_a == false) ? dim_a[1] : dim_a[0];
-
-    CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans;
-    CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans;
-
-    gemm<float>(transA, transB, M, N, K, alpha, matrix_a.data<float>(),
-                matrix_b.data<float>(), beta, matrix_out->data<float>());
+  auto dim_a = matrix_a.dims();
+  auto dim_b = matrix_b.dims();
+  auto dim_out = matrix_out->dims();
+  //  PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 &&
+  //  dim_out.size() ==
+  //  2,
+  //                 "The input and output of matmul be matrix");
+  //
+  //  PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
+  //                     platform::is_cpu_place(matrix_b.place())
+  //                     &&
+  //                     platform::is_cpu_place(matrix_out->place()),
+  //                 "Matrix must all be in CPUPlace");
+
+  int M = dim_out[0];
+  int N = dim_out[1];
+  int K = (trans_a == false) ? dim_a[1] : dim_a[0];
+
+  CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans;
+
+  gemm<float>(transA, transB, M, N, K, alpha, matrix_a.data<float>(),
+              matrix_b.data<float>(), beta, matrix_out->data<float>());
 }

 template <>
 void matmul<double>(const framework::Tensor &matrix_a, bool trans_a,
                    const framework::Tensor &matrix_b, bool trans_b,
                    double alpha, framework::Tensor *matrix_out, double beta) {
-    auto dim_a = matrix_a.dims();
-    auto dim_b = matrix_b.dims();
-    auto dim_out = matrix_out->dims();
-    //  PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 &&
-    //  dim_out.size() ==
-    //  2,
-    //                 "The input and output of matmul be matrix");
-    //
-    //  PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
-    //                     platform::is_cpu_place(matrix_b.place())
-    //                     &&
-    //                     platform::is_cpu_place(matrix_out->place()),
-    //                 "Matrix must all be in CPUPlace");
-
-    int M = dim_out[0];
-    int N = dim_out[1];
-    int K = (trans_a == false) ? dim_a[1] : dim_a[0];
-
-    CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans;
-    CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans;
-
-    gemm<double>(transA, transB, M, N, K, alpha, matrix_a.data<double>(),
-                 matrix_b.data<double>(), beta, matrix_out->data<double>());
+  auto dim_a = matrix_a.dims();
+  auto dim_b = matrix_b.dims();
+  auto dim_out = matrix_out->dims();
+  //  PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 &&
+  //  dim_out.size() ==
+  //  2,
+  //                 "The input and output of matmul be matrix");
+  //
+  //  PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
+  //                     platform::is_cpu_place(matrix_b.place())
+  //                     &&
+  //                     platform::is_cpu_place(matrix_out->place()),
+  //                 "Matrix must all be in CPUPlace");
+
+  int M = dim_out[0];
+  int N = dim_out[1];
+  int K = (trans_a == false) ? dim_a[1] : dim_a[0];
+
+  CBLAS_TRANSPOSE transA = (trans_a == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE transB = (trans_b == false) ? CblasNoTrans : CblasTrans;
+
+  gemm<double>(transA, transB, M, N, K, alpha, matrix_a.data<double>(),
+               matrix_b.data<double>(), beta, matrix_out->data<double>());
 }

 } // namespace math

--- a/src/operators/math/pool3x3.h
+++ b/src/operators/math/pool3x3.h
@@ -22,9 +22,9 @@ SOFTWARE.
 #endif // __ARM_NEON

 static void Pool3x3Max() {
-    // todo impl with neon
+  // todo impl with neon
 }

 static void Pool3x3Avg() {
-    // todo impl with neon
+  // todo impl with neon
 }
--- a/src/operators/math/pool_2x2.h
+++ b/src/operators/math/pool_2x2.h
@@ -22,9 +22,9 @@ SOFTWARE.
 #endif // __ARM_NEON

 static void Pool2x2Max() {
-    // todo impl with neon
+  // todo impl with neon
 }

 static void Pool2x2Avg() {
-    // todo impl with neon
+  // todo impl with neon
 }
--- a/src/operators/math/pooling.cpp
+++ b/src/operators/math/pooling.cpp
@@ -30,67 +30,65 @@ namespace math {
 */
 template <typename PoolProcess, typename T>
 class PoolFunctor<CPU, PoolProcess, T> {
-  public:
-    void operator()(const framework::Tensor &input,
-                    const std::vector<int> &ksize,
-                    const std::vector<int> &strides,
-                    const std::vector<int> &paddings, PoolProcess pool_process,
-                    framework::Tensor *output) {
+public:
+  void operator()(const framework::Tensor &input, const std::vector<int> &ksize,
+                  const std::vector<int> &strides,
+                  const std::vector<int> &paddings, PoolProcess pool_process,
+                  framework::Tensor *output) {

-        const int batch_size = input.dims()[0];
+    const int batch_size = input.dims()[0];

-        const int input_height = input.dims()[2];
+    const int input_height = input.dims()[2];

-        const int input_width = input.dims()[3];
-        if (output == nullptr) {
-            DLOG << "output tensor is null";
-        }
-        const int output_channels = output->dims()[1];
+    const int input_width = input.dims()[3];
+    if (output == nullptr) {
+      DLOG << "output tensor is null";
+    }
+    const int output_channels = output->dims()[1];

-        const int output_height = output->dims()[2];
-        const int output_width = output->dims()[3];
-        const int ksize_height = ksize[0];
-        const int ksize_width = ksize[1];
-        const int stride_height = strides[0];
-        const int stride_width = strides[1];
-        const int padding_height = paddings[0];
-        const int padding_width = paddings[1];
+    const int output_height = output->dims()[2];
+    const int output_width = output->dims()[3];
+    const int ksize_height = ksize[0];
+    const int ksize_width = ksize[1];
+    const int stride_height = strides[0];
+    const int stride_width = strides[1];
+    const int padding_height = paddings[0];
+    const int padding_width = paddings[1];

-        const int input_stride = input_height * input_width;
-        const int output_stride = output_height * output_width;
+    const int input_stride = input_height * input_width;
+    const int output_stride = output_height * output_width;

-        const T *input_data = input.data<T>();
-        T *output_data = output->mutable_data<T>();
+    const T *input_data = input.data<T>();
+    T *output_data = output->mutable_data<T>();

-        for (int i = 0; i < batch_size; i++) {
+    for (int i = 0; i < batch_size; i++) {
 #pragma omp parallel for
-            for (int c = 0; c < output_channels; ++c) {
-                for (int ph = 0; ph < output_height; ++ph) {
-                    int hstart = ph * stride_height - padding_height;
-                    int hend = std::min(hstart + ksize_height, input_height);
-                    hstart = std::max(hstart, 0);
-                    for (int pw = 0; pw < output_width; ++pw) {
-                        int wstart = pw * stride_width - padding_width;
-                        int wend = std::min(wstart + ksize_width, input_width);
-                        wstart = std::max(wstart, 0);
+      for (int c = 0; c < output_channels; ++c) {
+        for (int ph = 0; ph < output_height; ++ph) {
+          int hstart = ph * stride_height - padding_height;
+          int hend = std::min(hstart + ksize_height, input_height);
+          hstart = std::max(hstart, 0);
+          for (int pw = 0; pw < output_width; ++pw) {
+            int wstart = pw * stride_width - padding_width;
+            int wend = std::min(wstart + ksize_width, input_width);
+            wstart = std::max(wstart, 0);

-                        T ele = pool_process.initial();
-                        for (int h = hstart; h < hend; ++h) {
-                            for (int w = wstart; w < wend; ++w) {
-                                pool_process.compute(
-                                    input_data[h * input_width + w], &ele);
-                            }
-                        }
-                        int pool_size = (hend - hstart) * (wend - wstart);
-                        pool_process.finalize(static_cast<T>(pool_size), &ele);
-                        output_data[ph * output_width + pw] = ele;
-                    }
-                }
-                input_data += input_stride;
-                output_data += output_stride;
+            T ele = pool_process.initial();
+            for (int h = hstart; h < hend; ++h) {
+              for (int w = wstart; w < wend; ++w) {
+                pool_process.compute(input_data[h * input_width + w], &ele);
+              }
            }
+            int pool_size = (hend - hstart) * (wend - wstart);
+            pool_process.finalize(static_cast<T>(pool_size), &ele);
+            output_data[ph * output_width + pw] = ele;
+          }
        }
+        input_data += input_stride;
+        output_data += output_stride;
+      }
    }
+  }
 };

 template class PoolFunctor<CPU, math::AvgPool<float>, float>;

--- a/src/operators/math/pooling.h
+++ b/src/operators/math/pooling.h
@@ -38,31 +38,30 @@ namespace math {
 *        MaxPoolGrad and AvgPoolGrad are gradient operations respectively.
 */
 template <class T> class MaxPool {
-  public:
-    inline T initial() { return static_cast<T>(-FLT_MAX); }
+public:
+  inline T initial() { return static_cast<T>(-FLT_MAX); }

-    inline void compute(const T &x, T *y) { *y = *y > x ? *y : x; }
+  inline void compute(const T &x, T *y) { *y = *y > x ? *y : x; }

-    inline void finalize(const T &pool_field, T *y) {}
+  inline void finalize(const T &pool_field, T *y) {}
 };

 template <class T> class AvgPool {
-  public:
-    inline T initial() { return static_cast<T>(0); }
+public:
+  inline T initial() { return static_cast<T>(0); }

-    inline void compute(const T &x, T *y) { *y += x; }
+  inline void compute(const T &x, T *y) { *y += x; }

-    inline void finalize(const T &pool_field, T *y) { *y /= pool_field; }
+  inline void finalize(const T &pool_field, T *y) { *y /= pool_field; }
 };

 template <typename DeviceType, typename PoolProcess, typename T>
 class PoolFunctor {
-  public:
-    void operator()(const framework::Tensor &input,
-                    const std::vector<int> &ksize,
-                    const std::vector<int> &strides,
-                    const std::vector<int> &paddings, PoolProcess pool_compute,
-                    framework::Tensor *output);
+public:
+  void operator()(const framework::Tensor &input, const std::vector<int> &ksize,
+                  const std::vector<int> &strides,
+                  const std::vector<int> &paddings, PoolProcess pool_compute,
+                  framework::Tensor *output);
 };
 }
 } // namespace operators

--- a/src/operators/math/transform.h
+++ b/src/operators/math/transform.h
@@ -37,18 +37,18 @@ namespace math {
 //       class, paddle::fluid::operators::RowwiseTRansformIterator.

 struct Transform {
-    template <typename InputIter, typename OutputIter, typename UnaryOperation>
-    void operator()(InputIter first, InputIter last, OutputIter result,
-                    UnaryOperation op) {
-        std::transform(first, last, result, op);
-    }
-
-    template <typename InputIter1, typename InputIter2, typename OutputIter,
-              typename BinaryOperation>
-    void operator()(InputIter1 first1, InputIter1 last1, InputIter2 first2,
-                    OutputIter result, BinaryOperation op) {
-        std::transform(first1, last1, first2, result, op);
-    }
+  template <typename InputIter, typename OutputIter, typename UnaryOperation>
+  void operator()(InputIter first, InputIter last, OutputIter result,
+                  UnaryOperation op) {
+    std::transform(first, last, result, op);
+  }
+
+  template <typename InputIter1, typename InputIter2, typename OutputIter,
+            typename BinaryOperation>
+  void operator()(InputIter1 first1, InputIter1 last1, InputIter2 first2,
+                  OutputIter result, BinaryOperation op) {
+    std::transform(first1, last1, first2, result, op);
+  }
 };
 } // namespace math
 } // namespace operators

--- a/src/operators/math/vol2col.cc
+++ b/src/operators/math/vol2col.cc
@@ -26,90 +26,83 @@ using Tensor = paddle_mobile::framework::Tensor;
 *                    output_depth, output_height, output_width]
 */
 template <typename T> class Vol2ColFunctor<CPU, T> {
-  public:
-    void operator()(const Tensor &vol, const std::vector<int> &dilations,
-                    const std::vector<int> &strides,
-                    const std::vector<int> &paddings, Tensor *col) const {
-        //    PADDLE_ENFORCE(vol.dims().size() == 4);
-        //    PADDLE_ENFORCE(col->dims().size() == 7);
-
-        int input_channels = vol.dims()[0];
-        int input_depth = vol.dims()[1];
-        int input_height = vol.dims()[2];
-        int input_width = vol.dims()[3];
-        int filter_depth = col->dims()[1];
-        int filter_height = col->dims()[2];
-        int filter_width = col->dims()[3];
-        int output_depth = col->dims()[4];
-        int output_height = col->dims()[5];
-        int output_width = col->dims()[6];
-        int channels_col =
-            input_channels * filter_depth * filter_height * filter_width;
-
-        //    PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
-        //                       ((dilations[0] * (filter_depth - 1)
-        //                       + 1))) /
-        //                              strides[0] +
-        //                          1,
-        //                      output_depth,
-        //                      "input_depth and output_depth are "
-        //                      "mismatching.");
-        //    PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
-        //                       ((dilations[1] * (filter_height -
-        //                       1) + 1))) /
-        //                              strides[1] +
-        //                          1,
-        //                      output_height,
-        //                      "input_height and output_height are
-        //                      "
-        //                      "mismatching.");
-        //    PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
-        //                       ((dilations[2] * (filter_width - 1)
-        //                       + 1))) /
-        //                              strides[2] +
-        //                          1,
-        //                      output_width,
-        //                      "input_width and output_width are "
-        //                      "mismatching.");
-
-        const T *vol_data = vol.data<T>();
-        T *col_data = col->data<T>();
-
-        for (int c = 0; c < channels_col; ++c) {
-            int w_offset = c % filter_width;
-            int h_offset = (c / filter_width) % filter_height;
-            int d_offset = (c / filter_width / filter_height) % filter_depth;
-            int c_in = c / filter_width / filter_height / filter_depth;
-            for (int d = 0; d < output_depth; ++d) {
-                int d_pad =
-                    d * strides[0] - paddings[0] + d_offset * dilations[0];
-                for (int h = 0; h < output_height; ++h) {
-                    int h_pad =
-                        h * strides[1] - paddings[1] + h_offset * dilations[1];
-                    for (int w = 0; w < output_width; ++w) {
-                        int w_pad = w * strides[2] - paddings[2] +
-                                    w_offset * dilations[2];
-
-                        int col_idx =
-                            ((c * output_depth + d) * output_height + h) *
-                                output_width +
-                            w;
-                        int vol_idx =
-                            ((c_in * input_depth + d_pad) * input_height +
-                             h_pad) *
-                                input_width +
-                            w_pad;
-                        col_data[col_idx] =
-                            (h_pad < 0 || h_pad >= input_height || w_pad < 0 ||
-                             w_pad >= input_width || d_pad < 0 ||
-                             d_pad >= input_depth)
-                                ? static_cast<T>(0)
-                                : vol_data[vol_idx];
-                    }
-                }
-            }
+public:
+  void operator()(const Tensor &vol, const std::vector<int> &dilations,
+                  const std::vector<int> &strides,
+                  const std::vector<int> &paddings, Tensor *col) const {
+    //    PADDLE_ENFORCE(vol.dims().size() == 4);
+    //    PADDLE_ENFORCE(col->dims().size() == 7);
+
+    int input_channels = vol.dims()[0];
+    int input_depth = vol.dims()[1];
+    int input_height = vol.dims()[2];
+    int input_width = vol.dims()[3];
+    int filter_depth = col->dims()[1];
+    int filter_height = col->dims()[2];
+    int filter_width = col->dims()[3];
+    int output_depth = col->dims()[4];
+    int output_height = col->dims()[5];
+    int output_width = col->dims()[6];
+    int channels_col =
+        input_channels * filter_depth * filter_height * filter_width;
+
+    //    PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
+    //                       ((dilations[0] * (filter_depth - 1)
+    //                       + 1))) /
+    //                              strides[0] +
+    //                          1,
+    //                      output_depth,
+    //                      "input_depth and output_depth are "
+    //                      "mismatching.");
+    //    PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
+    //                       ((dilations[1] * (filter_height -
+    //                       1) + 1))) /
+    //                              strides[1] +
+    //                          1,
+    //                      output_height,
+    //                      "input_height and output_height are
+    //                      "
+    //                      "mismatching.");
+    //    PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
+    //                       ((dilations[2] * (filter_width - 1)
+    //                       + 1))) /
+    //                              strides[2] +
+    //                          1,
+    //                      output_width,
+    //                      "input_width and output_width are "
+    //                      "mismatching.");
+
+    const T *vol_data = vol.data<T>();
+    T *col_data = col->data<T>();
+
+    for (int c = 0; c < channels_col; ++c) {
+      int w_offset = c % filter_width;
+      int h_offset = (c / filter_width) % filter_height;
+      int d_offset = (c / filter_width / filter_height) % filter_depth;
+      int c_in = c / filter_width / filter_height / filter_depth;
+      for (int d = 0; d < output_depth; ++d) {
+        int d_pad = d * strides[0] - paddings[0] + d_offset * dilations[0];
+        for (int h = 0; h < output_height; ++h) {
+          int h_pad = h * strides[1] - paddings[1] + h_offset * dilations[1];
+          for (int w = 0; w < output_width; ++w) {
+            int w_pad = w * strides[2] - paddings[2] + w_offset * dilations[2];
+
+            int col_idx =
+                ((c * output_depth + d) * output_height + h) * output_width + w;
+            int vol_idx =
+                ((c_in * input_depth + d_pad) * input_height + h_pad) *
+                    input_width +
+                w_pad;
+            col_data[col_idx] =
+                (h_pad < 0 || h_pad >= input_height || w_pad < 0 ||
+                 w_pad >= input_width || d_pad < 0 || d_pad >= input_depth)
+                    ? static_cast<T>(0)
+                    : vol_data[vol_idx];
+          }
        }
+      }
    }
+  }
 };

 /*
@@ -119,89 +112,83 @@ template <typename T> class Vol2ColFunctor<CPU, T> {
 *                    output_depth, output_height, output_width]
 */
 template <typename T> class Col2VolFunctor<CPU, T> {
-  public:
-    void operator()(const Tensor &col, const std::vector<int> &dilations,
-                    const std::vector<int> &strides,
-                    const std::vector<int> &paddings, Tensor *vol) const {
-        //    PADDLE_ENFORCE(vol->dims().size() == 4);
-        //    PADDLE_ENFORCE(col.dims().size() == 7);
-
-        int input_channels = vol->dims()[0];
-        int input_depth = vol->dims()[1];
-        int input_height = vol->dims()[2];
-        int input_width = vol->dims()[3];
-        int filter_depth = col.dims()[1];
-        int filter_height = col.dims()[2];
-        int filter_width = col.dims()[3];
-        int output_depth = col.dims()[4];
-        int output_height = col.dims()[5];
-        int output_width = col.dims()[6];
-        int channels_col =
-            input_channels * filter_depth * filter_height * filter_width;
-
-        //    PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
-        //                       ((dilations[0] * (filter_depth - 1)
-        //                       + 1))) /
-        //                              strides[0] +
-        //                          1,
-        //                      output_depth,
-        //                      "input_depth and output_depth are "
-        //                      "mismatching.");
-        //    PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
-        //                       ((dilations[1] * (filter_height -
-        //                       1) + 1))) /
-        //                              strides[1] +
-        //                          1,
-        //                      output_height,
-        //                      "input_height and output_height are
-        //                      "
-        //                      "mismatching.");
-        //    PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
-        //                       ((dilations[2] * (filter_width - 1)
-        //                       + 1))) /
-        //                              strides[2] +
-        //                          1,
-        //                      output_width,
-        //                      "input_width and output_width are "
-        //                      "mismatching.");
-        T *vol_data = vol->data<T>();
-        const T *col_data = col.data<T>();
-
-        for (int c = 0; c < channels_col; ++c) {
-            int w_offset = c % filter_width;
-            int h_offset = (c / filter_width) % filter_height;
-            int d_offset = (c / filter_width / filter_height) % filter_depth;
-            int cIm = c / filter_width / filter_height / filter_depth;
-            for (int d = 0; d < output_depth; ++d) {
-                int d_pad =
-                    d * strides[0] - paddings[0] + d_offset * dilations[0];
-                for (int h = 0; h < output_height; ++h) {
-                    int h_pad =
-                        h * strides[1] - paddings[1] + h_offset * dilations[1];
-                    for (int w = 0; w < output_width; ++w) {
-                        int w_pad = w * strides[2] - paddings[2] +
-                                    w_offset * dilations[2];
-
-                        if (h_pad >= 0 && h_pad < input_height && w_pad >= 0 &&
-                            w_pad < input_width && d_pad >= 0 &&
-                            d_pad < input_depth) {
-                            int vol_idx =
-                                ((cIm * input_depth + d_pad) * input_height +
-                                 h_pad) *
-                                    input_width +
-                                w_pad;
-
-                            int col_idx =
-                                ((c * output_depth + d) * output_height + h) *
-                                    output_width +
-                                w;
-                            vol_data[vol_idx] += col_data[col_idx];
-                        }
-                    }
-                }
+public:
+  void operator()(const Tensor &col, const std::vector<int> &dilations,
+                  const std::vector<int> &strides,
+                  const std::vector<int> &paddings, Tensor *vol) const {
+    //    PADDLE_ENFORCE(vol->dims().size() == 4);
+    //    PADDLE_ENFORCE(col.dims().size() == 7);
+
+    int input_channels = vol->dims()[0];
+    int input_depth = vol->dims()[1];
+    int input_height = vol->dims()[2];
+    int input_width = vol->dims()[3];
+    int filter_depth = col.dims()[1];
+    int filter_height = col.dims()[2];
+    int filter_width = col.dims()[3];
+    int output_depth = col.dims()[4];
+    int output_height = col.dims()[5];
+    int output_width = col.dims()[6];
+    int channels_col =
+        input_channels * filter_depth * filter_height * filter_width;
+
+    //    PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
+    //                       ((dilations[0] * (filter_depth - 1)
+    //                       + 1))) /
+    //                              strides[0] +
+    //                          1,
+    //                      output_depth,
+    //                      "input_depth and output_depth are "
+    //                      "mismatching.");
+    //    PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
+    //                       ((dilations[1] * (filter_height -
+    //                       1) + 1))) /
+    //                              strides[1] +
+    //                          1,
+    //                      output_height,
+    //                      "input_height and output_height are
+    //                      "
+    //                      "mismatching.");
+    //    PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
+    //                       ((dilations[2] * (filter_width - 1)
+    //                       + 1))) /
+    //                              strides[2] +
+    //                          1,
+    //                      output_width,
+    //                      "input_width and output_width are "
+    //                      "mismatching.");
+    T *vol_data = vol->data<T>();
+    const T *col_data = col.data<T>();
+
+    for (int c = 0; c < channels_col; ++c) {
+      int w_offset = c % filter_width;
+      int h_offset = (c / filter_width) % filter_height;
+      int d_offset = (c / filter_width / filter_height) % filter_depth;
+      int cIm = c / filter_width / filter_height / filter_depth;
+      for (int d = 0; d < output_depth; ++d) {
+        int d_pad = d * strides[0] - paddings[0] + d_offset * dilations[0];
+        for (int h = 0; h < output_height; ++h) {
+          int h_pad = h * strides[1] - paddings[1] + h_offset * dilations[1];
+          for (int w = 0; w < output_width; ++w) {
+            int w_pad = w * strides[2] - paddings[2] + w_offset * dilations[2];
+
+            if (h_pad >= 0 && h_pad < input_height && w_pad >= 0 &&
+                w_pad < input_width && d_pad >= 0 && d_pad < input_depth) {
+              int vol_idx =
+                  ((cIm * input_depth + d_pad) * input_height + h_pad) *
+                      input_width +
+                  w_pad;
+
+              int col_idx =
+                  ((c * output_depth + d) * output_height + h) * output_width +
+                  w;
+              vol_data[vol_idx] += col_data[col_idx];
            }
+          }
        }
+      }
    }
+  }
 };

 template class Vol2ColFunctor<CPU, float>;

--- a/src/operators/math/vol2col.h
+++ b/src/operators/math/vol2col.h
@@ -73,17 +73,17 @@ namespace math {
 using Tensor = paddle_mobile::framework::Tensor;

 template <typename DeviceType, typename T> class Vol2ColFunctor {
-  public:
-    void operator()(const Tensor &vol, const std::vector<int> &dilations,
-                    const std::vector<int> &strides,
-                    const std::vector<int> &paddings, Tensor *col) const;
+public:
+  void operator()(const Tensor &vol, const std::vector<int> &dilations,
+                  const std::vector<int> &strides,
+                  const std::vector<int> &paddings, Tensor *col) const;
 };

 template <typename DeviceType, typename T> class Col2VolFunctor {
-  public:
-    void operator()(const Tensor &col, const std::vector<int> &dilations,
-                    const std::vector<int> &strides,
-                    const std::vector<int> &paddings, Tensor *vol) const;
+public:
+  void operator()(const Tensor &col, const std::vector<int> &dilations,
+                  const std::vector<int> &strides,
+                  const std::vector<int> &paddings, Tensor *vol) const;
 };

 } // namespace math

--- a/src/operators/mul_op.cpp
+++ b/src/operators/mul_op.cpp
@@ -22,34 +22,34 @@ namespace paddle_mobile {
 namespace operators {

 template <typename Dtype, typename T> void MulOp<Dtype, T>::InferShape() const {
-    auto x_dims = param_.InputX()->dims();
-    auto y_dims = param_.InputY()->dims();
-    int x_num_col_dims = param_.XNumColDims();
-    int y_num_col_dims = param_.YNumColDims();
+  auto x_dims = param_.InputX()->dims();
+  auto y_dims = param_.InputY()->dims();
+  int x_num_col_dims = param_.XNumColDims();
+  int y_num_col_dims = param_.YNumColDims();

-    assert(x_dims.size() > x_num_col_dims);
-    assert(y_dims.size() > y_num_col_dims);
+  assert(x_dims.size() > x_num_col_dims);
+  assert(y_dims.size() > y_num_col_dims);

-    /// (1,2,3,4) , x_num_col_dims = 2  -> (2,12)
-    auto x_mat_dims = framework::flatten_to_2d(x_dims, x_num_col_dims);
-    auto y_mat_dims = framework::flatten_to_2d(y_dims, y_num_col_dims);
+  /// (1,2,3,4) , x_num_col_dims = 2  -> (2,12)
+  auto x_mat_dims = framework::flatten_to_2d(x_dims, x_num_col_dims);
+  auto y_mat_dims = framework::flatten_to_2d(y_dims, y_num_col_dims);

-    assert(x_mat_dims[1] == y_mat_dims[0]);
+  assert(x_mat_dims[1] == y_mat_dims[0]);

-    std::vector<int64_t> output_dims;
-    output_dims.reserve(
-        static_cast<size_t>(x_num_col_dims + y_dims.size() - y_num_col_dims));
+  std::vector<int64_t> output_dims;
+  output_dims.reserve(
+      static_cast<size_t>(x_num_col_dims + y_dims.size() - y_num_col_dims));

-    for (int i = 0; i < x_num_col_dims; ++i) {
-        output_dims.push_back(x_dims[i]);
-    }
+  for (int i = 0; i < x_num_col_dims; ++i) {
+    output_dims.push_back(x_dims[i]);
+  }

-    for (int i = y_num_col_dims; i < y_dims.size(); ++i) {
-        output_dims.push_back(y_dims[i]);
-    }
+  for (int i = y_num_col_dims; i < y_dims.size(); ++i) {
+    output_dims.push_back(y_dims[i]);
+  }

-    framework::DDim ddim = framework::make_ddim(output_dims);
-    param_.Out()->Resize(ddim);
+  framework::DDim ddim = framework::make_ddim(output_dims);
+  param_.Out()->Resize(ddim);
 }
 template class MulOp<CPU, float>;
 } // namespace operators

--- a/src/operators/mul_op.h
+++ b/src/operators/mul_op.h
@@ -27,24 +27,24 @@ using namespace framework;

 template <typename DeviceType, typename T>
 class MulOp : public framework::OperatorWithKernel<DeviceType> {
-  public:
-    MulOp(const std::string &type, const VariableNameMap &inputs,
-          const VariableNameMap &outputs, const framework::AttributeMap attrs,
-          std::shared_ptr<framework::Scope> scope)
-        : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs,
-                                                    attrs, scope),
-          param_(inputs, outputs, attrs, *scope) {}
-
-    void Run() const {
-        operators::MulKernel<DeviceType, T> kernel;
-        kernel.Compute(param_);
-    }
-
-    using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
-    void InferShape() const override;
-
-  protected:
-    MulParam param_;
+public:
+  MulOp(const std::string &type, const VariableNameMap &inputs,
+        const VariableNameMap &outputs, const framework::AttributeMap attrs,
+        std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
+                                                  scope),
+        param_(inputs, outputs, attrs, *scope) {}
+
+  void Run() const {
+    operators::MulKernel<DeviceType, T> kernel;
+    kernel.Compute(param_);
+  }
+
+  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
+  void InferShape() const override;
+
+protected:
+  MulParam param_;
 };

 } // namespace operators

--- a/src/operators/op_param.cpp
+++ b/src/operators/op_param.cpp
@@ -21,25 +21,24 @@ SOFTWARE.
 namespace paddle_mobile {
 namespace operators {
 Print &operator<<(Print &printer, const ConvParam &conv_param) {
-    printer << "parameter of conv: "
-            << "\n";
-    printer << "  stride: "
-            << " (" << conv_param.Strides()[0] << conv_param.Strides()[1]
-            << ") "
-            << "\n";
-    printer << "  paddings: "
-            << " (" << conv_param.Paddings()[0] << conv_param.Paddings()[1]
-            << ") "
-            << "\n";
-    printer << "  dilations: "
-            << " (" << conv_param.Dilations()[0] << conv_param.Dilations()[1]
-            << ") "
-            << "\n";
-    printer << "  groups: " << conv_param.Groups() << "\n";
-    printer << "  input  dims: " << conv_param.Input()->dims() << "\n";
-    printer << "  filter dims: " << conv_param.Filter()->dims() << "\n";
-    printer << "  output dims: " << conv_param.Output()->dims();
-    return printer;
+  printer << "parameter of conv: "
+          << "\n";
+  printer << "  stride: "
+          << " (" << conv_param.Strides()[0] << conv_param.Strides()[1] << ") "
+          << "\n";
+  printer << "  paddings: "
+          << " (" << conv_param.Paddings()[0] << conv_param.Paddings()[1]
+          << ") "
+          << "\n";
+  printer << "  dilations: "
+          << " (" << conv_param.Dilations()[0] << conv_param.Dilations()[1]
+          << ") "
+          << "\n";
+  printer << "  groups: " << conv_param.Groups() << "\n";
+  printer << "  input  dims: " << conv_param.Input()->dims() << "\n";
+  printer << "  filter dims: " << conv_param.Filter()->dims() << "\n";
+  printer << "  output dims: " << conv_param.Output()->dims();
+  return printer;
 }
 } // namespace operators
 } // namespace paddle_mobile
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -31,357 +31,355 @@ namespace operators {
 using namespace framework;

 class OpParam : PaddleMobileObject {
-  public:
-  protected:
-    template <typename T>
-    static T *InputFrom(const VariableNameMap &inputs, const Scope &scope) {
-        return GetVarValue<T>("Input", inputs, scope);
+public:
+protected:
+  template <typename T>
+  static T *InputFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Input", inputs, scope);
+  }
+
+  template <typename T>
+  static T *InputXFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("X", inputs, scope);
+  }
+
+  template <typename T>
+  static T *InputYFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Y", inputs, scope);
+  }
+
+  template <typename T>
+  static T *InputBiasFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Bias", inputs, scope);
+  }
+  template <typename T>
+  static T *InputVarianceFrom(const VariableNameMap &inputs,
+                              const Scope &scope) {
+    return GetVarValue<T>("Variance", inputs, scope);
+  }
+  template <typename T>
+  static T *InputMeanFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Mean", inputs, scope);
+  }
+  template <typename T>
+  static T *InputScaleFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Scale", inputs, scope);
+  }
+
+  template <typename T>
+  static std::vector<T *> InputMultiFrom(const VariableNameMap &inputs,
+                                         const Scope &scope) {
+    return GetMultiVarValue<T>("X", inputs, scope);
+  }
+
+  template <typename T>
+  static T *OutputFrom(const VariableNameMap &outputs, const Scope &scope) {
+    return GetVarValue<T>("Output", outputs, scope);
+  }
+
+  template <typename T>
+  static T *OutFrom(const VariableNameMap &outputs, const Scope &scope) {
+    return GetVarValue<T>("Out", outputs, scope);
+  }
+
+  template <typename T>
+  static T *OutputYFrom(const VariableNameMap &outputs, const Scope &scope) {
+    return GetVarValue<T>("Y", outputs, scope);
+  }
+
+  template <typename T>
+  static T *MidOutFrom(const VariableNameMap &outputs, const Scope &scope) {
+    return GetVarValue<T>("MidOut", outputs, scope);
+  }
+
+  template <typename T>
+  static T *FilterFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Filter", inputs, scope);
+  }
+
+  template <typename T>
+  static const T GetAttr(const std::string &key, const AttributeMap &map) {
+    return ((Attribute)map.at(key)).Get<T>();
+  }
+
+  template <typename T>
+  static T *GetVarValue(const std::string &key, const VariableNameMap &var_map,
+                        const Scope &scope) {
+    auto var_vec = var_map.at(key);
+    if (!var_vec.empty()) {
+      //      std::cout << " get var value -- " << var_vec[0] <<
+      //      std::endl;
+      auto var = scope.FindVar(var_vec[0]);
+      return var->GetMutable<T>();
+    } else {
+      return nullptr;
    }
+  }

-    template <typename T>
-    static T *InputXFrom(const VariableNameMap &inputs, const Scope &scope) {
-        return GetVarValue<T>("X", inputs, scope);
-    }
-
-    template <typename T>
-    static T *InputYFrom(const VariableNameMap &inputs, const Scope &scope) {
-        return GetVarValue<T>("Y", inputs, scope);
-    }
-
-    template <typename T>
-    static T *InputBiasFrom(const VariableNameMap &inputs, const Scope &scope) {
-        return GetVarValue<T>("Bias", inputs, scope);
-    }
-    template <typename T>
-    static T *InputVarianceFrom(const VariableNameMap &inputs,
-                                const Scope &scope) {
-        return GetVarValue<T>("Variance", inputs, scope);
-    }
-    template <typename T>
-    static T *InputMeanFrom(const VariableNameMap &inputs, const Scope &scope) {
-        return GetVarValue<T>("Mean", inputs, scope);
-    }
-    template <typename T>
-    static T *InputScaleFrom(const VariableNameMap &inputs,
-                             const Scope &scope) {
-        return GetVarValue<T>("Scale", inputs, scope);
-    }
-
-    template <typename T>
-    static std::vector<T *> InputMultiFrom(const VariableNameMap &inputs,
+  template <typename T>
+  static std::vector<T *> GetMultiVarValue(const std::string &key,
+                                           const VariableNameMap &var_map,
                                           const Scope &scope) {
-        return GetMultiVarValue<T>("X", inputs, scope);
-    }
-
-    template <typename T>
-    static T *OutputFrom(const VariableNameMap &outputs, const Scope &scope) {
-        return GetVarValue<T>("Output", outputs, scope);
-    }
-
-    template <typename T>
-    static T *OutFrom(const VariableNameMap &outputs, const Scope &scope) {
-        return GetVarValue<T>("Out", outputs, scope);
-    }
-
-    template <typename T>
-    static T *OutputYFrom(const VariableNameMap &outputs, const Scope &scope) {
-        return GetVarValue<T>("Y", outputs, scope);
-    }
-
-    template <typename T>
-    static T *MidOutFrom(const VariableNameMap &outputs, const Scope &scope) {
-        return GetVarValue<T>("MidOut", outputs, scope);
-    }
-
-    template <typename T>
-    static T *FilterFrom(const VariableNameMap &inputs, const Scope &scope) {
-        return GetVarValue<T>("Filter", inputs, scope);
-    }
-
-    template <typename T>
-    static const T GetAttr(const std::string &key, const AttributeMap &map) {
-        return ((Attribute)map.at(key)).Get<T>();
-    }
-
-    template <typename T>
-    static T *GetVarValue(const std::string &key,
-                          const VariableNameMap &var_map, const Scope &scope) {
-        auto var_vec = var_map.at(key);
-        if (!var_vec.empty()) {
-            //      std::cout << " get var value -- " << var_vec[0] <<
-            //      std::endl;
-            auto var = scope.FindVar(var_vec[0]);
-            return var->GetMutable<T>();
-        } else {
-            return nullptr;
-        }
-    }
-
-    template <typename T>
-    static std::vector<T *> GetMultiVarValue(const std::string &key,
-                                             const VariableNameMap &var_map,
-                                             const Scope &scope) {
-        auto var_vecs = var_map.at(key);
-        assert(var_vecs.size() > 1);
-        std::vector<T *> var_res;
-        for (auto &var_vec : var_vecs) {
-            auto var = scope.FindVar(var_vec);
-            var_res.push_back(var->GetMutable<T>());
-        }
-        return var_res;
+    auto var_vecs = var_map.at(key);
+    assert(var_vecs.size() > 1);
+    std::vector<T *> var_res;
+    for (auto &var_vec : var_vecs) {
+      auto var = scope.FindVar(var_vec);
+      var_res.push_back(var->GetMutable<T>());
    }
+    return var_res;
+  }
 };

 class ConvParam : OpParam {
-  public:
-    ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-              const framework::AttributeMap &attrs,
-              const framework::Scope &scope) {
-        filter_ = FilterFrom<framework::LoDTensor>(inputs, scope);
-        input_ = InputFrom<framework::Tensor>(inputs, scope);
-        output_ = OutputFrom<framework::Tensor>(outputs, scope);
-        strides_ = GetAttr<std::vector<int>>("strides", attrs);
-        paddings_ = GetAttr<std::vector<int>>("paddings", attrs);
-        dilations_ = GetAttr<std::vector<int>>("dilations", attrs);
-        groups = GetAttr<int>("groups", attrs);
-    }
+public:
+  ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+            const framework::AttributeMap &attrs,
+            const framework::Scope &scope) {
+    filter_ = FilterFrom<framework::LoDTensor>(inputs, scope);
+    input_ = InputFrom<framework::Tensor>(inputs, scope);
+    output_ = OutputFrom<framework::Tensor>(outputs, scope);
+    strides_ = GetAttr<std::vector<int>>("strides", attrs);
+    paddings_ = GetAttr<std::vector<int>>("paddings", attrs);
+    dilations_ = GetAttr<std::vector<int>>("dilations", attrs);
+    groups = GetAttr<int>("groups", attrs);
+  }

-    const Tensor *Input() const { return input_; }
+  const Tensor *Input() const { return input_; }

-    const LoDTensor *Filter() const { return filter_; }
+  const LoDTensor *Filter() const { return filter_; }

-    Tensor *Output() const { return output_; }
+  Tensor *Output() const { return output_; }

-    const std::vector<int> &Strides() const { return strides_; }
+  const std::vector<int> &Strides() const { return strides_; }

-    const std::vector<int> &Paddings() const { return paddings_; }
+  const std::vector<int> &Paddings() const { return paddings_; }

-    const std::vector<int> &Dilations() const { return dilations_; }
+  const std::vector<int> &Dilations() const { return dilations_; }

-    const int &Groups() const { return groups; }
+  const int &Groups() const { return groups; }

-  private:
-    Tensor *input_;
-    Tensor *output_;
-    LoDTensor *filter_;
-    std::vector<int> strides_;
-    std::vector<int> paddings_;
-    std::vector<int> dilations_;
-    int groups;
+private:
+  Tensor *input_;
+  Tensor *output_;
+  LoDTensor *filter_;
+  std::vector<int> strides_;
+  std::vector<int> paddings_;
+  std::vector<int> dilations_;
+  int groups;
 };

 Print &operator<<(Print &printer, const ConvParam &conv_param);

 class ElementwiseAddParam : OpParam {
-  public:
-    ElementwiseAddParam(const VariableNameMap &inputs,
-                        const VariableNameMap &outputs,
-                        const framework::AttributeMap &attrs,
-                        const framework::Scope &scope) {
-        input_x_ = InputXFrom<framework::Tensor>(inputs, scope);
-        input_y_ = InputYFrom<framework::Tensor>(inputs, scope);
-        out_ = OutFrom<framework::Tensor>(outputs, scope);
-        axis_ = GetAttr<int>("axis", attrs);
-    }
-
-    const Tensor *InputX() const { return input_x_; }
-
-    const Tensor *InputY() const { return input_y_; }
-
-    Tensor *Out() const { return out_; }
-
-    const int &Axis() const { return axis_; }
-
-  private:
-    Tensor *input_x_;
-    Tensor *input_y_;
-    Tensor *out_;
-    int axis_;
+public:
+  ElementwiseAddParam(const VariableNameMap &inputs,
+                      const VariableNameMap &outputs,
+                      const framework::AttributeMap &attrs,
+                      const framework::Scope &scope) {
+    input_x_ = InputXFrom<framework::Tensor>(inputs, scope);
+    input_y_ = InputYFrom<framework::Tensor>(inputs, scope);
+    out_ = OutFrom<framework::Tensor>(outputs, scope);
+    axis_ = GetAttr<int>("axis", attrs);
+  }
+
+  const Tensor *InputX() const { return input_x_; }
+
+  const Tensor *InputY() const { return input_y_; }
+
+  Tensor *Out() const { return out_; }
+
+  const int &Axis() const { return axis_; }
+
+private:
+  Tensor *input_x_;
+  Tensor *input_y_;
+  Tensor *out_;
+  int axis_;
 };

 class MulParam : OpParam {
-  public:
-    MulParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-             const framework::AttributeMap &attrs,
-             const framework::Scope &scope) {
-        input_x_ = InputXFrom<framework::Tensor>(inputs, scope);
-        input_y_ = InputYFrom<framework::Tensor>(inputs, scope);
-        out_ = OutFrom<framework::Tensor>(outputs, scope);
-        x_num_col_dims_ = GetAttr<int>("x_num_col_dims", attrs);
-        y_num_col_dims_ = GetAttr<int>("y_num_col_dims", attrs);
-    }
+public:
+  MulParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+           const framework::AttributeMap &attrs,
+           const framework::Scope &scope) {
+    input_x_ = InputXFrom<framework::Tensor>(inputs, scope);
+    input_y_ = InputYFrom<framework::Tensor>(inputs, scope);
+    out_ = OutFrom<framework::Tensor>(outputs, scope);
+    x_num_col_dims_ = GetAttr<int>("x_num_col_dims", attrs);
+    y_num_col_dims_ = GetAttr<int>("y_num_col_dims", attrs);
+  }

-    const Tensor *InputX() const { return input_x_; }
+  const Tensor *InputX() const { return input_x_; }

-    const Tensor *InputY() const { return input_y_; }
+  const Tensor *InputY() const { return input_y_; }

-    Tensor *Out() const { return out_; }
+  Tensor *Out() const { return out_; }

-    const int &XNumColDims() const { return x_num_col_dims_; }
+  const int &XNumColDims() const { return x_num_col_dims_; }

-    const int &YNumColDims() const { return y_num_col_dims_; }
+  const int &YNumColDims() const { return y_num_col_dims_; }

-  private:
-    Tensor *input_x_;
-    Tensor *input_y_;
-    Tensor *out_;
-    int x_num_col_dims_;
-    int y_num_col_dims_;
+private:
+  Tensor *input_x_;
+  Tensor *input_y_;
+  Tensor *out_;
+  int x_num_col_dims_;
+  int y_num_col_dims_;
 };

 class ConcatParam : public OpParam {
-  public:
-    ConcatParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-                const framework::AttributeMap &attrs,
-                const framework::Scope &scope) {
-        inputs_ = InputMultiFrom<framework::Tensor>(inputs, scope);
-        out_ = OutFrom<framework::Tensor>(outputs, scope);
-        axis_ = GetAttr<int>("axis", attrs);
-    }
+public:
+  ConcatParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+              const framework::AttributeMap &attrs,
+              const framework::Scope &scope) {
+    inputs_ = InputMultiFrom<framework::Tensor>(inputs, scope);
+    out_ = OutFrom<framework::Tensor>(outputs, scope);
+    axis_ = GetAttr<int>("axis", attrs);
+  }

-    std::vector<Tensor *> Inputs() const { return inputs_; }
+  std::vector<Tensor *> Inputs() const { return inputs_; }

-    Tensor *Out() const { return out_; }
+  Tensor *Out() const { return out_; }

-    const int &Axis() const { return axis_; }
+  const int &Axis() const { return axis_; }

-  private:
-    std::vector<Tensor *> inputs_;
-    Tensor *out_;
-    int axis_;
+private:
+  std::vector<Tensor *> inputs_;
+  Tensor *out_;
+  int axis_;
 };

 class LrnParam : public OpParam {
-  public:
-    LrnParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-             const framework::AttributeMap &attrs,
-             const framework::Scope &scope) {
-        input_x_ = InputXFrom<framework::Tensor>(inputs, scope);
-        out_ = OutFrom<framework::Tensor>(outputs, scope);
-        mid_out_ = MidOutFrom<framework::Tensor>(outputs, scope);
-        n_ = GetAttr<int>("n", attrs);
-        alpha_ = GetAttr<float>("alpha", attrs);
-        beta_ = GetAttr<float>("beta", attrs);
-        k_ = GetAttr<float>("k", attrs);
-        data_format_ = GetAttr<std::string>("data_format", attrs);
-    }
+public:
+  LrnParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+           const framework::AttributeMap &attrs,
+           const framework::Scope &scope) {
+    input_x_ = InputXFrom<framework::Tensor>(inputs, scope);
+    out_ = OutFrom<framework::Tensor>(outputs, scope);
+    mid_out_ = MidOutFrom<framework::Tensor>(outputs, scope);
+    n_ = GetAttr<int>("n", attrs);
+    alpha_ = GetAttr<float>("alpha", attrs);
+    beta_ = GetAttr<float>("beta", attrs);
+    k_ = GetAttr<float>("k", attrs);
+    data_format_ = GetAttr<std::string>("data_format", attrs);
+  }

-    const Tensor *InputX() const { return input_x_; }
+  const Tensor *InputX() const { return input_x_; }

-    Tensor *Out() const { return out_; }
+  Tensor *Out() const { return out_; }

-    Tensor *MidOut() const { return mid_out_; }
+  Tensor *MidOut() const { return mid_out_; }

-    const int &N() const { return n_; }
+  const int &N() const { return n_; }

-    const float &Alpha() const { return alpha_; }
+  const float &Alpha() const { return alpha_; }

-    const float &Beta() const { return beta_; }
+  const float &Beta() const { return beta_; }

-    const float &K() const { return k_; }
+  const float &K() const { return k_; }

-    const std::string &DataFormat() const { return data_format_; }
+  const std::string &DataFormat() const { return data_format_; }

-  private:
-    Tensor *input_x_;
-    Tensor *out_;
-    Tensor *mid_out_;
-    int n_;
-    float alpha_;
-    float beta_;
-    float k_;
-    std::string data_format_;
+private:
+  Tensor *input_x_;
+  Tensor *out_;
+  Tensor *mid_out_;
+  int n_;
+  float alpha_;
+  float beta_;
+  float k_;
+  std::string data_format_;
 };
 class BatchNormParam : OpParam {
-  public:
-    BatchNormParam(const VariableNameMap &inputs,
-                   const VariableNameMap &outputs,
-                   const framework::AttributeMap &attrs,
-                   const framework::Scope &scope) {
-        input_x_ = InputXFrom<framework::Tensor>(inputs, scope);
-        output_y_ = OutputYFrom<framework::Tensor>(outputs, scope);
-        input_bias_ = InputBiasFrom<framework::Tensor>(inputs, scope);
-        input_mean_ = InputMeanFrom<framework::Tensor>(inputs, scope);
-        input_scale_ = InputScaleFrom<framework::Tensor>(inputs, scope);
-        input_variance_ = InputVarianceFrom<framework::Tensor>(inputs, scope);
-        epsilon_ = GetAttr<float>("epsilon", attrs);
-        momentum_ = GetAttr<float>("momentum", attrs);
-        is_test_ = GetAttr<bool>("is_test", attrs);
-    }
+public: // NOTE(review): the base is written `: OpParam` with no access specifier, which is private inheritance for a `class`; PoolParam in this file uses `: public OpParam`.
+  BatchNormParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+                 const framework::AttributeMap &attrs,
+                 const framework::Scope &scope) { // Fills every tensor pointer and attribute of this parameter bundle once, at construction.
+    input_x_ = InputXFrom<framework::Tensor>(inputs, scope); // presumably resolves the tensor through `inputs` + `scope` — confirm against OpParam
+    output_y_ = OutputYFrom<framework::Tensor>(outputs, scope);
+    input_bias_ = InputBiasFrom<framework::Tensor>(inputs, scope);
+    input_mean_ = InputMeanFrom<framework::Tensor>(inputs, scope);
+    input_scale_ = InputScaleFrom<framework::Tensor>(inputs, scope);
+    input_variance_ = InputVarianceFrom<framework::Tensor>(inputs, scope);
+    epsilon_ = GetAttr<float>("epsilon", attrs);
+    momentum_ = GetAttr<float>("momentum", attrs);
+    is_test_ = GetAttr<bool>("is_test", attrs); // NOTE(review): data_format_ is not assigned anywhere in this constructor
+  }
 
-    const Tensor *InputX() const { return input_x_; }
+  const Tensor *InputX() const { return input_x_; } // inputs are exposed as pointer-to-const
 
-    Tensor *OutputY() const { return output_y_; }
+  Tensor *OutputY() const { return output_y_; } // non-const Tensor* even though the accessor itself is const
 
-    const Tensor *InputBias() const { return input_bias_; }
+  const Tensor *InputBias() const { return input_bias_; }
 
-    const Tensor *InputMean() const { return input_mean_; }
+  const Tensor *InputMean() const { return input_mean_; }
 
-    const Tensor *InputScale() const { return input_scale_; }
+  const Tensor *InputScale() const { return input_scale_; }
 
-    const Tensor *InputVariance() const { return input_variance_; }
+  const Tensor *InputVariance() const { return input_variance_; }
 
-    const float &Epsilon() const { return epsilon_; }
+  const float &Epsilon() const { return epsilon_; } // scalar attributes are returned by const reference
 
-    const float &Momentum() const { return momentum_; }
+  const float &Momentum() const { return momentum_; }
 
-    const bool &IsTest() const { return is_test_; }
+  const bool &IsTest() const { return is_test_; }
 
-    const std::string &DataFormat() const { return data_format_; }
+  const std::string &DataFormat() const { return data_format_; } // returns a default-constructed (empty) string: nothing in this class writes data_format_
 
-  private:
-    Tensor *input_x_;
-    Tensor *output_y_;
-    Tensor *input_bias_;
-    Tensor *input_mean_;
-    Tensor *input_scale_;
-    Tensor *input_variance_;
-    float epsilon_;
-    float momentum_;
-    bool is_test_;
-    std::string data_format_;
+private:
+  Tensor *input_x_; // raw pointers; this class has no destructor and never deletes them
+  Tensor *output_y_;
+  Tensor *input_bias_;
+  Tensor *input_mean_;
+  Tensor *input_scale_;
+  Tensor *input_variance_;
+  float epsilon_;
+  float momentum_;
+  bool is_test_;
+  std::string data_format_; // never assigned by the constructor above
 };
 class PoolParam : public OpParam {
-  public:
-    PoolParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-              const framework::AttributeMap &attrs,
-              const framework::Scope &scope) {
-        input_ = InputXFrom<framework::Tensor>(inputs, scope);
-
-        output_ = OutFrom<framework::Tensor>(outputs, scope);
-        pooling_type_ = GetAttr<std::string>("pooling_type", attrs);
-        ksize_ = GetAttr<std::vector<int>>("ksize", attrs);
-        strides_ = GetAttr<std::vector<int>>("strides", attrs);
-        paddings_ = GetAttr<std::vector<int>>("paddings", attrs);
-        ceil_mode_ = GetAttr<bool>("ceil_mode", attrs);
-        gloabal_pooling_ = GetAttr<bool>("global_pooling", attrs);
-    }
+public:
+  PoolParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+            const framework::AttributeMap &attrs,
+            const framework::Scope &scope) { // Captures the pooling input/output tensors and all pooling attributes at construction.
+    input_ = InputXFrom<framework::Tensor>(inputs, scope); // presumably looked up via `inputs` + `scope` — confirm against OpParam
+
+    output_ = OutFrom<framework::Tensor>(outputs, scope);
+    pooling_type_ = GetAttr<std::string>("pooling_type", attrs);
+    ksize_ = GetAttr<std::vector<int>>("ksize", attrs);
+    strides_ = GetAttr<std::vector<int>>("strides", attrs);
+    paddings_ = GetAttr<std::vector<int>>("paddings", attrs);
+    ceil_mode_ = GetAttr<bool>("ceil_mode", attrs);
+    gloabal_pooling_ = GetAttr<bool>("global_pooling", attrs); // member name is misspelled ("gloabal"); the attribute key is spelled "global_pooling"
+  }
 
-    const Tensor *Input() const { return input_; }
+  const Tensor *Input() const { return input_; } // input is exposed as pointer-to-const
 
-    Tensor *Output() const { return output_; }
+  Tensor *Output() const { return output_; } // non-const Tensor* from a const accessor
 
-    const std::string &PoolingType() const { return pooling_type_; }
+  const std::string &PoolingType() const { return pooling_type_; }
 
-    const std::vector<int> &Ksize() const { return ksize_; }
+  const std::vector<int> &Ksize() const { return ksize_; } // vectors are returned by const reference, no copy here
 
-    const std::vector<int> &Strides() const { return strides_; }
+  const std::vector<int> &Strides() const { return strides_; }
 
-    const std::vector<int> &Paddings() const { return paddings_; }
+  const std::vector<int> &Paddings() const { return paddings_; }
 
-    bool isCeilMode() const { return ceil_mode_; }
+  bool isCeilMode() const { return ceil_mode_; } // lowerCamel name, unlike the PascalCase accessors above
 
-    bool isGlobalPooling() const { return gloabal_pooling_; }
+  bool isGlobalPooling() const { return gloabal_pooling_; }
 
-  private:
-    Tensor *input_;
-    Tensor *output_;
-    std::string pooling_type_;
-    std::vector<int> ksize_;
-    std::vector<int> strides_;
-    std::vector<int> paddings_;
-    bool ceil_mode_;
-    bool gloabal_pooling_ = false;
+private:
+  Tensor *input_; // raw pointers; this class never deletes them
+  Tensor *output_;
+  std::string pooling_type_;
+  std::vector<int> ksize_;
+  std::vector<int> strides_;
+  std::vector<int> paddings_;
+  bool ceil_mode_;
+  bool gloabal_pooling_ = false; // the only member with a default initializer; the constructor overwrites it
 };

 } // namespace operators

--- a/src/operators/pool_op.cpp
+++ b/src/operators/pool_op.cpp
@@ -23,37 +23,37 @@ namespace operators {

 int PoolOutputSize(int input_size, int filter_size, int padding, int stride,
                    bool ceil_mode) {
-    int output_size;
-    if (!ceil_mode) {
-        output_size = (input_size - filter_size + 2 * padding) / stride + 1;
-    } else {
-        output_size =
-            (input_size - filter_size + 2 * padding + stride - 1) / stride + 1;
-    }
-    return output_size;
+  int output_size; // Computes the pooled extent along one axis from input size, window size, padding and stride.
+  if (!ceil_mode) { // NOTE(review): stride is used as a divisor in both branches with no check for 0
+    output_size = (input_size - filter_size + 2 * padding) / stride + 1; // integer division truncates the quotient
+  } else {
+    output_size =
+        (input_size - filter_size + 2 * padding + stride - 1) / stride + 1; // adding stride - 1 first rounds the quotient up when the numerator is non-negative
+  }
+  return output_size;
 }
 template <typename DeviceType, typename T>
 void PoolOp<DeviceType, T>::InferShape() const {
-    auto in_x_dims = param_.Input()->dims();
-    std::vector<int> ksize = param_.Ksize();
-    std::vector<int> paddings = param_.Paddings();
-    std::vector<int> strides = param_.Strides();
-    bool ceil_mode = param_.isCeilMode();
+  auto in_x_dims = param_.Input()->dims(); // Derives the output tensor's dims from the input dims and the pooling attributes, then resizes the output.
+  std::vector<int> ksize = param_.Ksize(); // local copies: ksize and paddings are overwritten below for global pooling
+  std::vector<int> paddings = param_.Paddings();
+  std::vector<int> strides = param_.Strides();
+  bool ceil_mode = param_.isCeilMode();
 
-    if (param_.isGlobalPooling()) {
-        ksize.resize(static_cast<size_t>(in_x_dims.size()) - 2);
-        for (size_t i = 0; i < ksize.size(); ++i) {
-            paddings[i] = 0;
-            ksize[i] = static_cast<int>(in_x_dims[i + 2]);
-        }
-    }
-    std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]});
+  if (param_.isGlobalPooling()) { // global pooling: the window becomes the full extent of every dim after the first two, with zero padding
+    ksize.resize(static_cast<size_t>(in_x_dims.size()) - 2); // NOTE(review): paddings is not resized to match, yet it is indexed up to ksize.size() below
     for (size_t i = 0; i < ksize.size(); ++i) {
-        output_shape.push_back(PoolOutputSize(
-            in_x_dims[i + 2], ksize[i], paddings[i], strides[i], ceil_mode));
+      paddings[i] = 0;
+      ksize[i] = static_cast<int>(in_x_dims[i + 2]);
     }
-    param_.Output()->Resize(framework::make_ddim(output_shape));
-    DLOG << "infer shape out size =" << param_.Output()->numel();
+  }
+  std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]}); // the first two dims are carried over unchanged
+  for (size_t i = 0; i < ksize.size(); ++i) { // NOTE(review): paddings[i] and strides[i] are read without checking their sizes against ksize
+    output_shape.push_back(PoolOutputSize(in_x_dims[i + 2], ksize[i],
+                                          paddings[i], strides[i], ceil_mode));
+  }
+  param_.Output()->Resize(framework::make_ddim(output_shape)); // mutates the output tensor from a const method, via the non-const Tensor* that Output() returns
+  DLOG << "infer shape out size =" << param_.Output()->numel();
 }
 template class PoolOp<CPU, float>;
 } // namespace operators

--- a/src/operators/pool_op.h
+++ b/src/operators/pool_op.h
@@ -28,25 +28,25 @@ using namespace framework;

 template <typename DeviceType, typename T>
 class PoolOp : public framework::OperatorWithKernel<DeviceType> {
-  public:
-    PoolOp(const std::string &type, const VariableNameMap &inputs,
-           const VariableNameMap &outputs, const framework::AttributeMap &attrs,
-           std::shared_ptr<framework::Scope> scope)
-        : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs,
-                                                    attrs, scope),
-          param_(inputs, outputs, attrs, *scope) {}
-    using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
-    void InferShape() const override;
+public:
+  PoolOp(const std::string &type, const VariableNameMap &inputs,
+         const VariableNameMap &outputs, const framework::AttributeMap &attrs,
+         std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
+                                                  scope),
+        param_(inputs, outputs, attrs, *scope) {} // scope is dereferenced here, so a null scope is not tolerated
+  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel; // also inherits the base-class constructors
+  void InferShape() const override; // defined out of line in pool_op.cpp
 
-    void Run() const {
-        //        InferShape();
-        operators::PoolKernel<DeviceType, T> kernel;
-        kernel.Compute(param_);
-        this->ClearVariables({"X"});
-    }
+  void Run() const { // NOTE(review): no `override` here, unlike InferShape() — confirm how the base class declares Run()
+    //        InferShape();
+    operators::PoolKernel<DeviceType, T> kernel; // a fresh kernel object is constructed on every Run() call
+    kernel.Compute(param_);
+    this->ClearVariables({"X"}); // invoked after the kernel has computed; effect is defined by the base class
+  }
 
-  private:
-    PoolParam param_;
+private:
+  PoolParam param_; // built once in the constructor and passed to the kernel on each Run()
 };
 } // namespace operators
 } // namespace paddle_mobile
--- a/src/platform/data_type.h
+++ b/src/platform/data_type.h
@@ -22,103 +22,103 @@ namespace paddle_mobile {
 namespace framework {

 inline proto::VarType::Type ToDataType(std::type_index type) {
-    /*if (typeid(platform::float16).hash_code() == type.hash_code()) {
-      return proto::VarType::FP16;
-    } else */
-    if (typeid(const float).hash_code() == type.hash_code()) {
-        // CPPLint complains Using C-style cast.  Use
-        // static_cast<float>() instead
-        // One fix to this is to replace float with const float because
-        // typeid(T) == typeid(const T)
-        // http://en.cppreference.com/w/cpp/language/typeid
-        return proto::VarType::FP32;
-    } else if (typeid(const double).hash_code() == type.hash_code()) {
-        return proto::VarType::FP64;
-    } else if (typeid(const int).hash_code() == type.hash_code()) {
-        return proto::VarType::INT32;
-    } else if (typeid(const int64_t).hash_code() == type.hash_code()) {
-        return proto::VarType::INT64;
-    } else if (typeid(const bool).hash_code() == type.hash_code()) {
-        return proto::VarType::BOOL;
-    } else {
-        //    PADDLE_THROW("Not supported");
-        //    std::cout << "Not supported";
-    }
+  /*if (typeid(platform::float16).hash_code() == type.hash_code()) {
+    return proto::VarType::FP16;
+  } else */
+  if (typeid(const float).hash_code() == type.hash_code()) { // Maps a std::type_index to the matching proto::VarType value by comparing hash codes rather than the type_index objects themselves.
+    // CPPLint complains Using C-style cast.  Use
+    // static_cast<float>() instead
+    // One fix to this is to replace float with const float because
+    // typeid(T) == typeid(const T)
+    // http://en.cppreference.com/w/cpp/language/typeid
+    return proto::VarType::FP32;
+  } else if (typeid(const double).hash_code() == type.hash_code()) {
+    return proto::VarType::FP64;
+  } else if (typeid(const int).hash_code() == type.hash_code()) {
+    return proto::VarType::INT32;
+  } else if (typeid(const int64_t).hash_code() == type.hash_code()) {
+    return proto::VarType::INT64;
+  } else if (typeid(const bool).hash_code() == type.hash_code()) {
+    return proto::VarType::BOOL;
+  } else { // unsupported type: both error reports below are commented out, so nothing happens here
+    //    PADDLE_THROW("Not supported");
+    //    std::cout << "Not supported";
+  } // NOTE(review): control then reaches the end of a non-void function without a return value — undefined behavior for unsupported types
 }

 inline std::type_index ToTypeIndex(proto::VarType::Type type) {
-    switch (type) {
-    //    case proto::VarType::FP16:
-    //      return typeid(platform::float16);
-    case proto::VarType::FP32:
-        return typeid(float);
-    case proto::VarType::FP64:
-        return typeid(double);
-    case proto::VarType::INT32:
-        return typeid(int);
-    case proto::VarType::INT64:
-        return typeid(int64_t);
-    case proto::VarType::BOOL:
-        return typeid(bool);
-    default:
-        //      PADDLE_THROW("Not support type %d", type);
-        printf("Not support type %d", type);
-    }
+  switch (type) { // Maps a proto::VarType value to the std::type_index of the corresponding C++ type.
+  //    case proto::VarType::FP16:
+  //      return typeid(platform::float16);
+  case proto::VarType::FP32:
+    return typeid(float); // typeid(...) converts implicitly to std::type_index
+  case proto::VarType::FP64:
+    return typeid(double);
+  case proto::VarType::INT32:
+    return typeid(int);
+  case proto::VarType::INT64:
+    return typeid(int64_t);
+  case proto::VarType::BOOL:
+    return typeid(bool);
+  default: // every other enum value, including FP16 whose case is commented out above
+    //      PADDLE_THROW("Not support type %d", type);
+    printf("Not support type %d", type); // NOTE(review): only prints; no return follows, so the function falls off its end (undefined behavior for a non-void function)
+  }
 }

 template <typename Visitor>
 inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
-    switch (type) {
-    //    case proto::VarType::FP16:
-    //      visitor.template operator()<platform::float16>();
-    //      break;
-    case proto::VarType::FP32:
-        visitor.template operator()<float>();
-        break;
-    case proto::VarType::FP64:
-        visitor.template operator()<double>();
-        break;
-    case proto::VarType::INT32:
-        visitor.template operator()<int>();
-        break;
-    case proto::VarType::INT64:
-        visitor.template operator()<int64_t>();
-        break;
-    case proto::VarType::BOOL:
-        visitor.template operator()<bool>();
-        break;
-    default:
-        //      PADDLE_THROW("Not supported");
-        printf("Not supported");
-    }
+  switch (type) { // Calls visitor.operator()<T>() with the C++ type T that corresponds to `type`; the visitor is taken by value.
+  //    case proto::VarType::FP16:
+  //      visitor.template operator()<platform::float16>();
+  //      break;
+  case proto::VarType::FP32:
+    visitor.template operator()<float>(); // `template` disambiguator: operator() is a member template of the dependent type Visitor
+    break;
+  case proto::VarType::FP64:
+    visitor.template operator()<double>();
+    break;
+  case proto::VarType::INT32:
+    visitor.template operator()<int>();
+    break;
+  case proto::VarType::INT64:
+    visitor.template operator()<int64_t>();
+    break;
+  case proto::VarType::BOOL:
+    visitor.template operator()<bool>();
+    break;
+  default: // unsupported values: the visitor is not invoked at all
+    //      PADDLE_THROW("Not supported");
+    printf("Not supported"); // message is printed without a trailing newline
+  }
 }

 inline std::string DataTypeToString(const proto::VarType::Type type) {
-    switch (type) {
-    case proto::VarType::FP16:
-        return "float16";
-    case proto::VarType::FP32:
-        return "float32";
-    case proto::VarType::FP64:
-        return "float64";
-    case proto::VarType::INT16:
-        return "int16";
-    case proto::VarType::INT32:
-        return "int32";
-    case proto::VarType::INT64:
-        return "int64";
-    case proto::VarType::BOOL:
-        return "bool";
-    default:
-        //      PADDLE_THROW("Not support type %d", type);
-        printf("Not support type %d", type);
-    }
+  switch (type) { // Returns a lowercase textual name for `type`.
+  case proto::VarType::FP16: // FP16 and INT16 are named here although ToTypeIndex/VisitDataType in this file have no active case for them
+    return "float16";
+  case proto::VarType::FP32:
+    return "float32";
+  case proto::VarType::FP64:
+    return "float64";
+  case proto::VarType::INT16:
+    return "int16";
+  case proto::VarType::INT32:
+    return "int32";
+  case proto::VarType::INT64:
+    return "int64";
+  case proto::VarType::BOOL:
+    return "bool";
+  default:
+    //      PADDLE_THROW("Not support type %d", type);
+    printf("Not support type %d", type); // NOTE(review): no return after this, so the function falls off its end (undefined behavior for a std::string return type)
+  }
 }

 inline std::ostream &operator<<(std::ostream &out,
                                 const proto::VarType::Type &type) {
-    out << DataTypeToString(type);
-    return out;
+  out << DataTypeToString(type); // streams the textual name produced by DataTypeToString, not the numeric enum value
+  return out; // hands the same stream back so insertions can be chained
 }

 } // namespace framework

--- a/src/platform/macros.h
+++ b/src/platform/macros.h
@@ -17,9 +17,9 @@ limitations under the License. */
 // Disable the copy and assignment operator for a class.
 #ifndef DISABLE_COPY_AND_ASSIGN
 #define DISABLE_COPY_AND_ASSIGN(classname)                                     \
-  private:                                                                     \
-    classname(const classname &) = delete;                                     \
-    classname(classname &&) = delete;                                          \
-    classname &operator=(const classname &) = delete;                          \
-    classname &operator=(classname &&) = delete
+private:                                                                       \
+  classname(const classname &) = delete;                                       \
+  classname(classname &&) = delete;                                            \
+  classname &operator=(const classname &) = delete;                            \
+  classname &operator=(classname &&) = delete // also deletes the move operations; no trailing ';' (the use site supplies it), and the expansion leaves the class in a `private:` section
 #endif
--- a/test/common/test_log.cpp
+++ b/test/common/test_log.cpp
@@ -20,20 +20,20 @@ SOFTWARE.

 int main() {
 
-    DLOGF("DASJFDAFJ%d -- %f", 12345, 344.234);
+  DLOGF("DASJFDAFJ%d -- %f", 12345, 344.234); // format-string style logging call with an int and a double argument
 
-    LOGF(paddle_mobile::kLOG_DEBUG, "DASJFDAFJ%d -- %f", 12345, 344.234);
+  LOGF(paddle_mobile::kLOG_DEBUG, "DASJFDAFJ%d -- %f", 12345, 344.234); // same message, with the level passed explicitly
 
-    LOG(paddle_mobile::kLOG_DEBUG) << "test debug"
-                                   << " next log";
+  LOG(paddle_mobile::kLOG_DEBUG) << "test debug" // stream-style logging, one statement per level below
+                                 << " next log";
 
-    LOG(paddle_mobile::kLOG_DEBUG1) << "test debug1"
-                                    << " next log";
-    LOG(paddle_mobile::kLOG_DEBUG2) << "test debug2"
-                                    << " next log";
-    DLOG << "test DLOG";
+  LOG(paddle_mobile::kLOG_DEBUG1) << "test debug1"
+                                  << " next log";
+  LOG(paddle_mobile::kLOG_DEBUG2) << "test debug2"
+                                  << " next log";
+  DLOG << "test DLOG"; // DLOG takes no explicit level argument
 
-    LOG(paddle_mobile::kLOG_ERROR) << " error occur !";
+  LOG(paddle_mobile::kLOG_ERROR) << " error occur !";
 
-    return 0;
+  return 0; // nothing is asserted; the test only exercises the logging macros
 }
--- a/test/framework/executor_for_test.cpp
+++ b/test/framework/executor_for_test.cpp
@@ -23,29 +23,29 @@ Executor4Test<DeviceType, OpType>::Executor4Test(const Program<DeviceType> p,
                                                  std::string op_type)
     : Executor<DeviceType>(p) {
 
-    if (this->program_.originProgram == nullptr) {
-        LOG(paddle_mobile::LogLevel::kLOG_ERROR)
-            << "to_predict_program_ == nullptr";
-    }
+  if (this->program_.originProgram == nullptr) { // the null case is only logged; construction continues afterwards
+    LOG(paddle_mobile::LogLevel::kLOG_ERROR)
+        << "to_predict_program_ == nullptr"; // message names to_predict_program_, while the condition tests program_.originProgram
+  }
 
-    const std::vector<std::shared_ptr<BlockDesc>> blocks =
-        this->to_predict_program_->Blocks();
+  const std::vector<std::shared_ptr<BlockDesc>> blocks =
+      this->to_predict_program_->Blocks(); // to_predict_program_ is dereferenced without a null check of its own
 
-    for (int i = 0; i < blocks.size(); ++i) {
-        std::shared_ptr<BlockDesc> block_desc = blocks[i];
-        std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
-        for (int j = 0; j < ops.size(); ++j) {
-            std::shared_ptr<OpDesc> op = ops[j];
-            if (op->Type() == op_type) {
-                std::shared_ptr<OpType> op_ptr = std::make_shared<OpType>(
-                    op->Type(), op->GetInputs(), op->GetOutputs(),
-                    op->GetAttrMap(), this->program_.scope);
+  for (int i = 0; i < blocks.size(); ++i) { // int index compared against an unsigned size()
+    std::shared_ptr<BlockDesc> block_desc = blocks[i];
+    std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+    for (int j = 0; j < ops.size(); ++j) {
+      std::shared_ptr<OpDesc> op = ops[j];
+      if (op->Type() == op_type) { // instantiate only ops whose type string equals op_type
+        std::shared_ptr<OpType> op_ptr = std::make_shared<OpType>(
+            op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
+            this->program_.scope);
 
-                this->ops_of_block_[*block_desc.get()].push_back(op_ptr);
-                break;
-            }
-        }
+        this->ops_of_block_[*block_desc.get()].push_back(op_ptr);
+        break; // leaves the inner loop: at most one op per block is registered
+      }
     }
+  }
 }

 template <typename DeviceType, typename OpType>
@@ -53,19 +53,19 @@ std::shared_ptr<Tensor>
 Executor4Test<DeviceType, OpType>::predict(Tensor &t, std::string input,
                                            std::string output, DDim dDim) {
 
-    auto scope = this->program_.scope;
-    Variable *g_feed_value = scope->Var(input);
-    auto tensor = g_feed_value->GetMutable<Tensor>();
-    tensor->ShareDataWith(t);
+  auto scope = this->program_.scope; // Binds `t` to the variable named `input`, prepares the variable named `output`, then runs the base executor.
+  Variable *g_feed_value = scope->Var(input);
+  auto tensor = g_feed_value->GetMutable<Tensor>();
+  tensor->ShareDataWith(t); // presumably aliases t's buffer rather than copying it — confirm against Tensor::ShareDataWith
 
-    Variable *con_output = scope->Var(output);
-    Tensor *output_tensor = con_output->GetMutable<Tensor>();
-    output_tensor->mutable_data<float>(dDim);
-    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
-    out_tensor.reset(output_tensor);
+  Variable *con_output = scope->Var(output);
+  Tensor *output_tensor = con_output->GetMutable<Tensor>();
+  output_tensor->mutable_data<float>(dDim); // called before the run, with the caller-supplied dDim
+  std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>(); // this freshly made LoDTensor is released by the reset() on the next line
+  out_tensor.reset(output_tensor); // NOTE(review): the shared_ptr now claims ownership of a pointer obtained from a scope Variable — looks like a double delete if the Variable also owns the tensor; confirm
 
-    Executor<DeviceType>::predict(t, 0);
-    return out_tensor;
+  Executor<DeviceType>::predict(t, 0); // the second argument 0 is presumably the block id — confirm against Executor::predict
+  return out_tensor; // points at the tensor stored in the `output` variable
 }

 template class Executor4Test<

--- a/test/framework/executor_for_test.h
+++ b/test/framework/executor_for_test.h
@@ -27,9 +27,9 @@ using namespace paddle_mobile::framework;

 template <typename DeviceType, typename OpType>
 class Executor4Test : public Executor<DeviceType> {
-  public:
-    Executor4Test(const Program<DeviceType> p, std::string op_type);
+public:
+  Executor4Test(const Program<DeviceType> p, std::string op_type); // both arguments are taken by value; defined in executor_for_test.cpp
 
-    std::shared_ptr<Tensor> predict(Tensor &t, std::string input,
-                                    std::string output, DDim dDim);
+  std::shared_ptr<Tensor> predict(Tensor &t, std::string input,
+                                  std::string output, DDim dDim); // `input`/`output` are variable names looked up in the program scope by the definition
 };
--- a/test/framework/test_load.cpp
+++ b/test/framework/test_load.cpp
@@ -19,10 +19,10 @@ SOFTWARE.
 #include "io.h"

 int main() {
-    paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
 
-    //../../../test/models/googlenet
-    //../../../test/models/mobilenet
-    auto program = loader.Load(std::string("../models/googlenet"));
-    return 0;
+  //../../../test/models/googlenet
+  //../../../test/models/mobilenet
+  auto program = loader.Load(std::string("../models/googlenet")); // relative path, so the result depends on the working directory; `program` is not inspected afterwards
+  return 0; // always returns 0: nothing about the load is checked
 }
\ No newline at end of file
--- a/test/framework/test_optimize.cpp
+++ b/test/framework/test_optimize.cpp
@@ -25,16 +25,16 @@ using namespace paddle_mobile::framework;

 int main() {
 
-    Loader<paddle_mobile::CPU> loader;
-    //    "../../../test/models/googlenet"
-    auto program = loader.Load("../models/googlenet");
+  Loader<paddle_mobile::CPU> loader;
+  //    "../../../test/models/googlenet"
+  auto program = loader.Load("../models/googlenet"); // relative path, so the result depends on the working directory
 
-    ProgramOptimize optimize;
+  ProgramOptimize optimize;
 
-    auto optimize_program = optimize.FushionOptimize(program.originProgram);
-    if (optimize_program) {
+  auto optimize_program = optimize.FushionOptimize(program.originProgram); // the optimizer is fed the origin program
+  if (optimize_program) { // the success branch is empty; only a null result is reported
 
-    } else {
-        DLOG << "optimize_program is null";
-    }
+  } else {
+    DLOG << "optimize_program is null";
+  }
 }
--- a/test/operators/test_batchnorm_op.cpp
+++ b/test/operators/test_batchnorm_op.cpp
@@ -24,102 +24,100 @@ namespace paddle_mobile {
 namespace framework {

 template <typename Dtype> class TestBatchNormOp {
-  public:
-    explicit TestBatchNormOp(const Program<Dtype> p) : program_(p) {
-        if (use_optimize_) {
-            to_predict_program_ = program_.optimizeProgram;
-        } else {
-            to_predict_program_ = program_.originProgram;
-        }
-
-        const std::vector<std::shared_ptr<BlockDesc>> blocks =
-            to_predict_program_->Blocks();
-        //  DLOG << " **block size " << blocks.size();
-        for (int i = 0; i < blocks.size(); ++i) {
-            std::shared_ptr<BlockDesc> block_desc = blocks[i];
-            std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
-            //    DLOG << " ops " << ops.size();
-            for (int j = 0; j < ops.size(); ++j) {
-                std::shared_ptr<OpDesc> op = ops[j];
-                if (op->Type() == "batch_norm" &&
-                    op->Input("X")[0] == "conv2d_0.tmp_0") {
-                    DLOG << " mul attr size: " << op->GetAttrMap().size();
-                    DLOG << " inputs size: " << op->GetInputs().size();
-                    DLOG << " outputs size: " << op->GetOutputs().size();
-                    DLOG << " Input X is : " << op->Input("X")[0];
-                    DLOG << " Input Mean is : " << op->Input("Mean")[0];
-                    DLOG << " Input Variance is : " << op->Input("Variance")[0];
-                    DLOG << " Input Scale is : " << op->Input("Scale")[0];
-                    DLOG << " Input Bias is : " << op->Input("Bias")[0];
-                    DLOG << " Output Y is : " << op->Output("Y")[0];
-                    DLOG << " epsilon : "
-                         << op->GetAttrMap().at("epsilon").Get<float>();
-                    std::shared_ptr<operators::BatchNormOp<Dtype, float>> lrn =
-                        std::make_shared<operators::BatchNormOp<Dtype, float>>(
-                            op->Type(), op->GetInputs(), op->GetOutputs(),
-                            op->GetAttrMap(), program_.scope);
-                    ops_of_block_[*block_desc.get()].push_back(lrn);
-                }
-            }
-        }
+public:
+  explicit TestBatchNormOp(const Program<Dtype> p) : program_(p) { // Scans the selected program and instantiates a BatchNormOp for each matching op description.
+    if (use_optimize_) { // use_optimize_ is initialised to false and never modified in this class, so the else branch is the one taken
+      to_predict_program_ = program_.optimizeProgram;
+    } else {
+      to_predict_program_ = program_.originProgram;
     }
 
-    std::shared_ptr<Tensor> predict_bn(Tensor &t1, Tensor &t2, Tensor &t3,
-                                       Tensor &t4, Tensor &t5) {
-        // feed
-        auto scope = program_.scope;
-        Variable *x1_feed_value = scope->Var("conv2d_0.tmp_0");
-        auto tensor_x1 = x1_feed_value->GetMutable<Tensor>();
-        tensor_x1->ShareDataWith(t1);
-
-        Variable *mean_feed_value = scope->Var("batch_norm_0.w_1");
-        auto tensor_mean = mean_feed_value->GetMutable<Tensor>();
-        tensor_mean->ShareDataWith(t2);
-
-        Variable *scale_feed_value = scope->Var("batch_norm_0.w_0");
-        auto tensor_scale = scale_feed_value->GetMutable<Tensor>();
-        tensor_scale->ShareDataWith(t3);
-
-        Variable *variance_feed_value = scope->Var("batch_norm_0.w_2");
-        auto tensor_variance = variance_feed_value->GetMutable<Tensor>();
-        tensor_variance->ShareDataWith(t4);
-
-        Variable *bias_feed_value = scope->Var("batch_norm_0.b_0");
-        auto tensor_bias = bias_feed_value->GetMutable<Tensor>();
-        tensor_bias->ShareDataWith(t5);
-
-        Variable *output = scope->Var("batch_norm_0.tmp_2");
-        auto *output_tensor = output->GetMutable<Tensor>();
-        output_tensor->mutable_data<float>({4, 10, 2, 2});
-        //  DLOG << typeid(output_tensor).name();
-        //  DLOG << "output_tensor dims: " << output_tensor->dims();
-
-        std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
-        out_tensor.reset(output_tensor);
-
-        predict_bn(t1, t2, t3, t4, t5, 0);
-        return out_tensor;
-    }
-
-  private:
-    const framework::Program<Dtype> program_;
-    std::shared_ptr<ProgramDesc> to_predict_program_;
-    std::map<framework::BlockDesc,
-             std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
-        ops_of_block_;
-    bool use_optimize_ = false;
-
-    void predict_bn(const Tensor &t1, const Tensor &t2, const Tensor &t3,
-                    const Tensor &t4, const Tensor &t5, int block_id) {
-        std::shared_ptr<BlockDesc> to_predict_block =
-            to_predict_program_->Block(block_id);
-        for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size();
-             ++j) {
-            auto op = ops_of_block_[*to_predict_block.get()][j];
-            DLOG << "op -> run()";
-            op->Run();
+    const std::vector<std::shared_ptr<BlockDesc>> blocks =
+        to_predict_program_->Blocks();
+    //  DLOG << " **block size " << blocks.size();
+    for (int i = 0; i < blocks.size(); ++i) { // int index compared against an unsigned size()
+      std::shared_ptr<BlockDesc> block_desc = blocks[i];
+      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+      //    DLOG << " ops " << ops.size();
+      for (int j = 0; j < ops.size(); ++j) {
+        std::shared_ptr<OpDesc> op = ops[j];
+        if (op->Type() == "batch_norm" &&
+            op->Input("X")[0] == "conv2d_0.tmp_0") { // only a batch_norm op whose first "X" input is conv2d_0.tmp_0 is instantiated
+          DLOG << " mul attr size: " << op->GetAttrMap().size(); // the label says "mul" although the op matched is batch_norm
+          DLOG << " inputs size: " << op->GetInputs().size();
+          DLOG << " outputs size: " << op->GetOutputs().size();
+          DLOG << " Input X is : " << op->Input("X")[0];
+          DLOG << " Input Mean is : " << op->Input("Mean")[0];
+          DLOG << " Input Variance is : " << op->Input("Variance")[0];
+          DLOG << " Input Scale is : " << op->Input("Scale")[0];
+          DLOG << " Input Bias is : " << op->Input("Bias")[0];
+          DLOG << " Output Y is : " << op->Output("Y")[0];
+          DLOG << " epsilon : " << op->GetAttrMap().at("epsilon").Get<float>();
+          std::shared_ptr<operators::BatchNormOp<Dtype, float>> lrn = // the variable is named `lrn` but holds a BatchNormOp
+              std::make_shared<operators::BatchNormOp<Dtype, float>>(
+                  op->Type(), op->GetInputs(), op->GetOutputs(),
+                  op->GetAttrMap(), program_.scope);
+          ops_of_block_[*block_desc.get()].push_back(lrn); // the map is keyed by the BlockDesc value, not by the pointer
         }
+      }
+    }
+  }
+
+  std::shared_ptr<Tensor> predict_bn(Tensor &t1, Tensor &t2, Tensor &t3,
+                                     Tensor &t4, Tensor &t5) { // Binds the five tensors to fixed scope variable names, runs block 0, and returns the output tensor.
+    // feed
+    auto scope = program_.scope;
+    Variable *x1_feed_value = scope->Var("conv2d_0.tmp_0");
+    auto tensor_x1 = x1_feed_value->GetMutable<Tensor>();
+    tensor_x1->ShareDataWith(t1); // presumably aliases t1's buffer rather than copying — confirm against Tensor::ShareDataWith
+
+    Variable *mean_feed_value = scope->Var("batch_norm_0.w_1");
+    auto tensor_mean = mean_feed_value->GetMutable<Tensor>();
+    tensor_mean->ShareDataWith(t2);
+
+    Variable *scale_feed_value = scope->Var("batch_norm_0.w_0");
+    auto tensor_scale = scale_feed_value->GetMutable<Tensor>();
+    tensor_scale->ShareDataWith(t3);
+
+    Variable *variance_feed_value = scope->Var("batch_norm_0.w_2");
+    auto tensor_variance = variance_feed_value->GetMutable<Tensor>();
+    tensor_variance->ShareDataWith(t4);
+
+    Variable *bias_feed_value = scope->Var("batch_norm_0.b_0");
+    auto tensor_bias = bias_feed_value->GetMutable<Tensor>();
+    tensor_bias->ShareDataWith(t5);
+
+    Variable *output = scope->Var("batch_norm_0.tmp_2");
+    auto *output_tensor = output->GetMutable<Tensor>();
+    output_tensor->mutable_data<float>({4, 10, 2, 2}); // hard-coded {4, 10, 2, 2}, independent of the tensors passed in
+    //  DLOG << typeid(output_tensor).name();
+    //  DLOG << "output_tensor dims: " << output_tensor->dims();
+
+    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>(); // this freshly made LoDTensor is released by the reset() on the next line
+    out_tensor.reset(output_tensor); // NOTE(review): the shared_ptr now claims ownership of a pointer obtained from a scope Variable — looks like a double delete if the Variable also owns the tensor; confirm
+
+    predict_bn(t1, t2, t3, t4, t5, 0); // resolves to the private six-argument overload, with block_id = 0
+    return out_tensor;
+  }
+
+private:
+  const framework::Program<Dtype> program_; // copy of the program passed to the constructor
+  std::shared_ptr<ProgramDesc> to_predict_program_;
+  std::map<framework::BlockDesc,
+           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
+      ops_of_block_; // operators created by the constructor, grouped per block
+  bool use_optimize_ = false; // never changed in this class, so originProgram is always selected
+
+  void predict_bn(const Tensor &t1, const Tensor &t2, const Tensor &t3,
+                  const Tensor &t4, const Tensor &t5, int block_id) { // the five tensor parameters are not read in this body; only block_id is used
+    std::shared_ptr<BlockDesc> to_predict_block =
+        to_predict_program_->Block(block_id);
+    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) { // the map lookup is repeated on every iteration
+      auto op = ops_of_block_[*to_predict_block.get()][j];
+      DLOG << "op -> run()";
+      op->Run(); // runs each registered op of the block in insertion order
     }
+  }
 };

 template class TestBatchNormOp<CPU>;
@@ -127,50 +125,48 @@ template class TestBatchNormOp<CPU>;
 } // namespace paddle_mobile

 int main() {
-    DLOG << "----------**********----------";
-    DLOG << "begin to run BatchNormOp Test";
-    paddle_mobile::Loader<paddle_mobile::CPU> loader;
-    auto program = loader.Load(std::string(
-        "../../test/models/image_classification_resnet.inference.model"));
-
-    /// input x (4,10,2,2)
-    paddle_mobile::framework::Tensor inputx1;
-    SetupTensor<float>(&inputx1, {4, 10, 2, 2}, static_cast<float>(0),
-                       static_cast<float>(1));
-    auto *inputx1_ptr = inputx1.data<float>();
-
-    paddle_mobile::framework::Tensor mean;
-    SetupTensor<float>(&mean, {10}, static_cast<float>(0),
-                       static_cast<float>(1));
-    auto *mean_ptr = mean.data<float>();
-
-    paddle_mobile::framework::Tensor scale;
-    SetupTensor<float>(&scale, {10}, static_cast<float>(0),
-                       static_cast<float>(1));
-    auto *scale_ptr = scale.data<float>();
-
-    paddle_mobile::framework::Tensor variance;
-    SetupTensor<float>(&variance, {10}, static_cast<float>(0),
-                       static_cast<float>(1));
-    auto *variance_ptr = variance.data<float>();
-
-    paddle_mobile::framework::Tensor bias;
-    SetupTensor<float>(&bias, {10}, static_cast<float>(0),
-                       static_cast<float>(1));
-    auto *bias_ptr = bias.data<float>();
-
-    paddle_mobile::framework::TestBatchNormOp<paddle_mobile::CPU>
-        testBatchNormOp(program);
-
-    auto output_bn =
-        testBatchNormOp.predict_bn(inputx1, mean, scale, variance, bias);
-    auto *output_bn_ptr = output_bn->data<float>();
-
-    /// [2, 5, 1, 0]
-    DLOG << " (" << inputx1_ptr[102] << " - " << mean_ptr[5] << ")/(("
-         << variance_ptr[5] << " + 0.00001"
-         << ")^0.5)* " << scale_ptr[5] << " + " << bias_ptr[5] << " = ";
-    DLOG << output_bn_ptr[102];
-
-    return 0;
+  DLOG << "----------**********----------";
+  DLOG << "begin to run BatchNormOp Test";
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string(
+      "../../test/models/image_classification_resnet.inference.model"));
+
+  /// input x (4,10,2,2)
+  paddle_mobile::framework::Tensor inputx1;
+  SetupTensor<float>(&inputx1, {4, 10, 2, 2}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx1_ptr = inputx1.data<float>();
+
+  paddle_mobile::framework::Tensor mean;
+  SetupTensor<float>(&mean, {10}, static_cast<float>(0), static_cast<float>(1));
+  auto *mean_ptr = mean.data<float>();
+
+  paddle_mobile::framework::Tensor scale;
+  SetupTensor<float>(&scale, {10}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *scale_ptr = scale.data<float>();
+
+  paddle_mobile::framework::Tensor variance;
+  SetupTensor<float>(&variance, {10}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *variance_ptr = variance.data<float>();
+
+  paddle_mobile::framework::Tensor bias;
+  SetupTensor<float>(&bias, {10}, static_cast<float>(0), static_cast<float>(1));
+  auto *bias_ptr = bias.data<float>();
+
+  paddle_mobile::framework::TestBatchNormOp<paddle_mobile::CPU> testBatchNormOp(
+      program);
+
+  auto output_bn =
+      testBatchNormOp.predict_bn(inputx1, mean, scale, variance, bias);
+  auto *output_bn_ptr = output_bn->data<float>();
+
+  /// [2, 5, 1, 0]
+  DLOG << " (" << inputx1_ptr[102] << " - " << mean_ptr[5] << ")/(("
+       << variance_ptr[5] << " + 0.00001"
+       << ")^0.5)* " << scale_ptr[5] << " + " << bias_ptr[5] << " = ";
+  DLOG << output_bn_ptr[102];
+
+  return 0;
 }
--- a/test/operators/test_concat_op.cpp
+++ b/test/operators/test_concat_op.cpp
@@ -24,95 +24,92 @@ namespace paddle_mobile {
 namespace framework {

 template <typename Dtype> class TestConcatOp {
-  public:
-    explicit TestConcatOp(const Program<Dtype> p) : program_(p) {
-        if (use_optimize_) {
-            to_predict_program_ = program_.optimizeProgram;
-        } else {
-            to_predict_program_ = program_.originProgram;
-        }
-
-        const std::vector<std::shared_ptr<BlockDesc>> blocks =
-            to_predict_program_->Blocks();
-        //  DLOG << " **block size " << blocks.size();
-        for (int i = 0; i < blocks.size(); ++i) {
-            std::shared_ptr<BlockDesc> block_desc = blocks[i];
-            std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
-            //    DLOG << " ops " << ops.size();
-            for (int j = 0; j < ops.size(); ++j) {
-                std::shared_ptr<OpDesc> op = ops[j];
-                if (op->Type() == "concat" &&
-                    op->Input("X")[0] == "conv2d_3.tmp_1") {
-                    DLOG << " mul attr size: " << op->GetAttrMap().size();
-                    DLOG << " inputs size: " << op->GetInputs().size();
-                    DLOG << " outputs size: " << op->GetOutputs().size();
-                    DLOG << " Input X is : " << op->Input("X")[0];
-                    DLOG << " Output Out is : " << op->Output("Out")[0];
-                    DLOG << " axis : "
-                         << op->GetAttrMap().at("axis").Get<int>();
-
-                    std::shared_ptr<operators::ConcatOp<Dtype, float>> concat =
-                        std::make_shared<operators::ConcatOp<Dtype, float>>(
-                            op->Type(), op->GetInputs(), op->GetOutputs(),
-                            op->GetAttrMap(), program_.scope);
-                    ops_of_block_[*block_desc.get()].push_back(concat);
-                }
-            }
-        }
+public:
+  explicit TestConcatOp(const Program<Dtype> p) : program_(p) {
+    if (use_optimize_) {
+      to_predict_program_ = program_.optimizeProgram;
+    } else {
+      to_predict_program_ = program_.originProgram;
    }

-    std::shared_ptr<Tensor> predict_concat(Tensor &t1, Tensor &t2, Tensor &t3,
-                                           Tensor &t4) {
-        // feed
-        auto scope = program_.scope;
-        Variable *x1_feed_value = scope->Var("conv2d_3.tmp_1");
-        auto tensor_x1 = x1_feed_value->GetMutable<Tensor>();
-        tensor_x1->ShareDataWith(t1);
-
-        Variable *x2_feed_value = scope->Var("conv2d_5.tmp_1");
-        auto tensor_x2 = x2_feed_value->GetMutable<Tensor>();
-        tensor_x2->ShareDataWith(t2);
-
-        Variable *x3_feed_value = scope->Var("conv2d_7.tmp_1");
-        auto tensor_x3 = x3_feed_value->GetMutable<Tensor>();
-        tensor_x3->ShareDataWith(t3);
-
-        Variable *x4_feed_value = scope->Var("conv2d_8.tmp_1");
-        auto tensor_x4 = x4_feed_value->GetMutable<Tensor>();
-        tensor_x4->ShareDataWith(t4);
-
-        Variable *con_output = scope->Var("concat_0.tmp_0");
-        auto *output_tensor = con_output->GetMutable<Tensor>();
-        output_tensor->mutable_data<float>({4, 100, 2, 2});
-        //  DLOG << typeid(output_tensor).name();
-        //  DLOG << "output_tensor dims: " << output_tensor->dims();
-
-        std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
-        out_tensor.reset(output_tensor);
-
-        predict_concat(t1, t2, t3, t4, 0);
-        return out_tensor;
-    }
-
-  private:
-    const framework::Program<Dtype> program_;
-    std::shared_ptr<ProgramDesc> to_predict_program_;
-    std::map<framework::BlockDesc,
-             std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
-        ops_of_block_;
-    bool use_optimize_ = false;
-
-    void predict_concat(const Tensor &t1, const Tensor &t2, const Tensor &t3,
-                        const Tensor &t4, int block_id) {
-        std::shared_ptr<BlockDesc> to_predict_block =
-            to_predict_program_->Block(block_id);
-        for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size();
-             ++j) {
-            auto op = ops_of_block_[*to_predict_block.get()][j];
-            DLOG << "op -> run()";
-            op->Run();
+    const std::vector<std::shared_ptr<BlockDesc>> blocks =
+        to_predict_program_->Blocks();
+    //  DLOG << " **block size " << blocks.size();
+    for (int i = 0; i < blocks.size(); ++i) {
+      std::shared_ptr<BlockDesc> block_desc = blocks[i];
+      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+      //    DLOG << " ops " << ops.size();
+      for (int j = 0; j < ops.size(); ++j) {
+        std::shared_ptr<OpDesc> op = ops[j];
+        if (op->Type() == "concat" && op->Input("X")[0] == "conv2d_3.tmp_1") {
+          DLOG << " mul attr size: " << op->GetAttrMap().size();
+          DLOG << " inputs size: " << op->GetInputs().size();
+          DLOG << " outputs size: " << op->GetOutputs().size();
+          DLOG << " Input X is : " << op->Input("X")[0];
+          DLOG << " Output Out is : " << op->Output("Out")[0];
+          DLOG << " axis : " << op->GetAttrMap().at("axis").Get<int>();
+
+          std::shared_ptr<operators::ConcatOp<Dtype, float>> concat =
+              std::make_shared<operators::ConcatOp<Dtype, float>>(
+                  op->Type(), op->GetInputs(), op->GetOutputs(),
+                  op->GetAttrMap(), program_.scope);
+          ops_of_block_[*block_desc.get()].push_back(concat);
        }
+      }
+    }
+  }
+
+  std::shared_ptr<Tensor> predict_concat(Tensor &t1, Tensor &t2, Tensor &t3,
+                                         Tensor &t4) {
+    // feed
+    auto scope = program_.scope;
+    Variable *x1_feed_value = scope->Var("conv2d_3.tmp_1");
+    auto tensor_x1 = x1_feed_value->GetMutable<Tensor>();
+    tensor_x1->ShareDataWith(t1);
+
+    Variable *x2_feed_value = scope->Var("conv2d_5.tmp_1");
+    auto tensor_x2 = x2_feed_value->GetMutable<Tensor>();
+    tensor_x2->ShareDataWith(t2);
+
+    Variable *x3_feed_value = scope->Var("conv2d_7.tmp_1");
+    auto tensor_x3 = x3_feed_value->GetMutable<Tensor>();
+    tensor_x3->ShareDataWith(t3);
+
+    Variable *x4_feed_value = scope->Var("conv2d_8.tmp_1");
+    auto tensor_x4 = x4_feed_value->GetMutable<Tensor>();
+    tensor_x4->ShareDataWith(t4);
+
+    Variable *con_output = scope->Var("concat_0.tmp_0");
+    auto *output_tensor = con_output->GetMutable<Tensor>();
+    output_tensor->mutable_data<float>({4, 100, 2, 2});
+    //  DLOG << typeid(output_tensor).name();
+    //  DLOG << "output_tensor dims: " << output_tensor->dims();
+
+    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
+    out_tensor.reset(output_tensor);
+
+    predict_concat(t1, t2, t3, t4, 0);
+    return out_tensor;
+  }
+
+private:
+  const framework::Program<Dtype> program_;
+  std::shared_ptr<ProgramDesc> to_predict_program_;
+  std::map<framework::BlockDesc,
+           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
+      ops_of_block_;
+  bool use_optimize_ = false;
+
+  void predict_concat(const Tensor &t1, const Tensor &t2, const Tensor &t3,
+                      const Tensor &t4, int block_id) {
+    std::shared_ptr<BlockDesc> to_predict_block =
+        to_predict_program_->Block(block_id);
+    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
+      auto op = ops_of_block_[*to_predict_block.get()][j];
+      DLOG << "op -> run()";
+      op->Run();
    }
+  }
 };

 template class TestConcatOp<CPU>;
@@ -120,60 +117,59 @@ template class TestConcatOp<CPU>;
 } // namespace paddle_mobile

 int main() {
-    DLOG << "----------**********----------";
-    DLOG << "begin to run ConcatOp Test";
-    paddle_mobile::Loader<paddle_mobile::CPU> loader;
-    auto program = loader.Load(std::string("../../test/models/googlenet"));
-
-    /// input x (4,10,2,2)
-    paddle_mobile::framework::Tensor inputx1;
-    SetupTensor<float>(&inputx1, {4, 10, 2, 2}, static_cast<float>(0),
-                       static_cast<float>(1));
-    auto *inputx1_ptr = inputx1.data<float>();
-    /// input x (4,20,2,2)
-    paddle_mobile::framework::Tensor inputx2;
-    SetupTensor<float>(&inputx2, {4, 20, 2, 2}, static_cast<float>(0),
-                       static_cast<float>(1));
-    auto *inputx2_ptr = inputx2.data<float>();
-    /// input x (4,30,2,2)
-    paddle_mobile::framework::Tensor inputx3;
-    SetupTensor<float>(&inputx3, {4, 30, 2, 2}, static_cast<float>(0),
-                       static_cast<float>(1));
-    auto *inputx3_ptr = inputx3.data<float>();
-    /// input x (4,40,2,2)
-    paddle_mobile::framework::Tensor inputx4;
-    SetupTensor<float>(&inputx4, {4, 40, 2, 2}, static_cast<float>(0),
-                       static_cast<float>(1));
-    auto *inputx4_ptr = inputx4.data<float>();
-
-    paddle_mobile::framework::TestConcatOp<paddle_mobile::CPU> testConcatOp(
-        program);
-
-    auto output_concat =
-        testConcatOp.predict_concat(inputx1, inputx2, inputx3, inputx4);
-    auto *output_concat_ptr = output_concat->data<float>();
-
-    int input_n = 1;
-    int input_c = 2;
-    int input_h = 0;
-    int input_w = 1;
-    int stride0 = inputx3.numel() / inputx3.dims()[0];
-    int stride1 = inputx3.numel() / inputx3.dims()[0] / inputx3.dims()[1];
-    int stride2 = inputx3.dims()[3];
-    /// inputx1 (4,10,2,2),
-    /// inputx2 (4,20,2,2),
-    /// inputx3 (4,30,2,2),
-    /// inputx4 (4,40,2,2),
-    /// axis = 1
-    /// output (4,100,2,2)
-    int input_index =
-        input_n * stride0 + input_c * stride1 + input_h * stride2 + input_w;
-    int output_index =
-        input_n * 100 * 2 * 2 +
-        (input_c + inputx1.dims()[1] + inputx2.dims()[1]) * 2 * 2 +
-        input_h * 2 + input_w;
-
-    DLOG << " inputx3[1,2,0,1] = " << inputx3_ptr[input_index];
-    DLOG << " output[1,12,0,1] = " << output_concat_ptr[output_index];
-    return 0;
+  DLOG << "----------**********----------";
+  DLOG << "begin to run ConcatOp Test";
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string("../../test/models/googlenet"));
+
+  /// input x (4,10,2,2)
+  paddle_mobile::framework::Tensor inputx1;
+  SetupTensor<float>(&inputx1, {4, 10, 2, 2}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx1_ptr = inputx1.data<float>();
+  /// input x (4,20,2,2)
+  paddle_mobile::framework::Tensor inputx2;
+  SetupTensor<float>(&inputx2, {4, 20, 2, 2}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx2_ptr = inputx2.data<float>();
+  /// input x (4,30,2,2)
+  paddle_mobile::framework::Tensor inputx3;
+  SetupTensor<float>(&inputx3, {4, 30, 2, 2}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx3_ptr = inputx3.data<float>();
+  /// input x (4,40,2,2)
+  paddle_mobile::framework::Tensor inputx4;
+  SetupTensor<float>(&inputx4, {4, 40, 2, 2}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx4_ptr = inputx4.data<float>();
+
+  paddle_mobile::framework::TestConcatOp<paddle_mobile::CPU> testConcatOp(
+      program);
+
+  auto output_concat =
+      testConcatOp.predict_concat(inputx1, inputx2, inputx3, inputx4);
+  auto *output_concat_ptr = output_concat->data<float>();
+
+  int input_n = 1;
+  int input_c = 2;
+  int input_h = 0;
+  int input_w = 1;
+  int stride0 = inputx3.numel() / inputx3.dims()[0];
+  int stride1 = inputx3.numel() / inputx3.dims()[0] / inputx3.dims()[1];
+  int stride2 = inputx3.dims()[3];
+  /// inputx1 (4,10,2,2),
+  /// inputx2 (4,20,2,2),
+  /// inputx3 (4,30,2,2),
+  /// inputx4 (4,40,2,2),
+  /// axis = 1
+  /// output (4,100,2,2)
+  int input_index =
+      input_n * stride0 + input_c * stride1 + input_h * stride2 + input_w;
+  int output_index = input_n * 100 * 2 * 2 +
+                     (input_c + inputx1.dims()[1] + inputx2.dims()[1]) * 2 * 2 +
+                     input_h * 2 + input_w;
+
+  DLOG << " inputx3[1,2,0,1] = " << inputx3_ptr[input_index];
+  DLOG << " output[1,12,0,1] = " << output_concat_ptr[output_index];
+  return 0;
 }
--- a/test/operators/test_cov_op.cpp
+++ b/test/operators/test_cov_op.cpp
@@ -21,26 +21,26 @@ SOFTWARE.
 #include "io.h"

 int main() {
-    paddle_mobile::Loader<paddle_mobile::CPU> loader;
-    auto program = loader.Load(std::string("../models/googlenet"));
-    if (program.originProgram == nullptr) {
-        DLOG << "program file read fail";
-    }
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string("../models/googlenet"));
+  if (program.originProgram == nullptr) {
+    DLOG << "program file read fail";
+  }

-    Executor4Test<paddle_mobile::CPU,
-                  paddle_mobile::operators::ConvOp<paddle_mobile::CPU, float>>
-        executor(program, "conv2d");
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::ConvOp<paddle_mobile::CPU, float>>
+      executor(program, "conv2d");

-    paddle_mobile::framework::Tensor input;
-    SetupTensor<float>(&input, {1, 3, 32, 32}, static_cast<float>(0),
-                       static_cast<float>(1));
+  paddle_mobile::framework::Tensor input;
+  SetupTensor<float>(&input, {1, 3, 32, 32}, static_cast<float>(0),
+                     static_cast<float>(1));

-    auto output =
-        executor.predict(input, "data", "conv2d_0.tmp_0", {1, 64, 56, 56});
+  auto output =
+      executor.predict(input, "data", "conv2d_0.tmp_0", {1, 64, 56, 56});

-    float *output_ptr = output->data<float>();
-    for (int j = 0; j < output->numel(); ++j) {
-        DLOG << " value of output: " << output_ptr[j];
-    }
-    return 0;
+  float *output_ptr = output->data<float>();
+  for (int j = 0; j < output->numel(); ++j) {
+    DLOG << " value of output: " << output_ptr[j];
+  }
+  return 0;
 }
--- a/test/operators/test_elementwise_add_op.cpp
+++ b/test/operators/test_elementwise_add_op.cpp
@@ -24,127 +24,124 @@ namespace paddle_mobile {
 namespace framework {

 template <typename Dtype> class TestElementwiseAddOp {
-  public:
-    explicit TestElementwiseAddOp(const Program<Dtype> p) : program_(p) {
-        if (use_optimize_) {
-            to_predict_program_ = program_.optimizeProgram;
-        } else {
-            to_predict_program_ = program_.originProgram;
-        }
-
-        const std::vector<std::shared_ptr<BlockDesc>> blocks =
-            to_predict_program_->Blocks();
-        //  DLOG << " **block size " << blocks.size();
-        for (int i = 0; i < blocks.size(); ++i) {
-            std::shared_ptr<BlockDesc> block_desc = blocks[i];
-            std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
-            //    DLOG << " ops " << ops.size();
-            for (int j = 0; j < ops.size(); ++j) {
-                std::shared_ptr<OpDesc> op = ops[j];
-                if (op->Type() == "elementwise_add" &&
-                    op->Input("X")[0] == "batch_norm_2.tmp_2") {
-                    DLOG << " elementwise_add attr size: "
-                         << op->GetAttrMap().size();
-                    DLOG << " inputs size: " << op->GetInputs().size();
-                    DLOG << " outputs size: " << op->GetOutputs().size();
-                    DLOG << " Input X is : " << op->Input("X")[0];
-                    DLOG << " Input Y is : " << op->Input("Y")[0];
-                    DLOG << " Output Out is : " << op->Output("Out")[0];
-                    Attribute axis_attr = op->GetAttrMap().at("axis");
-                    int axis = axis_attr.Get<int>();
-                    DLOG << " Attr axis is : " << axis;
-
-                    std::shared_ptr<operators::ElementwiseAddOp<Dtype, float>>
-                        add = std::make_shared<
-                            operators::ElementwiseAddOp<Dtype, float>>(
-                            op->Type(), op->GetInputs(), op->GetOutputs(),
-                            op->GetAttrMap(), program_.scope);
-                    ops_of_block_[*block_desc.get()].push_back(add);
-                }
-            }
-        }
-    }
-
-    std::shared_ptr<Tensor> predict_add(Tensor &t1, Tensor &t2) {
-        // feed
-        auto scope = program_.scope;
-        Variable *x_feed_value = scope->Var("batch_norm_2.tmp_2");
-        auto tensor_x = x_feed_value->GetMutable<Tensor>();
-        tensor_x->ShareDataWith(t1);
-
-        Variable *y_feed_value = scope->Var("batch_norm_0.tmp_3");
-        auto tensor_y = y_feed_value->GetMutable<Tensor>();
-        tensor_y->ShareDataWith(t2);
-
-        Variable *con_output = scope->Var("elementwise_add_0.tmp_0");
-        auto *output_tensor = con_output->GetMutable<Tensor>();
-        output_tensor->mutable_data<float>({1, 3, 224, 224});
-        //  DLOG << typeid(output_tensor).name();
-        //  DLOG << "output_tensor dims: " << output_tensor->dims();
-
-        std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
-        out_tensor.reset(output_tensor);
-
-        predict_add(t1, t2, 0);
-        return out_tensor;
+public:
+  explicit TestElementwiseAddOp(const Program<Dtype> p) : program_(p) {
+    if (use_optimize_) {
+      to_predict_program_ = program_.optimizeProgram;
+    } else {
+      to_predict_program_ = program_.originProgram;
    }

-  private:
-    const framework::Program<Dtype> program_;
-    std::shared_ptr<ProgramDesc> to_predict_program_;
-    std::map<framework::BlockDesc,
-             std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
-        ops_of_block_;
-    bool use_optimize_ = false;
-
-    void predict_add(const Tensor &t1, const Tensor &t2, int block_id) {
-        std::shared_ptr<BlockDesc> to_predict_block =
-            to_predict_program_->Block(block_id);
-        for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size();
-             ++j) {
-            auto op = ops_of_block_[*to_predict_block.get()][j];
-            DLOG << "op -> run()";
-            op->Run();
+    const std::vector<std::shared_ptr<BlockDesc>> blocks =
+        to_predict_program_->Blocks();
+    //  DLOG << " **block size " << blocks.size();
+    for (int i = 0; i < blocks.size(); ++i) {
+      std::shared_ptr<BlockDesc> block_desc = blocks[i];
+      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+      //    DLOG << " ops " << ops.size();
+      for (int j = 0; j < ops.size(); ++j) {
+        std::shared_ptr<OpDesc> op = ops[j];
+        if (op->Type() == "elementwise_add" &&
+            op->Input("X")[0] == "batch_norm_2.tmp_2") {
+          DLOG << " elementwise_add attr size: " << op->GetAttrMap().size();
+          DLOG << " inputs size: " << op->GetInputs().size();
+          DLOG << " outputs size: " << op->GetOutputs().size();
+          DLOG << " Input X is : " << op->Input("X")[0];
+          DLOG << " Input Y is : " << op->Input("Y")[0];
+          DLOG << " Output Out is : " << op->Output("Out")[0];
+          Attribute axis_attr = op->GetAttrMap().at("axis");
+          int axis = axis_attr.Get<int>();
+          DLOG << " Attr axis is : " << axis;
+
+          std::shared_ptr<operators::ElementwiseAddOp<Dtype, float>> add =
+              std::make_shared<operators::ElementwiseAddOp<Dtype, float>>(
+                  op->Type(), op->GetInputs(), op->GetOutputs(),
+                  op->GetAttrMap(), program_.scope);
+          ops_of_block_[*block_desc.get()].push_back(add);
        }
+      }
+    }
+  }
+
+  std::shared_ptr<Tensor> predict_add(Tensor &t1, Tensor &t2) {
+    // feed
+    auto scope = program_.scope;
+    Variable *x_feed_value = scope->Var("batch_norm_2.tmp_2");
+    auto tensor_x = x_feed_value->GetMutable<Tensor>();
+    tensor_x->ShareDataWith(t1);
+
+    Variable *y_feed_value = scope->Var("batch_norm_0.tmp_3");
+    auto tensor_y = y_feed_value->GetMutable<Tensor>();
+    tensor_y->ShareDataWith(t2);
+
+    Variable *con_output = scope->Var("elementwise_add_0.tmp_0");
+    auto *output_tensor = con_output->GetMutable<Tensor>();
+    output_tensor->mutable_data<float>({1, 3, 224, 224});
+    //  DLOG << typeid(output_tensor).name();
+    //  DLOG << "output_tensor dims: " << output_tensor->dims();
+
+    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
+    out_tensor.reset(output_tensor);
+
+    predict_add(t1, t2, 0);
+    return out_tensor;
+  }
+
+private:
+  const framework::Program<Dtype> program_;
+  std::shared_ptr<ProgramDesc> to_predict_program_;
+  std::map<framework::BlockDesc,
+           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
+      ops_of_block_;
+  bool use_optimize_ = false;
+
+  void predict_add(const Tensor &t1, const Tensor &t2, int block_id) {
+    std::shared_ptr<BlockDesc> to_predict_block =
+        to_predict_program_->Block(block_id);
+    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
+      auto op = ops_of_block_[*to_predict_block.get()][j];
+      DLOG << "op -> run()";
+      op->Run();
    }
+  }
 };

 template class TestElementwiseAddOp<CPU>;
 } // namespace framework
 } // namespace paddle_mobile
 int main() {
-    DLOG << "----------**********----------";
-    DLOG << "begin to run ElementAddOp Test";
-    paddle_mobile::Loader<paddle_mobile::CPU> loader;
-    auto program =
-        loader.Load(std::string("../../test/models/"
-                                "image_classification_resnet.inference.model"));
-
-    /// input x (1,3,224,224)
-    paddle_mobile::framework::Tensor inputx;
-    SetupTensor<float>(&inputx, {1, 3, 224, 224}, static_cast<float>(0),
-                       static_cast<float>(1));
-    auto *inputx_ptr = inputx.data<float>();
-    /// input y (224,)
-    paddle_mobile::framework::Tensor inputy;
-    SetupTensor<float>(&inputy, {224}, static_cast<float>(0),
-                       static_cast<float>(1));
-    auto *inputy_ptr = inputy.data<float>();
-
-    paddle_mobile::framework::TestElementwiseAddOp<paddle_mobile::CPU>
-        testElementwiseAddOp(program);
-
-    auto output_add = testElementwiseAddOp.predict_add(inputx, inputy);
-    auto *output_add_ptr = output_add->data<float>();
-    //            for (int j = 0; j < output_add->numel(); ++j) {
-    //                DLOG << "value of output: " << output_add_ptr[j];
-    //            }
-
-    /// output (1,3,224,224)
-    DLOG << "output memory size : " << output_add->memory_size();
-    DLOG << "output numel : " << output_add->numel();
-
-    DLOG << inputx_ptr[226] << " + " << inputy_ptr[2] << " = "
-         << output_add_ptr[226];
-    return 0;
+  DLOG << "----------**********----------";
+  DLOG << "begin to run ElementAddOp Test";
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program =
+      loader.Load(std::string("../../test/models/"
+                              "image_classification_resnet.inference.model"));
+
+  /// input x (1,3,224,224)
+  paddle_mobile::framework::Tensor inputx;
+  SetupTensor<float>(&inputx, {1, 3, 224, 224}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx_ptr = inputx.data<float>();
+  /// input y (224,)
+  paddle_mobile::framework::Tensor inputy;
+  SetupTensor<float>(&inputy, {224}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputy_ptr = inputy.data<float>();
+
+  paddle_mobile::framework::TestElementwiseAddOp<paddle_mobile::CPU>
+      testElementwiseAddOp(program);
+
+  auto output_add = testElementwiseAddOp.predict_add(inputx, inputy);
+  auto *output_add_ptr = output_add->data<float>();
+  //            for (int j = 0; j < output_add->numel(); ++j) {
+  //                DLOG << "value of output: " << output_add_ptr[j];
+  //            }
+
+  /// output (1,3,224,224)
+  DLOG << "output memory size : " << output_add->memory_size();
+  DLOG << "output numel : " << output_add->numel();
+
+  DLOG << inputx_ptr[226] << " + " << inputy_ptr[2] << " = "
+       << output_add_ptr[226];
+  return 0;
 }
--- a/test/operators/test_lrn_op.cpp
+++ b/test/operators/test_lrn_op.cpp
@@ -24,84 +24,80 @@ namespace paddle_mobile {
 namespace framework {

 template <typename Dtype> class TestLrnOp {
-  public:
-    explicit TestLrnOp(const Program<Dtype> p) : program_(p) {
-        if (use_optimize_) {
-            to_predict_program_ = program_.optimizeProgram;
-        } else {
-            to_predict_program_ = program_.originProgram;
-        }
-
-        const std::vector<std::shared_ptr<BlockDesc>> blocks =
-            to_predict_program_->Blocks();
-        //  DLOG << " **block size " << blocks.size();
-        for (int i = 0; i < blocks.size(); ++i) {
-            std::shared_ptr<BlockDesc> block_desc = blocks[i];
-            std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
-            //    DLOG << " ops " << ops.size();
-            for (int j = 0; j < ops.size(); ++j) {
-                std::shared_ptr<OpDesc> op = ops[j];
-                if (op->Type() == "lrn" &&
-                    op->Input("X")[0] == "pool2d_0.tmp_0") {
-                    DLOG << " mul attr size: " << op->GetAttrMap().size();
-                    DLOG << " inputs size: " << op->GetInputs().size();
-                    DLOG << " outputs size: " << op->GetOutputs().size();
-                    DLOG << " Input X is : " << op->Input("X")[0];
-                    DLOG << " Output Out is : " << op->Output("Out")[0];
-                    DLOG << " n : " << op->GetAttrMap().at("n").Get<int>();
-                    DLOG << " alpha : "
-                         << op->GetAttrMap().at("alpha").Get<float>();
-                    DLOG << " beta : "
-                         << op->GetAttrMap().at("beta").Get<float>();
-                    DLOG << " k : " << op->GetAttrMap().at("k").Get<float>();
-                    std::shared_ptr<operators::LrnOp<Dtype, float>> lrn =
-                        std::make_shared<operators::LrnOp<Dtype, float>>(
-                            op->Type(), op->GetInputs(), op->GetOutputs(),
-                            op->GetAttrMap(), program_.scope);
-                    ops_of_block_[*block_desc.get()].push_back(lrn);
-                }
-            }
-        }
+public:
+  explicit TestLrnOp(const Program<Dtype> p) : program_(p) {
+    if (use_optimize_) {
+      to_predict_program_ = program_.optimizeProgram;
+    } else {
+      to_predict_program_ = program_.originProgram;
    }

-    std::shared_ptr<Tensor> predict_lrn(Tensor &t1) {
-        // feed
-        auto scope = program_.scope;
-        Variable *x1_feed_value = scope->Var("pool2d_0.tmp_0");
-        auto tensor_x1 = x1_feed_value->GetMutable<Tensor>();
-        tensor_x1->ShareDataWith(t1);
-
-        Variable *con_output = scope->Var("pool1_norm1.tmp_1");
-        auto *output_tensor = con_output->GetMutable<Tensor>();
-        output_tensor->mutable_data<float>({3, 4, 2, 2});
-        //  DLOG << typeid(output_tensor).name();
-        //  DLOG << "output_tensor dims: " << output_tensor->dims();
-
-        std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
-        out_tensor.reset(output_tensor);
-
-        predict_lrn(t1, 0);
-        return out_tensor;
-    }
-
-  private:
-    const framework::Program<Dtype> program_;
-    std::shared_ptr<ProgramDesc> to_predict_program_;
-    std::map<framework::BlockDesc,
-             std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
-        ops_of_block_;
-    bool use_optimize_ = false;
-
-    void predict_lrn(const Tensor &t1, int block_id) {
-        std::shared_ptr<BlockDesc> to_predict_block =
-            to_predict_program_->Block(block_id);
-        for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size();
-             ++j) {
-            auto op = ops_of_block_[*to_predict_block.get()][j];
-            DLOG << "op -> run()";
-            op->Run();
+    const std::vector<std::shared_ptr<BlockDesc>> blocks =
+        to_predict_program_->Blocks();
+    //  DLOG << " **block size " << blocks.size();
+    for (int i = 0; i < blocks.size(); ++i) {
+      std::shared_ptr<BlockDesc> block_desc = blocks[i];
+      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+      //    DLOG << " ops " << ops.size();
+      for (int j = 0; j < ops.size(); ++j) {
+        std::shared_ptr<OpDesc> op = ops[j];
+        if (op->Type() == "lrn" && op->Input("X")[0] == "pool2d_0.tmp_0") {
+          DLOG << " mul attr size: " << op->GetAttrMap().size();
+          DLOG << " inputs size: " << op->GetInputs().size();
+          DLOG << " outputs size: " << op->GetOutputs().size();
+          DLOG << " Input X is : " << op->Input("X")[0];
+          DLOG << " Output Out is : " << op->Output("Out")[0];
+          DLOG << " n : " << op->GetAttrMap().at("n").Get<int>();
+          DLOG << " alpha : " << op->GetAttrMap().at("alpha").Get<float>();
+          DLOG << " beta : " << op->GetAttrMap().at("beta").Get<float>();
+          DLOG << " k : " << op->GetAttrMap().at("k").Get<float>();
+          std::shared_ptr<operators::LrnOp<Dtype, float>> lrn =
+              std::make_shared<operators::LrnOp<Dtype, float>>(
+                  op->Type(), op->GetInputs(), op->GetOutputs(),
+                  op->GetAttrMap(), program_.scope);
+          ops_of_block_[*block_desc.get()].push_back(lrn);
        }
+      }
+    }
+  }
+
+  std::shared_ptr<Tensor> predict_lrn(Tensor &t1) {
+    // feed
+    auto scope = program_.scope;
+    Variable *x1_feed_value = scope->Var("pool2d_0.tmp_0");
+    auto tensor_x1 = x1_feed_value->GetMutable<Tensor>();
+    tensor_x1->ShareDataWith(t1);
+
+    Variable *con_output = scope->Var("pool1_norm1.tmp_1");
+    auto *output_tensor = con_output->GetMutable<Tensor>();
+    output_tensor->mutable_data<float>({3, 4, 2, 2});
+    //  DLOG << typeid(output_tensor).name();
+    //  DLOG << "output_tensor dims: " << output_tensor->dims();
+
+    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
+    out_tensor.reset(output_tensor);
+
+    predict_lrn(t1, 0);
+    return out_tensor;
+  }
+
+private:
+  const framework::Program<Dtype> program_;
+  std::shared_ptr<ProgramDesc> to_predict_program_;
+  std::map<framework::BlockDesc,
+           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
+      ops_of_block_;
+  bool use_optimize_ = false;
+
+  void predict_lrn(const Tensor &t1, int block_id) {
+    std::shared_ptr<BlockDesc> to_predict_block =
+        to_predict_program_->Block(block_id);
+    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
+      auto op = ops_of_block_[*to_predict_block.get()][j];
+      DLOG << "op -> run()";
+      op->Run();
    }
+  }
 };

 template class TestLrnOp<CPU>;
@@ -109,51 +105,50 @@ template class TestLrnOp<CPU>;
 } // namespace paddle_mobile

 int main() {
-    DLOG << "----------**********----------";
-    DLOG << "begin to run LrnOp Test";
-    paddle_mobile::Loader<paddle_mobile::CPU> loader;
-    auto program = loader.Load(std::string("../../test/models/googlenet"));
-
-    /// input x (3,4,2,2)
-    paddle_mobile::framework::Tensor inputx1;
-    SetupTensor<float>(&inputx1, {3, 4, 2, 2}, static_cast<float>(0),
-                       static_cast<float>(1));
-    auto *inputx1_ptr = inputx1.data<float>();
-
-    paddle_mobile::framework::TestLrnOp<paddle_mobile::CPU> testLrnOp(program);
-
-    auto output_lrn = testLrnOp.predict_lrn(inputx1);
-    auto *output_lrn_ptr = output_lrn->data<float>();
-
-    DLOG << " LrnOp input: ";
-    for (int i = 0; i < 3; i++) {
-        for (int j = 0; j < 4; j++) {
-            for (int c = 0; c < 2; c++) {
-                for (int d = 0; d < 2; d++) {
-                    DLOGF("%f ", inputx1_ptr[i * 16 + j * 4 + c * 2 + d]);
-                }
-                DLOGF("\n");
-            }
-            DLOGF("\n");
+  DLOG << "----------**********----------";
+  DLOG << "begin to run LrnOp Test";
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string("../../test/models/googlenet"));
+
+  /// input x (3,4,2,2)
+  paddle_mobile::framework::Tensor inputx1;
+  SetupTensor<float>(&inputx1, {3, 4, 2, 2}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx1_ptr = inputx1.data<float>();
+
+  paddle_mobile::framework::TestLrnOp<paddle_mobile::CPU> testLrnOp(program);
+
+  auto output_lrn = testLrnOp.predict_lrn(inputx1);
+  auto *output_lrn_ptr = output_lrn->data<float>();
+
+  DLOG << " LrnOp input: ";
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 4; j++) {
+      for (int c = 0; c < 2; c++) {
+        for (int d = 0; d < 2; d++) {
+          DLOGF("%f ", inputx1_ptr[i * 16 + j * 4 + c * 2 + d]);
        }
        DLOGF("\n");
+      }
+      DLOGF("\n");
    }
-    DLOG << " LrnOp output: ";
-    for (int i = 0; i < 3; i++) {
-        for (int j = 0; j < 4; j++) {
-            for (int c = 0; c < 2; c++) {
-                for (int d = 0; d < 2; d++) {
-                    DLOGF("%f ", output_lrn_ptr[i * 16 + j * 4 + c * 2 + d]);
-                }
-                DLOGF("\n");
-            }
-            DLOGF("\n");
+    DLOGF("\n");
+  }
+  DLOG << " LrnOp output: ";
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 4; j++) {
+      for (int c = 0; c < 2; c++) {
+        for (int d = 0; d < 2; d++) {
+          DLOGF("%f ", output_lrn_ptr[i * 16 + j * 4 + c * 2 + d]);
        }
        DLOGF("\n");
+      }
+      DLOGF("\n");
    }
-    DLOG << inputx1_ptr[0] << " / ((1 + 0.00002 * ( " << inputx1_ptr[0]
-         << "^2 + " << inputx1_ptr[4] << "^2 + " << inputx1_ptr[8]
-         << "^2 ))^0.75) = ";
-    DLOG << output_lrn_ptr[0];
-    return 0;
+    DLOGF("\n");
+  }
+  DLOG << inputx1_ptr[0] << " / ((1 + 0.00002 * ( " << inputx1_ptr[0] << "^2 + "
+       << inputx1_ptr[4] << "^2 + " << inputx1_ptr[8] << "^2 ))^0.75) = ";
+  DLOG << output_lrn_ptr[0];
+  return 0;
 }
--- a/test/operators/test_mul_op.cpp
+++ b/test/operators/test_mul_op.cpp
@@ -24,88 +24,86 @@ namespace paddle_mobile {
 namespace framework {

 template <typename Dtype> class TestMulOp {
-  public:
-    explicit TestMulOp(const Program<Dtype> p) : program_(p) {
-        if (use_optimize_) {
-            to_predict_program_ = program_.optimizeProgram;
-        } else {
-            to_predict_program_ = program_.originProgram;
-        }
-
-        const std::vector<std::shared_ptr<BlockDesc>> blocks =
-            to_predict_program_->Blocks();
-        //  DLOG << " **block size " << blocks.size();
-        for (int i = 0; i < blocks.size(); ++i) {
-            std::shared_ptr<BlockDesc> block_desc = blocks[i];
-            std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
-            //    DLOG << " ops " << ops.size();
-            for (int j = 0; j < ops.size(); ++j) {
-                std::shared_ptr<OpDesc> op = ops[j];
-                if (op->Type() == "mul" &&
-                    op->Input("X")[0] == "pool2d_0.tmp_0") {
-                    DLOG << " mul attr size: " << op->GetAttrMap().size();
-                    DLOG << " inputs size: " << op->GetInputs().size();
-                    DLOG << " outputs size: " << op->GetOutputs().size();
-                    DLOG << " Input X is : " << op->Input("X")[0];
-                    DLOG << " Input Y is : " << op->Input("Y")[0];
-                    DLOG << " Output Out is : " << op->Output("Out")[0];
-                    DLOG << "x_num_col_dims : "
-                         << op->GetAttrMap().at("x_num_col_dims").Get<int>();
-                    DLOG << "y_num_col_dims : "
-                         << op->GetAttrMap().at("y_num_col_dims").Get<int>();
-
-                    std::shared_ptr<operators::MulOp<Dtype, float>> mul =
-                        std::make_shared<operators::MulOp<Dtype, float>>(
-                            op->Type(), op->GetInputs(), op->GetOutputs(),
-                            op->GetAttrMap(), program_.scope);
-                    ops_of_block_[*block_desc.get()].push_back(mul);
-                }
-            }
-        }
+public:
+  explicit TestMulOp(const Program<Dtype> p) : program_(p) {
+    if (use_optimize_) {
+      to_predict_program_ = program_.optimizeProgram;
+    } else {
+      to_predict_program_ = program_.originProgram;
    }

-    std::shared_ptr<Tensor> predict_mul(Tensor &t1, Tensor &t2) {
-        // feed
-        auto scope = program_.scope;
-        Variable *x_feed_value = scope->Var("pool2d_0.tmp_0");
-        auto tensor_x = x_feed_value->GetMutable<Tensor>();
-        tensor_x->ShareDataWith(t1);
-
-        Variable *y_feed_value = scope->Var("fc_0.w_0");
-        auto tensor_y = y_feed_value->GetMutable<Tensor>();
-        tensor_y->ShareDataWith(t2);
-
-        Variable *con_output = scope->Var("fc_0.tmp_0");
-        auto *output_tensor = con_output->GetMutable<Tensor>();
-        output_tensor->mutable_data<float>({3, 3});
-        //  DLOG << typeid(output_tensor).name();
-        //  DLOG << "output_tensor dims: " << output_tensor->dims();
-
-        std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
-        out_tensor.reset(output_tensor);
-
-        predict_mul(t1, t2, 0);
-        return out_tensor;
-    }
-
-  private:
-    const framework::Program<Dtype> program_;
-    std::shared_ptr<ProgramDesc> to_predict_program_;
-    std::map<framework::BlockDesc,
-             std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
-        ops_of_block_;
-    bool use_optimize_ = false;
-
-    void predict_mul(const Tensor &t1, const Tensor &t2, int block_id) {
-        std::shared_ptr<BlockDesc> to_predict_block =
-            to_predict_program_->Block(block_id);
-        for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size();
-             ++j) {
-            auto op = ops_of_block_[*to_predict_block.get()][j];
-            DLOG << "op -> run()";
-            op->Run();
+    const std::vector<std::shared_ptr<BlockDesc>> blocks =
+        to_predict_program_->Blocks();
+    //  DLOG << " **block size " << blocks.size();
+    for (int i = 0; i < blocks.size(); ++i) {
+      std::shared_ptr<BlockDesc> block_desc = blocks[i];
+      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+      //    DLOG << " ops " << ops.size();
+      for (int j = 0; j < ops.size(); ++j) {
+        std::shared_ptr<OpDesc> op = ops[j];
+        if (op->Type() == "mul" && op->Input("X")[0] == "pool2d_0.tmp_0") {
+          DLOG << " mul attr size: " << op->GetAttrMap().size();
+          DLOG << " inputs size: " << op->GetInputs().size();
+          DLOG << " outputs size: " << op->GetOutputs().size();
+          DLOG << " Input X is : " << op->Input("X")[0];
+          DLOG << " Input Y is : " << op->Input("Y")[0];
+          DLOG << " Output Out is : " << op->Output("Out")[0];
+          DLOG << "x_num_col_dims : "
+               << op->GetAttrMap().at("x_num_col_dims").Get<int>();
+          DLOG << "y_num_col_dims : "
+               << op->GetAttrMap().at("y_num_col_dims").Get<int>();
+
+          std::shared_ptr<operators::MulOp<Dtype, float>> mul =
+              std::make_shared<operators::MulOp<Dtype, float>>(
+                  op->Type(), op->GetInputs(), op->GetOutputs(),
+                  op->GetAttrMap(), program_.scope);
+          ops_of_block_[*block_desc.get()].push_back(mul);
        }
+      }
+    }
+  }
+
+  std::shared_ptr<Tensor> predict_mul(Tensor &t1, Tensor &t2) {
+    // feed
+    auto scope = program_.scope;
+    Variable *x_feed_value = scope->Var("pool2d_0.tmp_0");
+    auto tensor_x = x_feed_value->GetMutable<Tensor>();
+    tensor_x->ShareDataWith(t1);
+
+    Variable *y_feed_value = scope->Var("fc_0.w_0");
+    auto tensor_y = y_feed_value->GetMutable<Tensor>();
+    tensor_y->ShareDataWith(t2);
+
+    Variable *con_output = scope->Var("fc_0.tmp_0");
+    auto *output_tensor = con_output->GetMutable<Tensor>();
+    output_tensor->mutable_data<float>({3, 3});
+    //  DLOG << typeid(output_tensor).name();
+    //  DLOG << "output_tensor dims: " << output_tensor->dims();
+
+    std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
+    out_tensor.reset(output_tensor);
+
+    predict_mul(t1, t2, 0);
+    return out_tensor;
+  }
+
+private:
+  const framework::Program<Dtype> program_;
+  std::shared_ptr<ProgramDesc> to_predict_program_;
+  std::map<framework::BlockDesc,
+           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
+      ops_of_block_;
+  bool use_optimize_ = false;
+
+  void predict_mul(const Tensor &t1, const Tensor &t2, int block_id) {
+    std::shared_ptr<BlockDesc> to_predict_block =
+        to_predict_program_->Block(block_id);
+    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
+      auto op = ops_of_block_[*to_predict_block.get()][j];
+      DLOG << "op -> run()";
+      op->Run();
    }
+  }
 };

 template class TestMulOp<CPU>;
@@ -113,62 +111,62 @@ template class TestMulOp<CPU>;
 } // namespace paddle_mobile

 int main() {
-    DLOG << "----------**********----------";
-    DLOG << "begin to run MulOp Test";
-    paddle_mobile::Loader<paddle_mobile::CPU> loader;
-    auto program =
-        loader.Load(std::string("../../test/models/"
-                                "image_classification_resnet.inference.model"));
-
-    /// input x (3,2,1,1)
-    paddle_mobile::framework::Tensor inputx;
-    SetupTensor<float>(&inputx, {3, 2, 1, 1}, static_cast<float>(0),
-                       static_cast<float>(1));
-    auto *inputx_ptr = inputx.data<float>();
-
-    /// input y (2,3)
-    paddle_mobile::framework::Tensor inputy;
-    SetupTensor<float>(&inputy, {2, 3}, static_cast<float>(0),
-                       static_cast<float>(1));
-    auto *inputy_ptr = inputy.data<float>();
-
-    paddle_mobile::framework::TestMulOp<paddle_mobile::CPU> testMulOp(program);
-
-    auto output_mul = testMulOp.predict_mul(inputx, inputy);
-    auto *output_mul_ptr = output_mul->data<float>();
-
-    auto dimx_1 = inputx.numel() / inputx.dims()[0];
-    DLOG << " inputx : ";
-    for (int i = 0; i < inputx.dims()[0]; ++i) {
-        for (int j = 0; j < dimx_1; ++j) {
-            DLOGF("%f ", inputx_ptr[i * dimx_1 + j]);
-        }
-        DLOGF("\n");
+  DLOG << "----------**********----------";
+  DLOG << "begin to run MulOp Test";
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program =
+      loader.Load(std::string("../../test/models/"
+                              "image_classification_resnet.inference.model"));
+
+  /// input x (3,2,1,1)
+  paddle_mobile::framework::Tensor inputx;
+  SetupTensor<float>(&inputx, {3, 2, 1, 1}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx_ptr = inputx.data<float>();
+
+  /// input y (2,3)
+  paddle_mobile::framework::Tensor inputy;
+  SetupTensor<float>(&inputy, {2, 3}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputy_ptr = inputy.data<float>();
+
+  paddle_mobile::framework::TestMulOp<paddle_mobile::CPU> testMulOp(program);
+
+  auto output_mul = testMulOp.predict_mul(inputx, inputy);
+  auto *output_mul_ptr = output_mul->data<float>();
+
+  auto dimx_1 = inputx.numel() / inputx.dims()[0];
+  DLOG << " inputx : ";
+  for (int i = 0; i < inputx.dims()[0]; ++i) {
+    for (int j = 0; j < dimx_1; ++j) {
+      DLOGF("%f ", inputx_ptr[i * dimx_1 + j]);
    }
-
-    auto dimy_1 = inputy.numel() / inputy.dims()[0];
-    DLOG << " inputy : ";
-    for (int i = 0; i < inputy.dims()[0]; ++i) {
-        for (int j = 0; j < dimy_1; ++j) {
-            DLOGF("%f ", inputy_ptr[i * dimx_1 + j]);
-        }
-        DLOGF("\n");
+    DLOGF("\n");
+  }
+
+  auto dimy_1 = inputy.numel() / inputy.dims()[0];
+  DLOG << " inputy : ";
+  for (int i = 0; i < inputy.dims()[0]; ++i) {
+    for (int j = 0; j < dimy_1; ++j) {
+      DLOGF("%f ", inputy_ptr[i * dimx_1 + j]);
    }
-
-    auto dim_output_1 = output_mul->numel() / output_mul->dims()[0];
-    DLOG << " output : ";
-    for (int i = 0; i < output_mul->dims()[0]; ++i) {
-        for (int j = 0; j < dim_output_1; ++j) {
-            DLOGF("%f ", output_mul_ptr[i * dimy_1 + j]);
-        }
-        DLOGF("\n");
+    DLOGF("\n");
+  }
+
+  auto dim_output_1 = output_mul->numel() / output_mul->dims()[0];
+  DLOG << " output : ";
+  for (int i = 0; i < output_mul->dims()[0]; ++i) {
+    for (int j = 0; j < dim_output_1; ++j) {
+      DLOGF("%f ", output_mul_ptr[i * dimy_1 + j]);
    }
+    DLOGF("\n");
+  }

-    /// output (3,3)
-    DLOG << "output memory size : " << output_mul->memory_size();
-    DLOG << "output numel : " << output_mul->numel();
+  /// output (3,3)
+  DLOG << "output memory size : " << output_mul->memory_size();
+  DLOG << "output numel : " << output_mul->numel();

-    DLOG << inputx_ptr[0] << " x " << inputy_ptr[0] << " + " << inputx_ptr[1]
-         << " x " << inputy_ptr[0 + 3] << " = " << output_mul_ptr[0];
-    return 0;
+  DLOG << inputx_ptr[0] << " x " << inputy_ptr[0] << " + " << inputx_ptr[1]
+       << " x " << inputy_ptr[0 + 3] << " = " << output_mul_ptr[0];
+  return 0;
 }
--- a/test/operators/test_pool_op.cpp
+++ b/test/operators/test_pool_op.cpp
@@ -21,26 +21,26 @@ SOFTWARE.
 #include "io.h"

 int main() {
-    paddle_mobile::Loader<paddle_mobile::CPU> loader;
-    auto program = loader.Load(std::string("../models/googlenet"));
-    if (program.originProgram == nullptr) {
-        DLOG << "program read file";
-    }
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string("../models/googlenet"));
+  if (program.originProgram == nullptr) {
+    DLOG << "program read file";
+  }

-    Executor4Test<paddle_mobile::CPU,
-                  paddle_mobile::operators::PoolOp<paddle_mobile::CPU, float>>
-        executor(program, "pool2d");
+  Executor4Test<paddle_mobile::CPU,
+                paddle_mobile::operators::PoolOp<paddle_mobile::CPU, float>>
+      executor(program, "pool2d");

-    paddle_mobile::framework::Tensor input;
-    SetupTensor<float>(&input, {1, 64, 112, 112}, static_cast<float>(0),
-                       static_cast<float>(1));
+  paddle_mobile::framework::Tensor input;
+  SetupTensor<float>(&input, {1, 64, 112, 112}, static_cast<float>(0),
+                     static_cast<float>(1));

-    auto output = executor.predict(input, "conv2d_0.tmp_1", "pool2d_0.tmp_0",
-                                   {1, 64, 56, 56});
+  auto output = executor.predict(input, "conv2d_0.tmp_1", "pool2d_0.tmp_0",
+                                 {1, 64, 56, 56});

-    float *output_ptr = output->data<float>();
-    for (int j = 0; j < output->numel(); ++j) {
-        DLOG << " value of output: " << output_ptr[j];
-    }
-    return 0;
+  float *output_ptr = output->data<float>();
+  for (int j = 0; j < output->numel(); ++j) {
+    DLOG << " value of output: " << output_ptr[j];
+  }
+  return 0;
 }
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -24,13 +24,12 @@ SOFTWARE.
 template <typename T>
 void SetupTensor(paddle_mobile::framework::Tensor *input,
                 paddle_mobile::framework::DDim dims, T lower, T upper) {
-    static unsigned int seed = 100;
-    std::mt19937 rng(seed++);
-    std::uniform_real_distribution<double> uniform_dist(0, 1);
+  static unsigned int seed = 100;
+  std::mt19937 rng(seed++);
+  std::uniform_real_distribution<double> uniform_dist(0, 1);

-    T *input_ptr = input->mutable_data<T>(dims);
-    for (int i = 0; i < input->numel(); ++i) {
-        input_ptr[i] =
-            static_cast<T>(uniform_dist(rng) * (upper - lower) + lower);
-    }
+  T *input_ptr = input->mutable_data<T>(dims);
+  for (int i = 0; i < input->numel(); ++i) {
+    input_ptr[i] = static_cast<T>(uniform_dist(rng) * (upper - lower) + lower);
+  }
 }