diff --git a/src/operators/concat_op.cpp b/src/operators/concat_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a33fe39bca7430a93fff2fa6f7dc3712b3a92832
--- /dev/null
+++ b/src/operators/concat_op.cpp
@@ -0,0 +1,64 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+==============================================================================*/
+
+#include "concat_op.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <typename Dtype, typename T>
+void ConcatOp<Dtype, T>::InferShape() const {
+  auto inputs = param_.Inputs();
+  const size_t n = inputs.size();
+
+  std::vector<DDim> inputs_dims;
+  inputs_dims.reserve(n);
+  for (int i = 0; i < n; i++) {
+    inputs_dims.push_back(inputs[i]->dims());
+  }
+
+  auto axis = static_cast<size_t>(param_.Axis());
+
+  if (n == 1) {
+    DLOG << "Warning: concat op has only one input, "
+            "which may waste memory";
+  }
+
+  /// Add up dim[axis] across all inputs and check that the other dims match.
+  auto out_dims = inputs_dims[0];
+  int in_zero_dims_size = out_dims.size();
+  for (size_t i = 1; i < n; i++) {
+    for (size_t j = 0; j < in_zero_dims_size; j++) {
+      if (j == axis) {
+        out_dims[axis] += inputs_dims[i][j];
+      } else {
+        assert(out_dims[j] == inputs_dims[i][j]);
+      }
+    }
+  }
+
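+  /// A dim of -1 marks a size that is unknown until runtime; if any
+  /// input carried -1 at `axis`, the accumulated sum is meaningless,
+  /// so it is reset to -1 below.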
+  if (out_dims[axis] < 0) {
+    out_dims[axis] = -1;
+  }
+
+  param_.Out()->Resize(out_dims);
+}
+template class ConcatOp<CPU, float>;
+
+} // namespace operators
+} // namespace paddle_mobile
diff --git a/src/operators/concat_op.h b/src/operators/concat_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..913fd3178767452baa62cb212dd572da9111cee7
--- /dev/null
+++ b/src/operators/concat_op.h
@@ -0,0 +1,51 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+==============================================================================*/
+#pragma once
+#include "framework/operator.h"
+#include "operators/kernel/concat_kernel.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+using namespace framework;
+
+template <typename DeviceType, typename T>
+class ConcatOp : public framework::OperatorWithKernel<DeviceType> {
+ public:
+  ConcatOp(const std::string &type, const VariableNameMap &inputs,
+           const VariableNameMap &outputs,
+           const framework::AttributeMap attrs,
+           std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs,
+                                                  attrs, scope),
+        param_(inputs, outputs, attrs, *scope) {}
+
+  void Run() const {
+    operators::ConcatKernel<DeviceType, T> kernel;
+    kernel.Compute(param_);
+  }
+
+  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
+  void InferShape() const override;
+
+ protected:
+  ConcatParam param_;
+};
+
+} // namespace operators
+} // namespace paddle_mobile
diff --git a/src/operators/kernel/arm/concat_kernel.cpp b/src/operators/kernel/arm/concat_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..da650cee275a768e90729f69d1cca42f6abe853f
--- /dev/null
+++ b/src/operators/kernel/arm/concat_kernel.cpp
@@ -0,0 +1,117 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "operators/kernel/concat_kernel.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <typename T> class ConcatFunctor {
+ public:
+  void operator()(const std::vector<framework::Tensor> &input,
+                  const int axis, framework::Tensor *output) {
+    size_t num = input.size();
+    int rows = 1;
+    auto dim_0 = input[0].dims();
+    for (int i = 0; i < axis; ++i) {
+      rows *= dim_0[i];
+    }
+    int out_rows = rows, out_cols = 0;
+
+    std::vector<int64_t> input_cols(input.size());
+    for (int i = 0; i < num; ++i) {
+      int t_cols = input[i].numel() / rows;
+      out_cols += t_cols;
+      input_cols[i] = t_cols;
+    }
+
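+    /// Every input is now viewed as a row-major rows x input_cols[j]
+    /// matrix, so concatenation reduces to copying one row slice per
+    /// input into each output row.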
+    // computation
+    for (int k = 0; k < out_rows; ++k) {
+      T *dst_ptr = output->data<T>() + k * out_cols;
+      int col_idx = 0;
+      for (int j = 0; j < num; ++j) {
+        int col_len = input_cols[j];
+        const T *src_ptr = input[j].data<T>() + k * col_len;
+        memory::Copy(dst_ptr + col_idx, src_ptr, sizeof(T) * col_len);
+        col_idx += col_len;
+      }
+    }
+  }
+};
+
+template <typename T>
+void StridedNumelCopyWithAxis(int64_t axis, T *dst,
+                              const framework::DDim &dst_stride_numel,
+                              const T *src,
+                              const framework::DDim &src_stride_numel,
+                              int64_t size) {
+  int64_t before = dst_stride_numel[0] / dst_stride_numel[axis];
+  int64_t src_after = src_stride_numel[axis];
+  int64_t dst_after = dst_stride_numel[axis];
+
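+  /// stride_numel is assumed to hold suffix products, i.e.
+  /// stride_numel[i] = dims[i] * ... * dims[rank - 1], so `before` is
+  /// the number of contiguous chunks to copy.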
+  /// src and dst tensor should have the same dims size.
+  assert(src_stride_numel.size() == dst_stride_numel.size());
+
+  for (int64_t i = 0; i < src_stride_numel.size(); ++i) {
+    if (i < axis) {
+      /// src and dst should have the same elements
+      /// except the specified axis.
+      assert(src_stride_numel[i] / src_stride_numel[axis] ==
+             dst_stride_numel[i] / dst_stride_numel[axis]);
+    } else if (i == axis) {
+      continue;
+    } else {
+      /// src and dst should have the same elements
+      /// except the specified axis.
+      assert(src_stride_numel[i] == dst_stride_numel[i]);
+    }
+  }
+
+  for (int64_t i = 0; i < before; ++i) {
+    memory::Copy(dst + i * dst_after, src + i * src_after, sizeof(T) * size);
+  }
+}
+
+template <>
+void ConcatKernel<CPU, float>::Compute(const ConcatParam &param) const {
+  auto inputs = param.Inputs();
+  auto *out = param.Out();
+  int64_t axis = param.Axis();
+  out->mutable_data<float>();
+
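+  /// With axis == 0 every input maps to one contiguous block of the
+  /// output, so one strided copy per input is enough.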
+  /// Sometimes direct copies will be faster; this may need deeper analysis.
+  if (axis == 0 && inputs.size() < 10) {
+    size_t output_offset = 0;
+    for (auto *in : inputs) {
+      auto in_stride = framework::stride_numel(in->dims());
+      auto out_stride = framework::stride_numel(out->dims());
+      StridedNumelCopyWithAxis<float>(
+          axis, out->data<float>() + output_offset, out_stride,
+          in->data<float>(), in_stride, in_stride[axis]);
+      output_offset += in_stride[axis];
+    }
+  } else {
+    std::vector<framework::Tensor> inputs_concat(inputs.size());
+    for (int j = 0; j < inputs.size(); ++j) {
+      inputs_concat[j] = *inputs[j];
+    }
+    ConcatFunctor<float> concat_functor;
+    concat_functor(inputs_concat, static_cast<int>(axis), out);
+  }
+}
+
+} // namespace operators
+} // namespace paddle_mobile
diff --git a/src/operators/kernel/concat_kernel.h b/src/operators/kernel/concat_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..0580994a178bd3e976a459e9aaaae10079551b9c
--- /dev/null
+++ b/src/operators/kernel/concat_kernel.h
@@ -0,0 +1,34 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+==============================================================================*/
+#pragma once
+#include "framework/operator.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+using namespace framework;
+
+template <typename DeviceType, typename T>
+class ConcatKernel
+    : public framework::OpKernelBase<DeviceType, ConcatParam> {
+ public:
+  void Compute(const ConcatParam &param) const;
+};
+
+} // namespace operators
+} // namespace paddle_mobile
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 33bdb63a2ee269de90356a11e337260117aa9223..d03aa32f476b14b6cb4e5b67ea1ec9ea3439f413 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -51,7 +51,7 @@ class OpParam : PaddleMobileObject {
   template <typename T>
   static std::vector<T *> InputMultiFrom(const VariableNameMap &inputs,
                                          const Scope &scope) {
-    return GetMultiVarValue<T>("Input", inputs, scope);
+    return GetMultiVarValue<T>("X", inputs, scope);
   }
 
   template <typename T>
@@ -70,15 +70,15 @@ class OpParam : PaddleMobileObject {
   }
 
   template <typename T>
-  static const T GetAttr(std::string key, const AttributeMap &map) {
+  static const T GetAttr(const std::string &key, const AttributeMap &map) {
     return ((Attribute)map.at(key)).Get<T>();
   }
 
   template <typename T>
-  static T *GetVarValue(std::string key, const VariableNameMap &var_map,
-                        const Scope &scope) {
+  static T *GetVarValue(const std::string &key,
+                        const VariableNameMap &var_map, const Scope &scope) {
     auto var_vec = var_map.at(key);
-    if (var_vec.size()) {
+    if (!var_vec.empty()) {
       //          std::cout << " get var value -- " << var_vec[0] <<
       //          std::endl;
       auto var = scope.FindVar(var_vec[0]);
@@ -89,7 +89,7 @@ class OpParam : PaddleMobileObject {
   }
 
   template <typename T>
-  static std::vector<T *> GetMultiVarValue(std::string key,
+  static std::vector<T *> GetMultiVarValue(const std::string &key,
                                            const VariableNameMap &var_map,
                                            const Scope &scope) {
     auto var_vecs = var_map.at(key);
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 99b4f618ea57bf90d75d71ab6194aabab3be8952..79fbe2780a43573080e0891d44d4e96b1ca7dfa9 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -11,6 +11,9 @@ target_link_libraries(test-mul-op paddle-mobile)
 ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h test_include.h)
 target_link_libraries(test-elementwiseadd-op paddle-mobile)
 
+# gen test
+ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h test_include.h)
+target_link_libraries(test-concat-op paddle-mobile)
 
 # gen test log
 ADD_EXECUTABLE(test-log common/test_log.cpp)
diff --git a/test/operators/test_concat_op.cpp b/test/operators/test_concat_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f2c1038c78786fc02a907a52caaa8e6c611b76fe
--- /dev/null
+++ b/test/operators/test_concat_op.cpp
@@ -0,0 +1,196 @@
+
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+==============================================================================*/
+#pragma once
+#include "../test_include.h"
+#include "operators/concat_op.h"
+
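+/// Feeds four tensors that differ only in dim 1 (channels) into the
+/// "concat" op of the googlenet model and spot-checks one output element.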
+namespace paddle_mobile {
+namespace framework {
+
+template <typename Dtype> class TestConcatOp {
+ public:
+  explicit TestConcatOp(const Program<Dtype> p) : program_(p) {
+    if (use_optimize_) {
+      to_predict_program_ = program_.optimizeProgram;
+    } else {
+      to_predict_program_ = program_.originProgram;
+    }
+
+    const std::vector<std::shared_ptr<BlockDesc>> blocks =
+        to_predict_program_->Blocks();
+    //  DLOG << " **block size " << blocks.size();
+    for (int i = 0; i < blocks.size(); ++i) {
+      std::shared_ptr<BlockDesc> block_desc = blocks[i];
+      std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
+      //    DLOG << " ops " << ops.size();
+      for (int j = 0; j < ops.size(); ++j) {
+        std::shared_ptr<OpDesc> op = ops[j];
+        //  if (op->Type() == "mul") {
+        //    DLOG << "x_num_col_dims : "
+        //         << op->GetAttrMap().at("x_num_col_dims").Get<int>();
+        //    DLOG << "y_num_col_dims : "
+        //         << op->GetAttrMap().at("y_num_col_dims").Get<int>();
+        //    DLOG << " Input X is : " << op->Input("X")[0];
+        //  }
+        //  DLOG << "op:" << op->Type();
+        if (op->Type() == "concat" &&
+            op->Input("X")[0] == "conv2d_3.tmp_1") {
+          DLOG << " concat attr size: " << op->GetAttrMap().size();
+          DLOG << " inputs size: " << op->GetInputs().size();
+          DLOG << " outputs size: " << op->GetOutputs().size();
+          DLOG << " Input X is : " << op->Input("X")[0];
+          DLOG << " Output Out is : " << op->Output("Out")[0];
+          DLOG << " axis : "
+               << op->GetAttrMap().at("axis").Get<int>();
+
+          std::shared_ptr<operators::ConcatOp<Dtype, float>> concat =
+              std::make_shared<operators::ConcatOp<Dtype, float>>(
+                  op->Type(), op->GetInputs(), op->GetOutputs(),
+                  op->GetAttrMap(), program_.scope);
+          ops_of_block_[*block_desc.get()].push_back(concat);
+        }
+      }
+    }
+  }
+
+  std::shared_ptr<Tensor> predict_concat(Tensor &t1, Tensor &t2, Tensor &t3,
+                                         Tensor &t4) {
+    // feed
+    auto scope = program_.scope;
+    Variable *x1_feed_value = scope->Var("conv2d_3.tmp_1");
+    auto tensor_x1 = x1_feed_value->GetMutable<Tensor>();
+    tensor_x1->ShareDataWith(t1);
+
+    Variable *x2_feed_value = scope->Var("conv2d_5.tmp_1");
+    auto tensor_x2 = x2_feed_value->GetMutable<Tensor>();
+    tensor_x2->ShareDataWith(t2);
+
+    Variable *x3_feed_value = scope->Var("conv2d_7.tmp_1");
+    auto tensor_x3 = x3_feed_value->GetMutable<Tensor>();
+    tensor_x3->ShareDataWith(t3);
+
+    Variable *x4_feed_value = scope->Var("conv2d_8.tmp_1");
+    auto tensor_x4 = x4_feed_value->GetMutable<Tensor>();
+    tensor_x4->ShareDataWith(t4);
+
+    Variable *con_output = scope->Var("concat_0.tmp_0");
+    auto *output_tensor = con_output->GetMutable<Tensor>();
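+    /// 100 = 10 + 20 + 30 + 40: the four inputs' channel counts summed
+    /// along axis 1.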
+    output_tensor->mutable_data<float>({4, 100, 2, 2});
+    //  DLOG << typeid(output_tensor).name();
+    //  DLOG << "output_tensor dims: " << output_tensor->dims();
+
+    std::shared_ptr<Tensor> out_tensor = std::make_shared<Tensor>();
+    out_tensor.reset(output_tensor);
+
+    predict_concat(t1, t2, t3, t4, 0);
+    return out_tensor;
+  }
+
+ private:
+  const framework::Program<Dtype> program_;
+  std::shared_ptr<ProgramDesc> to_predict_program_;
+  std::map<framework::BlockDesc,
+           std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
+      ops_of_block_;
+  bool use_optimize_ = false;
+
+  void predict_concat(const Tensor &t1, const Tensor &t2, const Tensor &t3,
+                      const Tensor &t4, int block_id) {
+    std::shared_ptr<BlockDesc> to_predict_block =
+        to_predict_program_->Block(block_id);
+    for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size();
+         ++j) {
+      auto op = ops_of_block_[*to_predict_block.get()][j];
+      DLOG << "op -> run()";
+      op->Run();
+    }
+  }
+};
+
+template class TestConcatOp<CPU>;
+} // namespace framework
+} // namespace paddle_mobile
+
+int main() {
+  DLOG << "----------**********----------";
+  DLOG << "begin to run ConcatOp Test";
+  paddle_mobile::Loader<paddle_mobile::CPU> loader;
+  auto program = loader.Load(std::string("../../test/models/googlenet"));
+
+  /// input x1 (4,10,2,2)
+  paddle_mobile::framework::Tensor inputx1;
+  SetupTensor<float>(&inputx1, {4, 10, 2, 2}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx1_ptr = inputx1.data<float>();
+  /// input x2 (4,20,2,2)
+  paddle_mobile::framework::Tensor inputx2;
+  SetupTensor<float>(&inputx2, {4, 20, 2, 2}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx2_ptr = inputx2.data<float>();
+  /// input x3 (4,30,2,2)
+  paddle_mobile::framework::Tensor inputx3;
+  SetupTensor<float>(&inputx3, {4, 30, 2, 2}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx3_ptr = inputx3.data<float>();
+  /// input x4 (4,40,2,2)
+  paddle_mobile::framework::Tensor inputx4;
+  SetupTensor<float>(&inputx4, {4, 40, 2, 2}, static_cast<float>(0),
+                     static_cast<float>(1));
+  auto *inputx4_ptr = inputx4.data<float>();
+
+  paddle_mobile::framework::TestConcatOp<paddle_mobile::CPU> testConcatOp(
+      program);
+
+  auto output_concat =
+      testConcatOp.predict_concat(inputx1, inputx2, inputx3, inputx4);
+  auto *output_concat_ptr = output_concat->data<float>();
+
+  int input_n = 1;
+  int input_c = 2;
+  int input_h = 0;
+  int input_w = 1;
+  int stride0 = inputx3.numel() / inputx3.dims()[0];
+  int stride1 = inputx3.numel() / inputx3.dims()[0] / inputx3.dims()[1];
+  int stride2 = inputx3.dims()[3];
+  /// inputx1 (4,10,2,2),
+  /// inputx2 (4,20,2,2),
+  /// inputx3 (4,30,2,2),
+  /// inputx4 (4,40,2,2),
+  /// axis = 1
+  /// output (4,100,2,2)
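+  /// input_index flattens (n,c,h,w) = (1,2,0,1) inside inputx3;
+  /// output_index addresses the same element after concat, with the
+  /// channel shifted past the 10 + 20 channels of inputx1 and inputx2.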
+  int input_index =
+      input_n * stride0 + input_c * stride1 + input_h * stride2 + input_w;
+  int output_index =
+      input_n * 100 * 2 * 2 +
+      (input_c + inputx1.dims()[1] + inputx2.dims()[1]) * 2 * 2 +
+      input_h * 2 + input_w;
+
+  DLOG << " inputx3[1,2,0,1] = " << inputx3_ptr[input_index];
+  DLOG << " output[1,32,0,1] = " << output_concat_ptr[output_index];
+  return 0;
+}
diff --git a/test/operators/test_elementwise_add_op.cpp b/test/operators/test_elementwise_add_op.cpp
index b967c2491846a66f73e5e218704b6a586b64efd4..be1f0705dfd3656ed5b1328e4162bf0a1cee91f6 100644
--- a/test/operators/test_elementwise_add_op.cpp
+++ b/test/operators/test_elementwise_add_op.cpp
@@ -25,7 +25,7 @@ namespace framework {
 
 template <typename Dtype> class TestElementwiseAddOp {
  public:
-  TestElementwiseAddOp(const Program<Dtype> p) : program_(p) {
+  explicit TestElementwiseAddOp(const Program<Dtype> p) : program_(p) {
     if (use_optimize_) {
       to_predict_program_ = program_.optimizeProgram;
     } else {
@@ -89,7 +89,7 @@ template <typename Dtype> class TestElementwiseAddOp {
     tensor_y->ShareDataWith(t2);
 
     Variable *con_output = scope->Var("elementwise_add_0.tmp_0");
-    Tensor *output_tensor = con_output->GetMutable<Tensor>();
+    auto *output_tensor = con_output->GetMutable<Tensor>();
     output_tensor->mutable_data<float>({1, 3, 224, 224});
     //  DLOG << typeid(output_tensor).name();
     //  DLOG << "output_tensor dims: " << output_tensor->dims();
@@ -129,25 +129,25 @@ int main() {
   DLOG << "begin to run ElementAddOp Test";
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
   auto program =
-      loader.Load(std::string("../../../test/models/"
+      loader.Load(std::string("../../test/models/"
                               "image_classification_resnet.inference.model"));
 
   /// input x (1,3,224,224)
   paddle_mobile::framework::Tensor inputx;
   SetupTensor<float>(&inputx, {1, 3, 224, 224}, static_cast<float>(0),
                      static_cast<float>(1));
-  float *inputx_ptr = inputx.data<float>();
+  auto *inputx_ptr = inputx.data<float>();
   /// input y (224,)
   paddle_mobile::framework::Tensor inputy;
   SetupTensor<float>(&inputy, {224}, static_cast<float>(0),
                      static_cast<float>(1));
-  float *inputy_ptr = inputy.data<float>();
+  auto *inputy_ptr = inputy.data<float>();
 
   paddle_mobile::framework::TestElementwiseAddOp<paddle_mobile::CPU>
       testElementwiseAddOp(program);
 
   auto output_add = testElementwiseAddOp.predict_add(inputx, inputy);
-  float *output_add_ptr = output_add->data<float>();
+  auto *output_add_ptr = output_add->data<float>();
   //  for (int j = 0; j < output_add->numel(); ++j) {
   //    DLOG << "value of output: " << output_add_ptr[j];
   //  }
diff --git a/test/operators/test_mul_op.cpp b/test/operators/test_mul_op.cpp
index 2f92ba33c63af68ac790a6e4246c267b64070762..18375da8d2221c163b4e13d6e6dcfbf884f598d4 100644
--- a/test/operators/test_mul_op.cpp
+++ b/test/operators/test_mul_op.cpp
@@ -25,7 +25,7 @@ namespace framework {
 
 template <typename Dtype> class TestMulOp {
  public:
-  TestMulOp(const Program<Dtype> p) : program_(p) {
+  explicit TestMulOp(const Program<Dtype> p) : program_(p) {
     if (use_optimize_) {
       to_predict_program_ = program_.optimizeProgram;
     } else {
@@ -69,17 +69,17 @@ template <typename Dtype> class TestMulOp {
           DLOG << "y_num_col_dims : "
                << op->GetAttrMap().at("y_num_col_dims").Get<int>();
 
-          std::shared_ptr<operators::MulOp<Dtype, float>> add =
+          std::shared_ptr<operators::MulOp<Dtype, float>> mul =
               std::make_shared<operators::MulOp<Dtype, float>>(
                   op->Type(), op->GetInputs(), op->GetOutputs(),
                   op->GetAttrMap(), program_.scope);
-          ops_of_block_[*block_desc.get()].push_back(add);
+          ops_of_block_[*block_desc.get()].push_back(mul);
         }
       }
     }
   }
 
-  std::shared_ptr<Tensor> predict_add(Tensor &t1, Tensor &t2) {
+  std::shared_ptr<Tensor> predict_mul(Tensor &t1, Tensor &t2) {
     // feed
     auto scope = program_.scope;
     Variable *x_feed_value = scope->Var("pool2d_0.tmp_0");
@@ -91,7 +91,7 @@ template <typename Dtype> class TestMulOp {
     tensor_y->ShareDataWith(t2);
 
     Variable *con_output = scope->Var("fc_0.tmp_0");
-    Tensor *output_tensor = con_output->GetMutable<Tensor>();
+    auto *output_tensor = con_output->GetMutable<Tensor>();
     output_tensor->mutable_data<float>({3, 3});
     //  DLOG << typeid(output_tensor).name();
     //  DLOG << "output_tensor dims: " << output_tensor->dims();
@@ -99,7 +99,7 @@ template <typename Dtype> class TestMulOp {
     std::shared_ptr<Tensor> out_tensor = std::make_shared<Tensor>();
     out_tensor.reset(output_tensor);
 
-    predict_add(t1, t2, 0);
+    predict_mul(t1, t2, 0);
     return out_tensor;
   }
 
@@ -111,7 +111,7 @@ template <typename Dtype> class TestMulOp {
       ops_of_block_;
   bool use_optimize_ = false;
 
-  void predict_add(const Tensor &t1, const Tensor &t2, int block_id) {
+  void predict_mul(const Tensor &t1, const Tensor &t2, int block_id) {
     std::shared_ptr<BlockDesc> to_predict_block =
         to_predict_program_->Block(block_id);
     for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size();
@@ -132,25 +132,25 @@ int main() {
   DLOG << "begin to run MulOp Test";
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
   auto program =
-      loader.Load(std::string("../../../test/models/"
+      loader.Load(std::string("../../test/models/"
                               "image_classification_resnet.inference.model"));
 
   /// input x (3,2,1,1)
   paddle_mobile::framework::Tensor inputx;
   SetupTensor<float>(&inputx, {3, 2, 1, 1}, static_cast<float>(0),
                      static_cast<float>(1));
-  float *inputx_ptr = inputx.data<float>();
+  auto *inputx_ptr = inputx.data<float>();
   /// input y (2,3)
   paddle_mobile::framework::Tensor inputy;
   SetupTensor<float>(&inputy, {2, 3}, static_cast<float>(0),
                      static_cast<float>(1));
-  float *inputy_ptr = inputy.data<float>();
+  auto *inputy_ptr = inputy.data<float>();
 
   paddle_mobile::framework::TestMulOp<paddle_mobile::CPU> testMulOp(program);
 
-  auto output_mul = testMulOp.predict_add(inputx, inputy);
-  float *output_mul_ptr = output_mul->data<float>();
+  auto output_mul = testMulOp.predict_mul(inputx, inputy);
+  auto *output_mul_ptr = output_mul->data<float>();
 
   auto dimx_1 = inputx.numel() / inputx.dims()[0];
   DLOG << " inputx : ";