diff --git a/CMakeLists.txt b/CMakeLists.txt
index 18cf94f303280c0c761b3d163a7f8069e0697833..d999a354676281e55c76606e532e5c36808d7b31 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -46,5 +46,6 @@ target_link_libraries(paddle-mobile-static protobuf-lite openblas)
 add_dependencies(paddle-mobile openblas_proj)
 
 # gen test
-ADD_EXECUTABLE(paddle-mobile-test test/main.cpp test/test_helper.h)
+ADD_EXECUTABLE(paddle-mobile-test test/main.cpp test/test_helper.h
+               test/elementwise_add_op_test.h test/test_include.h)
 target_link_libraries(paddle-mobile-test paddle-mobile)
diff --git a/src/operators/elementwise_add_op.cpp b/src/operators/elementwise_add_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e0b702cf075f2d3c8a27be14ab7506b76c60d7e1
--- /dev/null
+++ b/src/operators/elementwise_add_op.cpp
@@ -0,0 +1,31 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+==============================================================================*/
+
+#include "elementwise_add_op.h"
+
+namespace paddle_mobile {
+    namespace operators {
+
+        template <typename DeviceType, typename T>
+        void ElementwiseAddOp<DeviceType, T>::InferShape() const {
+            auto x_dim = param_.InputX()->dims();
+            param_.Out()->Resize(x_dim);
+        }
+        template class ElementwiseAddOp<CPU, float>;
+    } // namespace operators
+} // namespace paddle_mobile
diff --git a/src/operators/elementwise_add_op.h b/src/operators/elementwise_add_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..d3e347584006b3d69684463f8a1267371998e9b7
--- /dev/null
+++ b/src/operators/elementwise_add_op.h
@@ -0,0 +1,55 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+==============================================================================*/
+
+#pragma once
+
+#include "framework/operator.h"
+#include "kernel/elementwise_add_kernel.h"
+#include "op_param.h"
+
+namespace paddle_mobile {
+    namespace operators {
+
+        using namespace framework;
+
+        template <typename DeviceType, typename T>
+        class ElementwiseAddOp
+            : public framework::OperatorWithKernel<DeviceType> {
+          public:
+            ElementwiseAddOp(const std::string &type,
+                             const VariableNameMap &inputs,
+                             const VariableNameMap &outputs,
+                             const framework::AttributeMap attrs,
+                             std::shared_ptr<Scope> scope)
+                : framework::OperatorWithKernel<DeviceType>(
+                      type, inputs, outputs, attrs, scope),
+                  param_(inputs, outputs, attrs, *scope) {}
+
+            void RunImpl() const {
+                operators::ElementwiseAddKernel<DeviceType, T> kernel;
+                kernel.Compute(param_);
+            }
+
+            using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
+            void InferShape() const override;
+
+          protected:
+            ElementwiseAddParam param_;
+        };
+    } // namespace operators
+} // namespace paddle_mobile
diff --git a/src/operators/kernel/arm/elementwise_add_kernel.cpp b/src/operators/kernel/arm/elementwise_add_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4eba2ed1002f537a77fb6fd44ef11655d358e1cf
--- /dev/null
+++ b/src/operators/kernel/arm/elementwise_add_kernel.cpp
@@ -0,0 +1,41 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "operators/kernel/elementwise_add_kernel.h"
+
+namespace paddle_mobile {
+    namespace operators {
+
+        template <typename T> struct AddFunctor {
+            inline T operator()(T a, T b) const { return a + b; }
+        };
+
+        template <>
+        void ElementwiseAddKernel<CPU, float>::Compute(
+            const ElementwiseAddParam &param) const {
+            const Tensor *input_x = param.InputX();
+            const Tensor *input_y = param.InputY();
+            Tensor *Out = param.Out();
+            Out->mutable_data<float>();
+            const int axis = param.Axis();
+            ElementwiseComputeEx<AddFunctor<float>, float>(
+                input_x, input_y, axis, AddFunctor<float>(), Out);
+        }
+
+        template class ElementwiseAddKernel<CPU, float>;
+
+    } // namespace operators
+} // namespace paddle_mobile
diff --git a/src/operators/kernel/elementwise_add_kernel.h b/src/operators/kernel/elementwise_add_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..74aeb4f16d7b228a3ae7e7daa4456120fe785881
--- /dev/null
+++ b/src/operators/kernel/elementwise_add_kernel.h
@@ -0,0 +1,36 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+==============================================================================*/
+#pragma once
+
+#include "framework/operator.h"
+#include "operators/math/elementwise_op_function.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+    namespace operators {
+
+        using namespace framework;
+
+        template <typename DeviceType, typename T>
+        class ElementwiseAddKernel
+            : public framework::OpKernelBase<DeviceType, ElementwiseAddParam> {
+          public:
+            void Compute(const ElementwiseAddParam &param) const;
+        };
+    } // namespace operators
+} // namespace paddle_mobile
diff --git a/src/operators/math/elementwise_op_function.h b/src/operators/math/elementwise_op_function.h
new file mode 100644
index 0000000000000000000000000000000000000000..3f5c02de22d9909a56a8382f07c53844c7ccd24b
--- /dev/null
+++ b/src/operators/math/elementwise_op_function.h
@@ -0,0 +1,211 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <cassert>
+
+#include "framework/ddim.h"
+#include "transform.h"
+
+#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
+
+namespace paddle_mobile {
+    namespace operators {
+
+        /*
+         * Out = X ⊙ Y
+         * If Y's shape does not match X's shape, they will be reshaped.
+         * For example:
+         * 1. shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
+         *    pre=2, n=3*4, post=5
+         *    x.shape(2, 12, 5) * y.shape(1, 12, 1).broadcast(2, 12, 5)
+         * 2. shape(X) = (2, 3, 4, 5), shape(Y) = (4,5)
+         *    pre=2*3, n=4*5, post=1
+         *    x.shape(6, 20, 1) * y.shape(1, 20, 1).broadcast(6, 20, 1)
+         */
+        inline void get_mid_dims(const framework::DDim &x_dims,
+                                 const framework::DDim &y_dims, const int axis,
+                                 int *pre, int *n, int *post) {
+            *pre = 1;
+            *n = 1;
+            *post = 1;
+            // compute pre
+            for (int i = 0; i < axis; ++i) {
+                (*pre) *= x_dims[i];
+            }
+
+            for (int i = 0; i < y_dims.size(); ++i) {
+                // Broadcast dimension mismatch otherwise.
+                assert(x_dims[i + axis] == y_dims[i]);
+                (*n) *= y_dims[i];
+            }
+
+            for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) {
+                (*post) *= x_dims[i];
+            }
+        }
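+
+        // Illustrative example (added for clarity; assumes framework::make_ddim
+        // as used below):
+        //   int pre, n, post;
+        //   get_mid_dims(framework::make_ddim({2, 3, 4, 5}),
+        //                framework::make_ddim({3, 4}), 1, &pre, &n, &post);
+        //   // pre == 2, n == 12, post == 5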
+
+        /// Remove trailing dimensions of size 1: (4, 20, 1, 1) -> (4, 20).
+        inline void trim_trailing_singular_dims(framework::DDim *dims) {
+            // Remove trailing dimensions of size 1 for y
+            auto actual_dims_size = dims->size();
+            for (; actual_dims_size != 0; --actual_dims_size) {
+                if ((*dims)[actual_dims_size - 1] != 1)
+                    break;
+            }
+            if (actual_dims_size != dims->size()) {
+                auto actual_dims = framework::vectorize(*dims);
+                actual_dims.resize(actual_dims_size);
+                *dims = framework::make_ddim(actual_dims);
+            }
+        }
+
+        template <typename T> class RowwiseTransformIterator {
+          public:
+            RowwiseTransformIterator(const T *ptr, int n)
+                : ptr_(ptr), i_(0), n_(n) {}
+
+            RowwiseTransformIterator &operator++() {
+                ++i_;
+                if (UNLIKELY(i_ == n_)) {
+                    i_ = 0;
+                }
+                return *this;
+            }
+
+            bool operator==(const RowwiseTransformIterator &rhs) const {
+                return (ptr_ + i_) == &(*rhs);
+            }
+
+            bool operator!=(const RowwiseTransformIterator &rhs) const {
+                return (ptr_ + i_) != &(*rhs);
+            }
+
+            const T &operator*() { return ptr_[i_]; }
+
+          private:
+            const T *ptr_;
+            int i_;
+            int64_t n_;
+        };
+
+        /// (4,20,2) + (20,): (20,) behaves like (20,1). While the last
+        /// dimension of (4,20,2) moves 2 strides, (20,1) moves 1 stride,
+        /// so each y element is reused for 2 consecutive x elements.
+        template <typename T> class MidWiseTransformIterator {
+          public:
+            MidWiseTransformIterator(const T *ptr, int n, int post)
+                : ptr_(ptr), i_(0), j_(0), n_(n), post_(post) {}
+
+            MidWiseTransformIterator &operator++() {
+                ++j_;
+                if (UNLIKELY(j_ == post_)) {
+                    ++i_;
+                    j_ = 0;
+                    if (UNLIKELY(i_ == n_)) {
+                        i_ = 0;
+                    }
+                }
+                return *this;
+            }
+
+            bool operator==(const MidWiseTransformIterator &rhs) const {
+                return (ptr_ + i_) == &(*rhs);
+            }
+
+            bool operator!=(const MidWiseTransformIterator &rhs) const {
+                return (ptr_ + i_) != &(*rhs);
+            }
+
+            const T &operator*() { return ptr_[i_]; }
+
+          private:
+            const T *ptr_;
+            int64_t i_;
+            int64_t j_;
+            int64_t n_;
+            int64_t post_;
+        };
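+
+        // Illustrative note (added for clarity): with y pointing at 3 floats,
+        //   RowwiseTransformIterator<float> it(y, 3);
+        // yields y[0], y[1], y[2], y[0], ... so a row-major (2, 3) x can be
+        // combined with a (3,) y; MidWiseTransformIterator additionally
+        // repeats each y element `post` times before advancing.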
+
+        template <typename Functor, typename T, typename OutType = T>
+        class TransformFunctor {
+          public:
+            TransformFunctor(const framework::Tensor *x,
+                             const framework::Tensor *y, framework::Tensor *z,
+                             Functor func)
+                : x_(x->data<T>()), y_(y->data<T>()),
+                  z_(z->mutable_data<OutType>()), nx_(x->numel()),
+                  func_(func) {}
+
+            inline void Run() const {
+                math::Transform trans;
+                // Apply func(x_, y_) element-wise and write the result to z_.
+                trans(x_, x_ + nx_, y_, z_, func_);
+            }
+
+            inline void RunRowWise(int n, int pre) const {
+                math::Transform trans;
+                trans(x_, x_ + nx_, RowwiseTransformIterator<T>(y_, n), z_,
+                      func_);
+            }
+
+            inline void RunMidWise(int n, int pre, int post) const {
+                math::Transform trans;
+                trans(x_, x_ + nx_, MidWiseTransformIterator<T>(y_, n, post),
+                      z_, func_);
+            }
+
+          private:
+            const T *x_;
+            const T *y_;
+            OutType *z_;
+            int64_t nx_;
+            Functor func_;
+        };
+
+        template <typename Functor, typename T, typename OutType = T>
+        void ElementwiseComputeEx(const framework::Tensor *x,
+                                  const framework::Tensor *y, int axis,
+                                  Functor func, framework::Tensor *z) {
+            TransformFunctor<Functor, T, OutType> functor(x, y, z, func);
+
+            auto x_dims = x->dims();
+            auto y_dims = y->dims();
+            // PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
+            //                   "Rank of first input must >= rank of second
+            //                   input.");
+
+            if (x_dims == y_dims) {
+                functor.Run();
+                return;
+            }
+
+            /// axis = -1 represents the last dimension.
+            axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
+            // PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
+            //                "Axis should be in range [0, x_dims)");
+            trim_trailing_singular_dims(&y_dims);
+            axis = (y_dims.size() == 0) ? x_dims.size() : axis;
+
+            int pre, n, post;
+            get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);
+            if (post == 1) {
+                functor.RunRowWise(n, pre);
+                return;
+            } else {
+                functor.RunMidWise(n, pre, post);
+                return;
+            }
+        }
+
+    } // namespace operators
+} // namespace paddle_mobile
diff --git a/src/operators/math/transform.h b/src/operators/math/transform.h
new file mode 100644
index 0000000000000000000000000000000000000000..55a351c90dfcb9057db14eeb8e932fba1e25baa2
--- /dev/null
+++ b/src/operators/math/transform.h
@@ -0,0 +1,57 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <algorithm>
+
+namespace paddle_mobile {
+    namespace operators {
+        namespace math {
+
+            // Transform applies a unary or a binary functor on each element in
+            // a range defined by a pair of iterators.
+            //
+            // - The specialization for CPU calls std::transform.
+            // - The specialization for CUDA calls thrust::transform.
+            //
+            // NOTE: InputIter and OutputIter are defined as different types
+            // because InputIter points to an op's inputs while OutputIter
+            // points to an op's outputs.
+            //
+            // NOTE: We don't assume InputIter to be const InputType * and
+            // OutputIter to be OutputType *, because we might use an iterator
+            // class such as paddle_mobile::operators::RowwiseTransformIterator.
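+
+            // Illustrative usage (added for clarity, not part of the original
+            // comment): adding two float arrays with the binary overload below:
+            //   float x[4] = {1, 2, 3, 4}, y[4] = {5, 6, 7, 8}, z[4];
+            //   Transform trans;
+            //   trans(x, x + 4, y, z, [](float a, float b) { return a + b; });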
+
+            struct Transform {
+                template <typename InputIter, typename OutputIter,
+                          typename UnaryOperation>
+                void operator()(InputIter first, InputIter last,
+                                OutputIter result, UnaryOperation op) {
+                    std::transform(first, last, result, op);
+                }
+
+                template <typename InputIter1, typename InputIter2,
+                          typename OutputIter, typename BinaryOperation>
+                void operator()(InputIter1 first1, InputIter1 last1,
+                                InputIter2 first2, OutputIter result,
+                                BinaryOperation op) {
+                    std::transform(first1, last1, first2, result, op);
+                }
+            };
+        } // namespace math
+    } // namespace operators
+} // namespace paddle_mobile
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 9160cdbf4b2bf864adfbfc7d230e7358f951b95c..29d1ef23b7cbab7f7daa4e1d1e535f4e49c8611a 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -38,12 +38,36 @@ namespace paddle_mobile {
             return GetVarValue<T>("Input", inputs, scope);
         }
 
+        template <typename T>
+        static T *InputXFrom(const VariableNameMap &inputs,
+                             const Scope &scope) {
+            return GetVarValue<T>("X", inputs, scope);
+        }
+
+        template <typename T>
+        static T *InputYFrom(const VariableNameMap &inputs,
+                             const Scope &scope) {
+            return GetVarValue<T>("Y", inputs, scope);
+        }
+
+        template <typename T>
+        static std::vector<T *>
+        InputMultiFrom(const VariableNameMap &inputs, const Scope &scope) {
+            return GetMultiVarValue<T>("Input", inputs, scope);
+        }
+
         template <typename T>
         static T *OutputFrom(const VariableNameMap &outputs,
                              const Scope &scope) {
             return GetVarValue<T>("Output", outputs, scope);
         }
 
+        template <typename T>
+        static T *OutFrom(const VariableNameMap &outputs,
+                          const Scope &scope) {
+            return GetVarValue<T>("Out", outputs, scope);
+        }
+
         template <typename T>
         static T *FilterFrom(const VariableNameMap &inputs,
                              const Scope &scope) {
@@ -69,6 +93,20 @@ namespace paddle_mobile {
                 return nullptr;
             }
         }
+
+        template <typename T>
+        static std::vector<T *>
+        GetMultiVarValue(std::string key, const VariableNameMap &var_map,
+                         const Scope &scope) {
+            auto var_vecs = var_map.at(key);
+            assert(var_vecs.size() > 1);
+            std::vector<T *> var_res;
+            for (auto &var_vec : var_vecs) {
+                auto var = scope.FindVar(var_vec);
+                var_res.push_back(var->GetMutable<T>());
+            }
+            return var_res;
+        }
     };
 
     class ConvParam : OpParam {
@@ -112,5 +150,86 @@ namespace paddle_mobile {
     std::ostream &operator<<(std::ostream &os, const ConvParam &conv_param);
 
+    class ElementwiseAddParam : OpParam {
+      public:
+        ElementwiseAddParam(const VariableNameMap &inputs,
+                            const VariableNameMap &outputs,
+                            const framework::AttributeMap &attrs,
+                            const framework::Scope &scope) {
+            input_x_ = InputXFrom<Tensor>(inputs, scope);
+            input_y_ = InputYFrom<Tensor>(inputs, scope);
+            out_ = OutFrom<Tensor>(outputs, scope);
+            axis_ = GetAttr<int>("axis", attrs);
+        }
+
+        const Tensor *InputX() const { return input_x_; }
+
+        const Tensor *InputY() const { return input_y_; }
+
+        Tensor *Out() const { return out_; }
+
+        const int &Axis() const { return axis_; }
+
+      private:
+        Tensor *input_x_;
+        Tensor *input_y_;
+        Tensor *out_;
+        int axis_;
+    };
+
+    class MulParam : OpParam {
+      public:
+        MulParam(const VariableNameMap &inputs,
+                 const VariableNameMap &outputs,
+                 const framework::AttributeMap &attrs,
+                 const framework::Scope &scope) {
+            input_x_ = InputXFrom<Tensor>(inputs, scope);
+            input_y_ = InputYFrom<Tensor>(inputs, scope);
+            out_ = OutFrom<Tensor>(outputs, scope);
+            x_num_col_dims_ = GetAttr<int>("x_num_col_dims", attrs);
+            y_num_col_dims_ = GetAttr<int>("y_num_col_dims", attrs);
+        }
+
+        const Tensor *InputX() const { return input_x_; }
+
+        const Tensor *InputY() const { return input_y_; }
+
+        Tensor *Out() const { return out_; }
+
+        const int &XNumColDims() const { return x_num_col_dims_; }
+
+        const int &YNumColDims() const { return y_num_col_dims_; }
+
+      private:
+        Tensor *input_x_;
+        Tensor *input_y_;
+        Tensor *out_;
+        int x_num_col_dims_;
+        int y_num_col_dims_;
+    };
+
+    class ConcatParam : public OpParam {
+      public:
+        ConcatParam(const VariableNameMap &inputs,
+                    const VariableNameMap &outputs,
+                    const framework::AttributeMap &attrs,
+                    const framework::Scope &scope) {
+            inputs_ = InputMultiFrom<Tensor>(inputs, scope);
+            out_ = OutFrom<Tensor>(outputs, scope);
+            axis_ = GetAttr<int>("axis", attrs);
+        }
+
+        std::vector<Tensor *> Inputs() const { return inputs_; }
+
+        Tensor *Out() const { return out_; }
+
+        const int &Axis() const { return axis_; }
+
+      private:
+        std::vector<Tensor *> inputs_;
+        Tensor *out_;
+        int axis_;
+    };
+
 } // namespace operators
 } // namespace paddle_mobile
diff --git a/test/elementwise_add_op_test.h b/test/elementwise_add_op_test.h
new file mode 100644
index 0000000000000000000000000000000000000000..2389b457ac3725dada65999f3babd9f7ebf3034d
--- /dev/null
+++ b/test/elementwise_add_op_test.h
@@ -0,0 +1,176 @@
+
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+==============================================================================*/
+#pragma once
+#include "operators/elementwise_add_op.h"
+#include "test_include.h"
+
+namespace paddle_mobile {
+    namespace framework {
+
+        template <typename Dtype> class TestElementwiseAddOp {
+          public:
+            TestElementwiseAddOp(const Program p) : program_(p) {
+                if (use_optimize_) {
+                    to_predict_program_ = program_.optimizeProgram;
+                } else {
+                    to_predict_program_ = program_.originProgram;
+                }
+
+                const std::vector<std::shared_ptr<BlockDesc>> blocks =
+                    to_predict_program_->Blocks();
+                //  std::cout << " **block size " << blocks.size() << std::endl;
+                for (int i = 0; i < blocks.size(); ++i) {
+                    std::shared_ptr<BlockDesc> block_desc = blocks[i];
+                    std::vector<std::shared_ptr<OpDesc>> ops =
+                        block_desc->Ops();
+                    //  std::cout << " ops " << ops.size() << std::endl;
+                    for (int j = 0; j < ops.size(); ++j) {
+                        std::shared_ptr<OpDesc> op = ops[j];
+                        if (op->Type() == "elementwise_add") {
+                            if (op->GetAttrMap().at("axis").Get<int>() != -1) {
+                                std::cout
+                                    << "attr: axis = "
+                                    << op->GetAttrMap().at("axis").Get<int>()
+                                    << std::endl;
+                            }
+                        }
+                        std::cout << "op:" << op->Type() << std::endl;
+                        if (op->Type() == "elementwise_add" &&
+                            op->Input("X")[0] == "batch_norm_2.tmp_2") {
+                            std::cout << " elementwise_add attr size: "
+                                      << op->GetAttrMap().size() << std::endl;
+                            std::cout << " inputs size: "
+                                      << op->GetInputs().size() << std::endl;
+                            std::cout << " outputs size: "
+                                      << op->GetOutputs().size() << std::endl;
+                            std::cout << " Input X is : " << op->Input("X")[0]
+                                      << std::endl;
+                            std::cout << " Input Y is : " << op->Input("Y")[0]
+                                      << std::endl;
+                            std::cout << " Output Out is : "
+                                      << op->Output("Out")[0] << std::endl;
+                            Attribute axis_attr = op->GetAttrMap().at("axis");
+                            int axis = axis_attr.Get<int>();
+                            std::cout << " Attr axis is : " << axis
+                                      << std::endl;
+
+                            std::shared_ptr<
+                                operators::ElementwiseAddOp<Dtype, float>>
+                                add = std::make_shared<
+                                    operators::ElementwiseAddOp<Dtype, float>>(
+                                    op->Type(), op->GetInputs(),
+                                    op->GetOutputs(), op->GetAttrMap(),
+                                    program_.scope);
+                            ops_of_block_[*block_desc.get()].push_back(add);
+                        }
+                    }
+                }
+            }
+
+            std::shared_ptr<Tensor> predict_add(Tensor &t1, Tensor &t2) {
+                // feed
+                auto scope = program_.scope;
+                Variable *x_feed_value = scope->Var("batch_norm_2.tmp_2");
+                auto tensor_x = x_feed_value->GetMutable<Tensor>();
+                tensor_x->ShareDataWith(t1);
+
+                Variable *y_feed_value = scope->Var("batch_norm_0.tmp_3");
+                auto tensor_y = y_feed_value->GetMutable<Tensor>();
+                tensor_y->ShareDataWith(t2);
+
+                Variable *con_output = scope->Var("elementwise_add_0.tmp_0");
+                Tensor *output_tensor = con_output->GetMutable<Tensor>();
+                output_tensor->mutable_data<float>({1, 3, 224, 224});
+                //  std::cout << typeid(output_tensor).name() << std::endl;
+                //  std::cout << "output_tensor dims: "
+                //            << output_tensor->dims() << std::endl;
+
+                std::shared_ptr<Tensor> out_tensor =
+                    std::make_shared<LoDTensor>();
+                out_tensor.reset(output_tensor);
+
+                predict_add(t1, t2, 0);
+                return out_tensor;
+            }
+
+          private:
+            const framework::Program program_;
+            std::shared_ptr<ProgramDesc> to_predict_program_;
+            std::map<framework::BlockDesc,
+                     std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
+                ops_of_block_;
+            bool use_optimize_ = false;
+
+            void predict_add(const Tensor &t1, const Tensor &t2,
+                             int block_id) {
+                std::shared_ptr<BlockDesc> to_predict_block =
+                    to_predict_program_->Block(block_id);
+                for (int j = 0;
+                     j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
+                    auto op = ops_of_block_[*to_predict_block.get()][j];
+                    std::cout << "op -> run()" << std::endl;
+                    op->Run();
+                }
+            }
+        };
+
+        template class TestElementwiseAddOp<CPU>;
+    } // namespace framework
+
+    namespace test {
+        void testElementwiseAdd() {
+            paddle_mobile::Loader loader;
+            auto program = loader.Load(
+                std::string("../../test/models/"
+                            "image_classification_resnet.inference.model"));
+
+            /// input x (1,3,224,224)
+            paddle_mobile::framework::Tensor inputx;
+            SetupTensor<float>(&inputx, {1, 3, 224, 224},
+                               static_cast<float>(0), static_cast<float>(1));
+            float *inputx_ptr = inputx.data<float>();
+            /// input y (224,)
+            paddle_mobile::framework::Tensor inputy;
+            SetupTensor<float>(&inputy, {224}, static_cast<float>(0),
+                               static_cast<float>(1));
+            float *inputy_ptr = inputy.data<float>();
+
+            paddle_mobile::framework::TestElementwiseAddOp<paddle_mobile::CPU>
+                testElementwiseAddOp(program);
+
+            auto output_add = testElementwiseAddOp.predict_add(inputx, inputy);
+            float *output_add_ptr = output_add->data<float>();
+            for (int j = 0; j < output_add->numel(); ++j) {
+                //  std::cout << "value of output: " << output_add_ptr[j]
+                //            << std::endl;
+            }
+
+            /// output (1,3,224,224)
+            std::cout << "output memory size : " << output_add->memory_size()
+                      << std::endl;
+            std::cout << "output numel : " << output_add->numel() << std::endl;
+
+            std::cout << inputx_ptr[226] << " + " << inputy_ptr[2] << " = "
+                      << output_add_ptr[226] << std::endl;
+        }
+    } // namespace test
+} // namespace paddle_mobile
diff --git a/test/main.cpp b/test/main.cpp
index 41ac36b39cc8e03c827beea00b8a3fbf7854ca3b..14e955a77bb73a29b9b6f0ca1acf883ec371aca3 100644
--- a/test/main.cpp
+++ b/test/main.cpp
@@ -16,6 +16,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 ==============================================================================*/
 
+#include "elementwise_add_op_test.h"
 #include "framework/executor.h"
 #include "io.h"
 #include "test_helper.h"
@@ -36,44 +37,44 @@ SOFTWARE.
 //}
 
 int main() {
-  std::string data_set = "cifar10";
-  //
-  //  if (data_set == "cifar10") {
-  //    SetupTensor<float>(&input, {FLAGS_batch_size, 3, 32, 32},
-  //                       static_cast<float>(0), static_cast<float>(1));
-  //  } else if (data_set == "imagenet") {
-  //    SetupTensor<float>(&input, {FLAGS_batch_size, 3, 224, 224},
-  //                       static_cast<float>(0), static_cast<float>(1));
-  //  } else {
-  //    LOG(FATAL) << "Only cifar10 or imagenet is supported.";
-  //  }
+    std::string data_set = "cifar10";
+    //
+    //  if (data_set == "cifar10") {
+    //    SetupTensor<float>(&input, {FLAGS_batch_size, 3, 32, 32},
+    //                       static_cast<float>(0), static_cast<float>(1));
+    //  } else if (data_set == "imagenet") {
+    //    SetupTensor<float>(&input, {FLAGS_batch_size, 3, 224, 224},
+    //                       static_cast<float>(0), static_cast<float>(1));
+    //  } else {
+    //    LOG(FATAL) << "Only cifar10 or imagenet is supported.";
+    //  }
 
-  paddle_mobile::Loader loader;
-  auto program = loader.Load(std::string(
-      "../../test/models/image_classification_resnet.inference.model"));
+    paddle_mobile::Loader loader;
+    auto program = loader.Load(std::string(
+        "../../test/models/image_classification_resnet.inference.model"));
 
-  paddle_mobile::framework::Executor executor(program);
+    paddle_mobile::framework::Executor executor(program);
 
-  paddle_mobile::framework::Tensor input;
-  SetupTensor<float>(&input, {1, 3, 32, 32}, static_cast<float>(0),
-                     static_cast<float>(1));
-  float *input_ptr = input.data<float>();
-  for (int i = 0; i < input.numel(); ++i) {
-    //  std::cout << input_ptr[i] << std::endl;
-  }
+    paddle_mobile::framework::Tensor input;
+    SetupTensor<float>(&input, {1, 3, 32, 32}, static_cast<float>(0),
+                       static_cast<float>(1));
+    float *input_ptr = input.data<float>();
+    for (int i = 0; i < input.numel(); ++i) {
+        //  std::cout << input_ptr[i] << std::endl;
+    }
 
-  //  std::cout << "input: " << input.memory_size() << std::endl;
-  //  std::cout << "input: " << input.numel() << std::endl;
+    //  std::cout << "input: " << input.memory_size() << std::endl;
+    //  std::cout << "input: " << input.numel() << std::endl;
 
-  auto output = executor.predict(input);
+    auto output = executor.predict(input);
 
-  //  std::cout << "output: " << output->memory_size() << std::endl;
-  //  std::cout << "output: " << output->numel() << std::endl;
+    //  std::cout << "output: " << output->memory_size() << std::endl;
+    //  std::cout << "output: " << output->numel() << std::endl;
 
-  //  float* output_ptr = output->data<float>();
-  //  for (int j = 0; j < output->numel(); ++j) {
-  //    std::cout << " value of output: " << output_ptr[j] << std::endl;
-  //  }
-
-  return 0;
+    //  float* output_ptr = output->data<float>();
+    //  for (int j = 0; j < output->numel(); ++j) {
+    //    std::cout << " value of output: " << output_ptr[j] << std::endl;
+    //  }
+    paddle_mobile::test::testElementwiseAdd();
+    return 0;
 }
diff --git a/test/test_helper.h b/test/test_helper.h
index 321fe53fa1e32d144d2c31ceb92506ec4cbe72f3..c189aa123a59e35aab2fa15285a69b176c101025 100644
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -15,20 +15,21 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 ==============================================================================*/
-
-#include <random>
+#pragma once
 #include "framework/ddim.h"
 #include "framework/tensor.h"
+#include <random>
 
 template <typename T>
-void SetupTensor(paddle_mobile::framework::Tensor* input,
+void SetupTensor(paddle_mobile::framework::Tensor *input,
                  paddle_mobile::framework::DDim dims, T lower, T upper) {
-  static unsigned int seed = 100;
-  std::mt19937 rng(seed++);
-  std::uniform_real_distribution<double> uniform_dist(0, 1);
+    static unsigned int seed = 100;
+    std::mt19937 rng(seed++);
+    std::uniform_real_distribution<double> uniform_dist(0, 1);
 
-  T* input_ptr = input->mutable_data<T>(dims);
-  for (int i = 0; i < input->numel(); ++i) {
-    input_ptr[i] = static_cast<T>(uniform_dist(rng) * (upper - lower) + lower);
-  }
+    T *input_ptr = input->mutable_data<T>(dims);
+    for (int i = 0; i < input->numel(); ++i) {
+        input_ptr[i] =
+            static_cast<T>(uniform_dist(rng) * (upper - lower) + lower);
+    }
 }
diff --git a/test/test_include.h b/test/test_include.h
new file mode 100644
index 0000000000000000000000000000000000000000..476c2df9389d1b6a75be0f3e68d58b5bb556083d
--- /dev/null
+++ b/test/test_include.h
@@ -0,0 +1,15 @@
+#pragma once
+#include "framework/block_desc.h"
+#include "framework/framework.pb.h"
+#include "framework/lod_tensor.h"
+#include "framework/operator.h"
+#include "framework/program.h"
+#include "framework/program_desc.h"
+#include "framework/scope.h"
+#include "framework/tensor.h"
+#include "framework/variable.h"
+#include "io.h"
+#include "test_helper.h"
+#include <map>
+#include <string>
+#include <vector>