diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index f466dbc79a2059faa1e3d4ad6ede3f2394580842..f0fd12f1b5276d033ea086c60c80616fb1be7585 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -47,6 +47,7 @@ cc_test(gather_test SRCS gather_test.cc DEPS tensor)
 op_library(gather_op SRCS gather_op.cc gather_op.cu)
 
 cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
+op_library(scatter_op SRCS scatter_op.cc scatter_op.cu)
 
 cc_library(net_op SRCS net_op.cc DEPS op_registry)
 cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f901edefa22dc9a252e87116df756d04767a7162
--- /dev/null
+++ b/paddle/operators/scatter_op.cc
@@ -0,0 +1,86 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/scatter_op.h"
+#include "paddle/framework/ddim.h"
+
+namespace paddle {
+namespace operators {
+
+class ScatterOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("Index")->dims().size(), 1,
+                      "Update Index should be 1-D.");
+    PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("Ref")->dims().size(),
+                      ctx.Input<Tensor>("Updates")->dims().size(),
+                      "Ref and Updates should have the same rank.");
+    PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("Updates")->dims()[0],
+                      ctx.Input<Tensor>("Index")->dims()[0],
+                      "Updates and Index should have the same batch-size.");
+    // All dimensions but the first must match between Ref and Updates.
+    framework::DDim data_dim(ctx.Input<Tensor>("Ref")->dims());
+    for (int i = 1; i < data_dim.size(); ++i)
+      PADDLE_ENFORCE_EQ(data_dim[i], ctx.Input<Tensor>("Updates")->dims()[i]);
+    ctx.Output<Tensor>("Out")->Resize(ctx.Input<Tensor>("Ref")->dims());
+  }
+};
+
+class ScatterGradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    auto *dUpdates = ctx.Output<Tensor>(framework::GradVarName("Updates"));
+    auto *Updates = ctx.Input<Tensor>("Updates");
+    auto *dRef = ctx.Output<Tensor>(framework::GradVarName("Ref"));
+    auto *Ref = ctx.Input<Tensor>("Ref");
+
+    dRef->Resize(Ref->dims());
+    dUpdates->Resize(Updates->dims());
+  }
+};
+
+class ScatterOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  ScatterOpMaker(framework::OpProto *proto,
+                 framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("Ref", "The source input of scatter op");
+    AddInput("Index",
+             "The index input of scatter op where Ref will be updated");
+    AddInput("Updates", "The updated value of scatter op");
+    AddOutput("Out", "The output of scatter op");
+    AddComment(R"DOC(
+Scatter Operator updates Ref along the first axis:
+
+Out = Ref
+Out[Index] = Ref[Index] + Updates
+)DOC");
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP(scatter, ops::ScatterOp, ops::ScatterOpMaker, scatter_grad,
+            ops::ScatterGradOp);
+REGISTER_OP_CPU_KERNEL(
+    scatter, ops::ScatterOpKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(
+    scatter_grad,
+    ops::ScatterGradientOpKernel<paddle::platform::CPUPlace, float>);
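Reviewer note: the DOC block above defines the scatter semantics this patch implements. As a cross-check, here is a minimal NumPy sketch of the forward pass (illustrative only, not part of the patch; it assumes the indices are unique, as they are in the tests below):

    import numpy as np

    def scatter_forward(ref, index, updates):
        # Out = Ref, then Out[Index] += Updates along the first axis.
        out = np.copy(ref)
        out[index] += updates  # valid only for unique indices
        return out

    ref = np.ones((3, 3), dtype=np.float32)
    index = np.array([1, 2], dtype=np.int32)
    updates = np.random.random((2, 3)).astype(np.float32)
    print(scatter_forward(ref, index, updates))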
diff --git a/paddle/operators/scatter_op.cu b/paddle/operators/scatter_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..6716b478833ff3adb6112cdb1ee25b7f1744ea1f
--- /dev/null
+++ b/paddle/operators/scatter_op.cu
@@ -0,0 +1,20 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#define EIGEN_USE_GPU
+#include "paddle/operators/scatter_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_GPU_KERNEL(
+    scatter, ops::ScatterOpKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/scatter_op.h b/paddle/operators/scatter_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..e9595638a86a4a4536ddad4e6f20fd80a54b1608
--- /dev/null
+++ b/paddle/operators/scatter_op.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "gather.h"
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+#include "scatter.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename Place, typename T>
+class ScatterOpKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext &ctx) const override {
+    auto *Ref = ctx.Input<Tensor>("Ref");
+    auto *Index = ctx.Input<Tensor>("Index");
+    auto *Updates = ctx.Input<Tensor>("Updates");
+    auto *Out = ctx.Output<Tensor>("Out");
+
+    // In place output: Out = Ref, Out[Index] += Updates
+    Out->ShareDataWith<T>(*Ref);
+    // Apply ScatterUpdate: Out[Index] += Updates[:]
+    ScatterUpdate<T>(ctx.GetPlace(), Updates, Index, Out);
+  }
+};
+
+template <typename Place, typename T>
+class ScatterGradientOpKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext &ctx) const override {
+    auto *dRef = ctx.Output<Tensor>(framework::GradVarName("Ref"));
+    auto *dUpdates = ctx.Output<Tensor>(framework::GradVarName("Updates"));
+    auto *Index = ctx.Input<Tensor>("Index");
+    auto *dOut = ctx.Input<Tensor>(framework::GradVarName("Out"));
+
+    // In place gradient: dRef = dOut
+    dRef->ShareDataWith<T>(*dOut);
+    dUpdates->mutable_data<T>(ctx.GetPlace());
+    // Gradient by Gather: dUpdates = dOut[Index]
+    Gather<T>(ctx.GetPlace(), dOut, Index, dUpdates);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
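Reviewer note: the two kernels above are terse because ShareDataWith aliases buffers instead of copying. A NumPy sketch of what the backward pass computes (illustrative only, not part of the patch):

    import numpy as np

    def scatter_backward(d_out, index):
        # Out = Ref + scatter(Updates), so Ref's gradient is dOut unchanged;
        # the kernel expresses this by sharing dOut's buffer.
        d_ref = d_out
        # Updates' gradient is a gather of dOut's rows along the first axis.
        d_updates = d_out[index]
        return d_ref, d_updates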
diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt
index abb9c248eee9c59e8e6b9fa9d1878fec5dd67569..37e186a408ff5f560b5878e3e51ea81ca5810bc7 100644
--- a/paddle/pybind/CMakeLists.txt
+++ b/paddle/pybind/CMakeLists.txt
@@ -4,6 +4,7 @@ cc_library(paddle_pybind SHARED
     DEPS pybind python backward
         sgd_op
         gather_op
+        scatter_op
         add_op
         mul_op
         rowwise_add_op
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 8fa8be2cef5fff04ed61ac726e5d8111e30c8a09..3bc150ccb7af2885439cc2344aa0db9ba3b1ca03 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -47,6 +47,7 @@ USE_OP(scale);
 USE_OP_ITSELF(identity);
 USE_OP(minus);
 USE_CPU_ONLY_OP(gather);
+USE_CPU_ONLY_OP(scatter);
 
 namespace paddle {
 namespace framework {
diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt
index fb4686889a644753afdeb748b444e757ed016eda..661ebd89648feec77367c278e5f045b8238e1dc1 100644
--- a/python/paddle/v2/framework/tests/CMakeLists.txt
+++ b/python/paddle/v2/framework/tests/CMakeLists.txt
@@ -14,6 +14,7 @@ py_test(test_sigmoid_op SRCS test_sigmoid_op.py)
 py_test(test_softmax_op SRCS test_softmax_op.py)
 py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py)
 py_test(test_gather_op SRCS test_gather_op.py)
+py_test(test_scatter_op SRCS test_scatter_op.py)
 py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py)
 
 py_test(gradient_checker SRCS gradient_checker.py)
diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py
index d452197ce94eff4dbd773c403007af91ff88c002..9a7a7fbf5e63d4e433576f8e980c41c72fa26cab 100644
--- a/python/paddle/v2/framework/tests/gradient_checker.py
+++ b/python/paddle/v2/framework/tests/gradient_checker.py
@@ -32,7 +32,8 @@ def get_numeric_gradient(op,
                          output_name,
                          input_to_check,
                          delta=0.005,
-                         local_scope=None):
+                         local_scope=None,
+                         in_place=False):
     """
     Get Numeric Gradient for an operator's input.
@@ -81,6 +82,11 @@ def get_numeric_gradient(op,
     def product(dim):
         return reduce(lambda a, b: a * b, dim, 1)
 
+    def restore_inputs():
+        for var_name in input_values:
+            tensor_ = local_scope.find_var(var_name).get_tensor()
+            tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace())
+
     # get the input tensor that we want to get its numeric gradient.
     tensor_to_check = local_scope.find_var(input_to_check).get_tensor()
     tensor_size = product(tensor_to_check.get_dims())
@@ -90,6 +96,8 @@ def get_numeric_gradient(op,
     # we only compute gradient of one element each time.
     # we use a for loop to compute the gradient of every element.
     for i in xrange(tensor_size):
+        if in_place:
+            restore_inputs()
         # get one input element by its index i.
         origin = tensor_to_check.get_float_element(i)
 
@@ -99,6 +107,8 @@ def get_numeric_gradient(op,
         y_pos = get_output()
 
         # minus delta to this element, run op and get the sum of the result tensor.
+        if in_place:
+            restore_inputs()
         x_neg = origin - delta
         tensor_to_check.set_float_element(i, x_neg)
         y_neg = get_output()
@@ -251,6 +261,7 @@ class GradientChecker(unittest.TestCase):
                    output_name,
                    no_grad_set=None,
                    only_cpu=False,
+                   in_place=False,
                    max_relative_error=0.005):
         """
         :param forward_op: used to create backward_op
@@ -283,7 +294,8 @@ class GradientChecker(unittest.TestCase):
 
         # get numerical gradients
         numeric_grads = [
-            get_numeric_gradient(forward_op, input_vars, output_name, name)
+            get_numeric_gradient(
+                forward_op, input_vars, output_name, name, in_place=in_place)
             for name in inputs_to_check
         ]
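Reviewer note on the in_place flag: the scatter kernel makes Out share Ref's buffer, so running the forward op mutates its own input. Without resetting the inputs before every evaluation, each finite-difference step would perturb an already-perturbed tensor and the numeric gradient would drift. A self-contained sketch of the idea (hypothetical helper, not the checker's actual API; f maps the input array to a scalar output and may mutate its argument):

    import numpy as np

    def numeric_gradient_in_place(f, x, delta=0.005):
        x0 = np.copy(x)               # snapshot, like input_values above
        grad = np.zeros_like(x0)
        flat_x, flat_x0 = x.reshape(-1), x0.reshape(-1)
        for i in range(flat_x.size):
            x[...] = x0               # restore before the +delta run
            flat_x[i] = flat_x0[i] + delta
            y_pos = f(x)
            x[...] = x0               # restore before the -delta run
            flat_x[i] = flat_x0[i] - delta
            y_neg = f(x)
            grad.reshape(-1)[i] = (y_pos - y_neg) / (2.0 * delta)
        x[...] = x0
        return grad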
diff --git a/python/paddle/v2/framework/tests/test_gather_op.py b/python/paddle/v2/framework/tests/test_gather_op.py
index e86898304252d08be718e40fed46c5e921596af7..e3de3fd0a1dddb3edb0de5987bd71d8a176d97ef 100644
--- a/python/paddle/v2/framework/tests/test_gather_op.py
+++ b/python/paddle/v2/framework/tests/test_gather_op.py
@@ -21,12 +21,9 @@ class TestGatherOp(unittest.TestCase):
 
 class TestGatherGradOp(GradientChecker):
     def test_gather_grad(self):
-        print 'creating op'
         op = create_op("gather")
-        print 'creating op done'
         xnp = numpy.random.random((10, 20)).astype("float32")
         inputs = {'X': xnp, 'Index': numpy.array([1, 3, 5]).astype("int32")}
-        print 'correct before check gradient'
         self.check_grad(op, inputs, set("X"), "Out")
diff --git a/python/paddle/v2/framework/tests/test_scatter_op.py b/python/paddle/v2/framework/tests/test_scatter_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1f9444889372104e39ded78fc7207a59b80a293
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_scatter_op.py
@@ -0,0 +1,38 @@
+import unittest
+from op_test_util import OpTestMeta
+from gradient_checker import GradientChecker, create_op
+import numpy
+import paddle.v2.framework.core as core
+from paddle.v2.framework.op import Operator
+
+
+class TestScatterOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        self.type = "scatter"
+        ref_np = numpy.ones((3, 3)).astype("float32")
+        index_np = numpy.array([1, 2]).astype("int32")
+        updates_np = numpy.random.random((2, 3)).astype("float32")
+        output_np = numpy.copy(ref_np)
+        output_np[index_np] += updates_np
+        self.inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np}
+        self.outputs = {'Out': output_np}
+
+
+class TestScatterGradOp(GradientChecker):
+    def test_scatter_grad(self):
+        op = create_op("scatter")
+        # test data setup
+        ref_np = numpy.ones((3, 10)).astype("float32")
+        index_np = numpy.array([1, 2]).astype("int32")
+        updates_np = numpy.random.random((2, 10)).astype("float32")
+        output_np = numpy.copy(ref_np)
+        output_np[index_np] += updates_np
+        inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np}
+        self.check_grad(
+            op, inputs, set(["Updates", "Ref"]), "Out", in_place=True)
+
+
+if __name__ == "__main__":
+    unittest.main()
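A note on the test data: the reference output is computed with NumPy fancy indexing (output_np[index_np] += updates_np), which does not accumulate repeated indices, so the indices [1, 2] must stay distinct for the expectation to match a true scatter-add. A quick demonstration of the pitfall:

    import numpy as np

    out = np.zeros(3)
    out[np.array([0, 0])] += np.array([1.0, 2.0])
    print(out)  # [2. 0. 0.] -- buffered +=, the duplicate write wins

    out = np.zeros(3)
    np.add.at(out, np.array([0, 0]), np.array([1.0, 2.0]))
    print(out)  # [3. 0. 0.] -- np.add.at accumulates duplicates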