diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 6601918c90b967a659bceb8535fa0b53844b6f43..f6df89369c52797f7269c41f635756582fadbc47 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -50,5 +50,6 @@ cc_library(paddle_pybind SHARED
     cross_entropy_op
     recurrent_op
     uniform_random_op
+    gaussian_random_op
     fill_zeros_like_op)
 endif(WITH_PYTHON)
diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc
index 915ffb1c007c24eca457a0c646378bc191a15810..5fd6754e56caf0952e350200eff7be835900c962 100644
--- a/paddle/framework/pybind.cc
+++ b/paddle/framework/pybind.cc
@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/operators/net_op.h"
 #include "paddle/platform/enforce.h"
 #include "paddle/platform/place.h"
+#include "paddle/string/to_string.h"
 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
 #include "pybind11/stl.h"
@@ -39,7 +40,9 @@ USE_OP(softmax);
 USE_OP(rowwise_add);
 USE_OP(fill_zeros_like);
 USE_OP_WITHOUT_KERNEL(recurrent_op);
+USE_OP(gaussian_random);
 USE_OP(uniform_random);
+
 namespace paddle {
 namespace framework {
 
@@ -205,9 +208,13 @@ All parameter, weight, gradient are variables in Paddle.
                   });
   // clang-format on
 
-  py::class_<paddle::platform::GPUPlace>(m, "GPUPlace").def(py::init<int>());
+  py::class_<platform::GPUPlace>(m, "GPUPlace")
+      .def(py::init<int>())
+      .def("__str__", string::to_string<const platform::GPUPlace &>);
 
-  py::class_<paddle::platform::CPUPlace>(m, "CPUPlace").def(py::init<>());
+  py::class_<paddle::platform::CPUPlace>(m, "CPUPlace")
+      .def(py::init<>())
+      .def("__str__", string::to_string<const platform::CPUPlace &>);
 
   py::class_<OperatorBase, std::shared_ptr<OperatorBase>> operator_base(
       m, "Operator");
diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index 9e4026d1c66ccd30ecfc37b3e819241cb85b1a1a..b3399aaf0fb864857ecbf19a7ebeb498b29510f5 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -41,25 +41,25 @@ function(op_library TARGET)
     endif()
 endfunction()
 
+cc_test(gather_test SRCS gather_test.cc DEPS tensor)
+
 cc_library(net_op SRCS net_op.cc DEPS op_registry)
 cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
 
 op_library(add_op SRCS add_op.cc add_op.cu)
-cc_test(add_op_test SRCS add_op_test.cc DEPS add_op)
 
 op_library(mean_op SRCS mean_op.cc mean_op.cu)
-cc_test(mean_op_test SRCS mean_op_test.cc DEPS mean_op)
 
 op_library(mul_op SRCS mul_op.cc mul_op.cu)
 op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc)
 
 op_library(sigmoid_op SRCS sigmoid_op.cc sigmoid_op.cu)
 op_library(softmax_op SRCS softmax_op.cc softmax_op.cu)
+op_library(gaussian_random_op SRCS gaussian_random_op.cc gaussian_random_op.cu)
 op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu)
 op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu)
 
 op_library(sgd_op SRCS sgd_op.cc sgd_op.cu)
-cc_test(sgd_op_test SRCS sgd_op_test.cc DEPS sgd_op)
 
 op_library(fc_op
     SRCS fc_op.cc
diff --git a/paddle/operators/add_op_test.cc b/paddle/operators/add_op_test.cc
deleted file mode 100644
index bf529defb20d27200a28666278db8607b986e2d5..0000000000000000000000000000000000000000
--- a/paddle/operators/add_op_test.cc
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <gtest/gtest.h>
-#define private public
-#include "paddle/framework/op_registry.h"
-
-USE_OP(add_two);
-
-TEST(AddOp, GetOpProto) {
-  auto& protos = paddle::framework::OpRegistry::protos();
-  auto it = protos.find("add_two");
-  ASSERT_NE(it, protos.end());
-  auto& op_creators = paddle::framework::OpRegistry::op_creators();
-  auto it1 = op_creators.find("add_two_grad");
-  ASSERT_NE(it1, op_creators.end());
-}
diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h
new file mode 100644
index 0000000000000000000000000000000000000000..0c73717d38aca9f3430e66cafc3ecccdd2eec776
--- /dev/null
+++ b/paddle/operators/gather.h
@@ -0,0 +1,73 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <memory.h>
+#include <cstring>
+
+#include "paddle/framework/ddim.h"
+#include "paddle/framework/tensor.h"
+#include "paddle/platform/place.h"
+
+namespace paddle {
+namespace operators {
+
+// CPU gather: output row i = params row indices[i], each slice_size elements.
+template <typename T>
+void CPUGather(const T* params, const int* indices, const int slice_size,
+               const int index_size, T* output) {
+  const size_t slice = static_cast<size_t>(slice_size);  // widen offset math
+  const size_t slice_bytes = slice * sizeof(T);
+  // Signed counter: index_size is an int, so avoid a signed/unsigned compare.
+  for (int i = 0; i < index_size; ++i) {
+    memcpy(output + i * slice, params + indices[i] * slice, slice_bytes);
+  }
+}
+
+// Declaration of the GPU gather; this header provides no definition for it.
+template <typename T>
+void GPUGather(const T* src, const int* index, const int slice_size,
+               const int index_size, T* output);
+
+/**
+ * Gather rows of a source tensor, selected by index, into output.
+ * input[src]: type-T source Tensor
+ * input[index]: type-int index Tensor (1-D)
+ * output: type-T Tensor whose row i receives src row index[i]; it must be
+ *         allocated by the caller, it is not resized here. CPU places only.
+ */
+template <typename T>
+void Gather(const platform::Place& place, const paddle::framework::Tensor* src,
+            const paddle::framework::Tensor* index,
+            paddle::framework::Tensor* output) {
+  // check index of shape 1-D
+  PADDLE_ENFORCE(index->dims().size() == 1);
+  int index_size = index->dims()[0];
+
+  // Only the CPU path exists; fail loudly rather than silently skipping the
+  // copy and leaving output untouched for any other place.
+  PADDLE_ENFORCE(platform::is_cpu_place(place), "Gather supports CPU only.");
+
+  // slice size: number of elements in one row of src
+  auto src_dims = src->dims();
+  int slice_size = 1;
+  for (int i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i];
+
+  // Gathering
+  CPUGather<T>(src->data<T>(), index->data<int>(), slice_size, index_size,
+               output->data<T>());
+}
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/gather_test.cc b/paddle/operators/gather_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5de748ec461e4b1a34b75b57c9cd7d5bc9326059
--- /dev/null
+++ b/paddle/operators/gather_test.cc
@@ -0,0 +1,48 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/gather.h"
+#include "paddle/framework/ddim.h"
+#include "paddle/framework/tensor.h"
+#include "paddle/platform/place.h"
+
+#include <gtest/gtest.h>
+#include <iostream>
+#include <string>
+
+TEST(Gather, GatherData) {
+  using namespace paddle::framework;
+  using namespace paddle::platform;
+  using namespace paddle::operators;
+
+  // Stack-allocated tensors are released automatically when the test exits.
+  Tensor src;
+  Tensor index;
+  Tensor output;
+
+  // src is a 3x4 matrix of 0..11; index selects row 1, then row 0.
+  int* p_src = src.mutable_data<int>(make_ddim({3, 4}), CPUPlace());
+  int* p_index = index.mutable_data<int>(make_ddim({2}), CPUPlace());
+
+  for (int i = 0; i < 12; ++i) p_src[i] = i;
+  p_index[0] = 1;
+  p_index[1] = 0;
+
+  int* p_output = output.mutable_data<int>(make_ddim({2, 4}), CPUPlace());
+
+  Gather<int>(CPUPlace(), &src, &index, &output);
+
+  for (int i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4);
+  for (int i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4);
+}
diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ef417ae2f06e8a9f10aed80674015e2ee448f4a3
--- /dev/null
+++ b/paddle/operators/gaussian_random_op.cc
@@ -0,0 +1,82 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include <random>
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>
+class GaussianRandomKernel : public framework::OpKernel {
+ public:
+  // Fills output 0 with samples drawn from N(mean, std) on the CPU.
+  void Compute(const framework::ExecutionContext& context) const override {
+    float mean = context.op_.GetAttr<float>("mean");
+    float std = context.op_.GetAttr<float>("std");
+    auto* tensor = context.Output<framework::Tensor>(0);
+    T* data = tensor->mutable_data<T>(context.GetPlace());
+
+    // TODO(dzh): attribute does not support unsigned int.
+    // And we need a global random seed configuration.
+    int seed = context.op_.GetAttr<int>("seed");
+    if (seed == 0) seed = std::random_device()();  // 0: draw a fresh seed
+    std::mt19937 g(seed);
+    std::normal_distribution<T> distribution(mean, std);
+    // ssize_t counter matches `size`; an int would overflow past INT_MAX.
+    ssize_t size = framework::product(tensor->dims());
+    for (ssize_t i = 0; i < size; ++i) {
+      data[i] = distribution(g);
+    }
+  }
+};
+
+class GaussianRandomOp : public framework::OperatorWithKernel {
+ protected:
+  void InferShape(const framework::InferShapeContext& context) const override {
+    auto* out = context.Output<framework::Tensor>(0);  // output at index 0
+    const auto shape = GetAttr<std::vector<int>>("dims");
+    PADDLE_ENFORCE(shape.size() > 0UL,
+                   "dims can be one int or array. dims must be set.");
+    out->Resize(framework::make_ddim(shape));  // shape comes from the attribute
+  }
+};
+
+class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  // Declares the "Out" output and the dims/mean/std/seed attributes.
+  GaussianRandomOpMaker(framework::OpProto* proto,
+                        framework::OpAttrChecker* op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+    AddOutput("Out", "output matrix of random op");
+    AddComment(R"DOC(
+GaussianRandom operator.
+Use to initialize tensor with gaussian random generator.
+)DOC");
+    AddAttr<std::vector<int>>("dims", "The dimension of random tensor.");
+    AddAttr<float>("mean", "mean value of random.").SetDefault(.0f);
+    AddAttr<float>("std", "standard deviation of random.").SetDefault(1.0f);
+    AddAttr<int>("seed",
+                 "Random seed of generator. "
+                 "0 means a seed is drawn from std::random_device.")
+        .SetDefault(0);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP(gaussian_random, ops::GaussianRandomOp, ops::GaussianRandomOpMaker);
+REGISTER_OP_CPU_KERNEL(gaussian_random, ops::GaussianRandomKernel<float>);
diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..54e4ae5d2b255f72582b9826685bfacf6c565fab
--- /dev/null
+++ b/paddle/operators/gaussian_random_op.cu
@@ -0,0 +1,52 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include <memory>
+#include <random>
+#include "paddle/platform/dynload/curand.h"
+#include "paddle/platform/gpu_info.h"
+
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>
+class GaussianRandomKernel : public framework::OpKernel {
+ public:
+  // Fills output 0 with N(mean, std) samples using a seeded cuRAND generator.
+  void Compute(const framework::ExecutionContext& context) const override {
+    float mean = context.op_.GetAttr<float>("mean");
+    float std = context.op_.GetAttr<float>("std");
+    auto* tensor = context.Output<framework::Tensor>(0);
+    T* data = tensor->mutable_data<T>(context.GetPlace());
+    int seed = context.op_.GetAttr<int>("seed");
+    if (seed == 0) seed = std::random_device()();  // 0: draw a fresh seed
+    curandGenerator_t g;
+    PADDLE_ENFORCE(platform::dynload::curandCreateGenerator(
+        &g, CURAND_RNG_PSEUDO_DEFAULT));
+    PADDLE_ENFORCE(
+        platform::dynload::curandSetPseudoRandomGeneratorSeed(g, seed));
+    PADDLE_ENFORCE(platform::dynload::curandGenerateNormal(
+        g, data, framework::product(tensor->dims()), mean, std));
+    // Destroy the generator so each Compute call does not leak one.
+    PADDLE_ENFORCE(platform::dynload::curandDestroyGenerator(g));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_GPU_KERNEL(gaussian_random, ops::GaussianRandomKernel<float>);
\ No newline at end of file
diff --git a/paddle/operators/mean_op_test.cc b/paddle/operators/mean_op_test.cc
deleted file mode 100644
index 375dcd50e130355c60f82b9d39d1b94fb2c911b0..0000000000000000000000000000000000000000
--- a/paddle/operators/mean_op_test.cc
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <gtest/gtest.h>
-
-#include <paddle/framework/op_registry.h>
-
-USE_OP(mean);
-
-TEST(MeanOp, GetOpProto) {
-  auto& protos = paddle::framework::OpRegistry::protos();
-  auto it = protos.find("mean");
-  ASSERT_NE(it, protos.end());
-}
diff --git a/paddle/operators/sgd_op_test.cc b/paddle/operators/sgd_op_test.cc
deleted file mode 100644
index 75137259f5e608b259b073101353e5818bb17c92..0000000000000000000000000000000000000000
--- a/paddle/operators/sgd_op_test.cc
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <gtest/gtest.h>
-#include <paddle/framework/op_registry.h>
-USE_OP(sgd);
-TEST(SGDOp, GetOpProto) {
-  auto& protos = paddle::framework::OpRegistry::protos();
-  auto it = protos.find("sgd");
-  ASSERT_NE(it, protos.end());
-}
diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt
index 10659caa882fd3d4060f9947413a392c3b681ee8..f6850e06512d196d51e454bc22cfa3cda8bba84a 100644
--- a/python/paddle/v2/framework/tests/CMakeLists.txt
+++ b/python/paddle/v2/framework/tests/CMakeLists.txt
@@ -21,5 +21,8 @@ py_test(gradient_checker SRCS gradient_checker.py)
 py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py)
 
 py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py)
+
 py_test(test_operator SRCS test_operator.py)
+
+py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py)
 py_test(test_uniform_random_op SRCS test_uniform_random_op.py)
diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py
index b73c4869d14a62a951d8e45dafb14b7523355519..aacc5e88feeb65e08093a35ef85837c916cfd39e 100644
--- a/python/paddle/v2/framework/tests/gradient_checker.py
+++ b/python/paddle/v2/framework/tests/gradient_checker.py
@@ -92,15 +92,27 @@ def get_numeric_gradient(op,
 
 
 class GradientChecker(unittest.TestCase):
-    def __is_close(self, numeric_grads, scope, max_relative_error):
+    def assert_is_close(self, numeric_grads, scope, max_relative_error,
+                        msg_prefix):
         for name in numeric_grads:
-            op_grad = numpy.array(
-                scope.find_var(grad_var_name(name)).get_tensor())
-            is_close = numpy.allclose(
-                numeric_grads[name], op_grad, rtol=max_relative_error, atol=100)
-            if not is_close:
-                return False
-        return True
+            b = numpy.array(scope.find_var(grad_var_name(name)).get_tensor())
+            a = numeric_grads[name]
+
+            abs_a = numpy.abs(a)
+            # Where |a| < 1e-3 divide by 1 instead, so those entries are checked
+            # by absolute error rather than relative error.
+            abs_a[abs_a < 1e-3] = 1
+
+            diff_mat = numpy.abs(a - b) / abs_a
+            max_diff = numpy.max(diff_mat)
+
+            def err_msg():
+                offset = numpy.argmax(diff_mat > max_relative_error)
+                return "%s Variable %s max gradient diff %f over limit %f, the first " \
+                       "error element is %d" % (
+                       msg_prefix, name, max_diff, max_relative_error, offset)
+
+            self.assertLessEqual(max_diff, max_relative_error, err_msg())
 
     def check_grad(self,
                    forward_op,
@@ -145,7 +157,8 @@ class GradientChecker(unittest.TestCase):
         # get numeric gradient
         for check_name in inputs_to_check:
             numeric_grad[check_name] = \
-                get_numeric_gradient(forward_op, input_vars, output_name, check_name)
+                get_numeric_gradient(forward_op, input_vars, output_name,
+                                     check_name)
 
         # get operator gradient according to different device
         for place in places:
@@ -187,15 +200,8 @@ class GradientChecker(unittest.TestCase):
             backward_op.infer_shape(scope)
             backward_op.run(scope, ctx)
 
-            if isinstance(place, core.CPUPlace):
-                msg = "CPU kernel gradient is not close to numeric gradient"
-            else:
-                if isinstance(place, core.GPUPlace):
-                    msg = "GPU kernel gradient is not close to numeric gradient"
-                else:
-                    raise ValueError("unknown place " + type(place))
-            self.assertTrue(
-                self.__is_close(numeric_grad, scope, max_relative_error), msg)
+            self.assert_is_close(numeric_grad, scope, max_relative_error,
+                                 "Gradient Check On %s" % str(place))
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/framework/tests/test_gaussian_random_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..f95ed70b58d611b3233a21d3f2a34c864ae4d1b3
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_gaussian_random_op.py
@@ -0,0 +1,36 @@
+import unittest
+import paddle.v2.framework.core as core
+from paddle.v2.framework.op import Operator
+import numpy
+
+
+class GaussianRandomTest(unittest.TestCase):
+    """Checks the sample mean and std produced by gaussian_random."""
+
+    def test_cpu(self):
+        self.gaussian_random_test(place=core.CPUPlace())
+
+    def test_gpu(self):
+        # Skipped silently unless core.is_compile_gpu() is true.
+        if core.is_compile_gpu():
+            self.gaussian_random_test(place=core.GPUPlace(0))
+
+    def gaussian_random_test(self, place):
+        """Run the op on `place` and compare statistics within 0.1."""
+        scope = core.Scope()
+        scope.new_var("Out").get_tensor()
+
+        attrs = dict(dims=[1000, 784], mean=.0, std=1., seed=10)
+        op = Operator("gaussian_random", Out="Out", **attrs)
+
+        op.infer_shape(scope)
+        device_ctx = core.DeviceContext.create(place)
+        op.run(scope, device_ctx)
+        samples = numpy.array(scope.find_var("Out").get_tensor())
+        # mean=.0 and std=1. were requested above; allow 0.1 of slack.
+        self.assertAlmostEqual(numpy.mean(samples), .0, delta=0.1)
+        self.assertAlmostEqual(numpy.std(samples), 1., delta=0.1)
+
+
+if __name__ == '__main__':
+    unittest.main()