diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 6601918c90b967a659bceb8535fa0b53844b6f43..f6df89369c52797f7269c41f635756582fadbc47 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -50,5 +50,6 @@ cc_library(paddle_pybind SHARED
     cross_entropy_op
     recurrent_op
     uniform_random_op
+    gaussian_random_op
     fill_zeros_like_op)
 endif(WITH_PYTHON)
diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc
index 47983110fa618e89d455a311af2112fc0ff2b9ae..437a44a8aafa650d654a1a77c60613abe07679fe 100644
--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@@ -133,8 +133,9 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
     std::shared_ptr<OperatorBase> grad_op = OpRegistry::CreateGradOp(forwardOp);
     for (std::string& grad_input : grad_op->inputs_) {
       if (no_grad_names.count(grad_input)) {
-        std::string prefix =
-            grad_input.substr(0, grad_input.size() - kGradVarSuffix.size());
+        // +1 for \0
+        std::string prefix = grad_input.substr(
+            0, grad_input.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
         grad_input = prefix + kZeroVarSuffix;
 
         // If part of input gradient of that operator is not calculated, fill
@@ -167,7 +168,7 @@ std::shared_ptr<OperatorBase> Backward(
   std::unordered_set<std::string> no_grad_names;
   no_grad_names.reserve(no_grad_vars.size());
 
-  no_grad_names.insert(kEmptyVarName + kGradVarSuffix);
+  no_grad_names.insert(std::string(kEmptyVarName) + kGradVarSuffix);
 
   for (auto& name : no_grad_vars) {
     no_grad_names.insert(name + kGradVarSuffix);
diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc
index 6d5835bd2236118b6aff95743c4319faceb05d89..1677a3ed4c85ef293f0aadc64a4caa809cbd6ced 100644
--- a/paddle/framework/backward_test.cc
+++ b/paddle/framework/backward_test.cc
@@ -171,10 +171,10 @@ TEST(Backward, simple_op_grad) {
   ASSERT_EQ(4UL, gop->inputs_.size());
   ASSERT_EQ(f::kEmptyVarName, gop->inputs_[0]);
   ASSERT_EQ("rowwise_add_grad", gop->type_);
-  ASSERT_EQ("X" + f::kGradVarSuffix, gop->outputs_[0]);
-  ASSERT_EQ("b" + f::kGradVarSuffix, gop->outputs_[1]);
+  ASSERT_EQ(f::GradVarName("X"), gop->outputs_[0]);
+  ASSERT_EQ(f::GradVarName("b"), gop->outputs_[1]);
 
-  ASSERT_EQ("X" + f::kGradVarSuffix, gop->Output("X" + f::kGradVarSuffix));
+  ASSERT_EQ(f::GradVarName("X"), gop->Output(f::GradVarName("X")));
 }
 
 TEST(Backward, simple_op_not_need_grad) {
@@ -182,7 +182,7 @@ TEST(Backward, simple_op_not_need_grad) {
   ASSERT_NE(fwd, nullptr);
   auto gop = f::Backward(*fwd, {"X"});
   ASSERT_EQ(std::find(gop->outputs_.begin(), gop->outputs_.end(),
-                      "X" + f::kGradVarSuffix),
+                      f::GradVarName("X")),
             gop->outputs_.end());
 
   auto no_input_gop = f::Backward(*fwd, {"X", "b"});
@@ -250,18 +250,18 @@ TEST(Backward, net_input_of_network_not_need_grad) {
   all_output.erase(f::kEmptyVarName);
 
   for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) {
-    ASSERT_NE(all_output.find(out + f::kGradVarSuffix), all_output.end());
+    ASSERT_NE(all_output.find(f::GradVarName(out)), all_output.end());
   }
 
   // Not Generated X
-  ASSERT_EQ(all_output.find("X" + f::kGradVarSuffix), all_output.end());
+  ASSERT_EQ(all_output.find(f::GradVarName("X")), all_output.end());
 
   ASSERT_EQ(2UL, bwd_net->ops_.size());
   ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp());
   auto first_fc_grad = static_cast<ops::NetOp *>(bwd_net->ops_[1].get());
   ASSERT_EQ(3UL, first_fc_grad->ops_.size());
   ASSERT_EQ(f::kEmptyVarName,
-            first_fc_grad->ops_[2]->Output("A" + f::kGradVarSuffix));
+            first_fc_grad->ops_[2]->Output(f::GradVarName("A")));
 }
 
 TEST(Backward, net_shared_weight) {
@@ -313,15 +313,15 @@ TEST(Backward, op_part_of_output_are_not_need) {
   ASSERT_EQ(1UL, fill_zero.inputs_.size());
   ASSERT_EQ("Z", fill_zero.inputs_[0]);
   ASSERT_EQ(1UL, fill_zero.outputs_.size());
-  ASSERT_EQ("Z" + f::kZeroVarSuffix, fill_zero.outputs_[0]);
+  ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.outputs_[0]);
 
   auto &d_many_out = *net->ops_[1];
   ASSERT_EQ("many_output_op_grad", d_many_out.type_);
   ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.inputs_.size());  // I/O/OG
-  ASSERT_EQ("Z" + f::kZeroVarSuffix, d_many_out.Input("z" + f::kGradVarSuffix));
-  ASSERT_EQ("Y" + f::kGradVarSuffix, d_many_out.Input("y" + f::kGradVarSuffix));
-  ASSERT_EQ("X" + f::kGradVarSuffix,
-            d_many_out.Output("x" + f::kGradVarSuffix));
+  ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix,
+            d_many_out.Input(f::GradVarName("z")));
+  ASSERT_EQ(f::GradVarName("Y"), d_many_out.Input(f::GradVarName("y")));
+  ASSERT_EQ(f::GradVarName("X"), d_many_out.Output(f::GradVarName("x")));
 }
 
 TEST(Backward, op_part_of_input_are_not_need) {
@@ -331,10 +331,9 @@ TEST(Backward, op_part_of_input_are_not_need) {
   ASSERT_EQ(grad_mul.type_, "mul_grad");
   ASSERT_EQ(grad_mul.inputs_.size(), 2UL + 1UL + 1UL);
   ASSERT_EQ(grad_mul.outputs_.size(), 2UL);
-  ASSERT_EQ(grad_mul.Output("A" + f::kGradVarSuffix), f::kEmptyVarName);
-  ASSERT_EQ(grad_mul.Output("B" + f::kGradVarSuffix), "b" + f::kGradVarSuffix);
-  ASSERT_EQ(grad_mul.Input("Out" + f::kGradVarSuffix),
-            "out" + f::kGradVarSuffix);
+  ASSERT_EQ(grad_mul.Output(f::GradVarName("A")), f::kEmptyVarName);
+  ASSERT_EQ(grad_mul.Output(f::GradVarName("B")), f::GradVarName("b"));
+  ASSERT_EQ(grad_mul.Input(f::GradVarName("Out")), f::GradVarName("out"));
   ASSERT_EQ(grad_mul.Input("A"), "a");
   ASSERT_EQ(grad_mul.Input("B"), "b");
   ASSERT_EQ(grad_mul.Input("Out"), "out");
diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc
index cf7143eba4460e5619188b82ffe23db11a04a236..f1ebbae52f13d9c0fc9408aec8c4160575ad59c0 100644
--- a/paddle/framework/grad_op_builder_test.cc
+++ b/paddle/framework/grad_op_builder_test.cc
@@ -83,21 +83,19 @@ TEST(GradOpBuilder, MutiInOut) {
   EXPECT_EQ(grad_test_op->Input("Out1"), "out1");
   EXPECT_EQ(grad_test_op->Inputs("Out2_mult"),
             std::vector<std::string>({"out2_1", "out2_2"}));
-  EXPECT_EQ(grad_test_op->Input("Out1" + f::kGradVarSuffix),
-            "out1" + f::kGradVarSuffix);
-  EXPECT_EQ(grad_test_op->Inputs("Out2_mult" + f::kGradVarSuffix),
+  EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out1")),
+            f::GradVarName("out1"));
+  EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out2_mult")),
             std::vector<std::string>(
-                {"out2_1" + f::kGradVarSuffix, "out2_2" + f::kGradVarSuffix}));
+                {f::GradVarName("out2_1"), f::GradVarName("out2_2")}));
 
   ASSERT_EQ(grad_test_op->outputs_.size(), 5UL);
-  EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix),
-            "in1" + f::kGradVarSuffix);
-  EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix),
-            std::vector<std::string>({"in2_1" + f::kGradVarSuffix,
-                                      "in2_2" + f::kGradVarSuffix,
-                                      "in2_3" + f::kGradVarSuffix}));
-  EXPECT_EQ(grad_test_op->Output("In3" + f::kGradVarSuffix),
-            "in3" + f::kGradVarSuffix);
+  EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1"));
+  EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")),
+            std::vector<std::string>({f::GradVarName("in2_1"),
+                                      f::GradVarName("in2_2"),
+                                      f::GradVarName("in2_3")}));
+  EXPECT_EQ(grad_test_op->Output(f::GradVarName("In3")), f::GradVarName("in3"));
 }
 
 TEST(GradOpBuilder, IOIgnoredInGradient) {
@@ -119,19 +117,18 @@ TEST(GradOpBuilder, IOIgnoredInGradient) {
   EXPECT_EQ(grad_test_op->Inputs("Out1_mult"),
             std::vector<std::string>({"out1_1", "out1_2"}));
   EXPECT_EQ(grad_test_op->Input("Out2"), f::kEmptyVarName);
-  EXPECT_EQ(grad_test_op->Inputs("Out1_mult" + f::kGradVarSuffix),
+  EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out1_mult")),
             std::vector<std::string>(
-                {"out1_1" + f::kGradVarSuffix, "out1_2" + f::kGradVarSuffix}));
-  EXPECT_EQ(grad_test_op->Input("Out2" + f::kGradVarSuffix),
-            "out2" + f::kGradVarSuffix);
+                {f::GradVarName("out1_1"), f::GradVarName("out1_2")}));
+  EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out2")),
+            f::GradVarName("out2"));
 
   ASSERT_EQ(grad_test_op->outputs_.size(), 5UL);
-  EXPECT_EQ(grad_test_op->Output("In1" + f::kGradVarSuffix),
-            "in1" + f::kGradVarSuffix);
-  EXPECT_EQ(grad_test_op->Outputs("In2_mult" + f::kGradVarSuffix),
+  EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1"));
+  EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")),
             std::vector<std::string>(
-                {"in2_1" + f::kGradVarSuffix, "in2_2" + f::kGradVarSuffix}));
-  EXPECT_EQ(grad_test_op->Outputs("In3_mult" + f::kGradVarSuffix),
+                {f::GradVarName("in2_1"), f::GradVarName("in2_2")}));
+  EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In3_mult")),
             std::vector<std::string>(
-                {"in3_1" + f::kGradVarSuffix, "in3_2" + f::kGradVarSuffix}));
+                {f::GradVarName("in3_1"), f::GradVarName("in3_2")}));
 }
diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h
index bf2ce18edc392bf4d2e67f6b28d023bf22bcb62b..629cb96907e424a0cbc4b9f1831cb5b1cc2980fc 100644
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@@ -33,19 +33,19 @@ namespace paddle {
 namespace framework {
 
 /// If a variable is a empty variable, that name will be used.
-const std::string kEmptyVarName = "@EMPTY@";
+constexpr char kEmptyVarName[] = "@EMPTY@";
 
 /// If a variable is a temporary variable, that name will be set in Python,
 /// but it will be convert to a unique name in scope after OpCreator.
-const std::string kTempVarName = "@TEMP@";
+constexpr char kTempVarName[] = "@TEMP@";
 
 /// If a variable's name has a certain suffix, it means that the
 /// variable is the gradient of another varibale.
 /// e.g. Variable "x@GRAD" is the gradient of varibale "x".
-const std::string kGradVarSuffix = "@GRAD";
+constexpr char kGradVarSuffix[] = "@GRAD";
 
 /// Variables with this suffix are supposed to be filled up with zeros.
-const std::string kZeroVarSuffix = "@ZERO";
+constexpr char kZeroVarSuffix[] = "@ZERO";
 
 inline std::string GradVarName(const std::string& var_name) {
   return var_name + kGradVarSuffix;
diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc
index 915ffb1c007c24eca457a0c646378bc191a15810..5fd6754e56caf0952e350200eff7be835900c962 100644
--- a/paddle/framework/pybind.cc
+++ b/paddle/framework/pybind.cc
@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/operators/net_op.h"
 #include "paddle/platform/enforce.h"
 #include "paddle/platform/place.h"
+#include "paddle/string/to_string.h"
 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
 #include "pybind11/stl.h"
@@ -39,7 +40,9 @@ USE_OP(softmax);
 USE_OP(rowwise_add);
 USE_OP(fill_zeros_like);
 USE_OP_WITHOUT_KERNEL(recurrent_op);
+USE_OP(gaussian_random);
 USE_OP(uniform_random);
+
 namespace paddle {
 namespace framework {
 
@@ -205,9 +208,13 @@ All parameter, weight, gradient are variables in Paddle.
                   });
   // clang-format on
 
-  py::class_<paddle::platform::GPUPlace>(m, "GPUPlace").def(py::init<int>());
+  py::class_<platform::GPUPlace>(m, "GPUPlace")
+      .def(py::init<int>())
+      .def("__str__", string::to_string<const platform::GPUPlace &>);
 
-  py::class_<paddle::platform::CPUPlace>(m, "CPUPlace").def(py::init<>());
+  py::class_<paddle::platform::CPUPlace>(m, "CPUPlace")
+      .def(py::init<>())
+      .def("__str__", string::to_string<const platform::CPUPlace &>);
 
   py::class_<OperatorBase, std::shared_ptr<OperatorBase>> operator_base(
       m, "Operator");
diff --git a/paddle/gserver/tests/test_KmaxSeqScore.cpp b/paddle/gserver/tests/test_KmaxSeqScore.cpp
index a51fe390c74d74cd5f3d07df62b715b239335548..308abe6816428bc0f98ec32e892622fa4a23b1ae 100644
--- a/paddle/gserver/tests/test_KmaxSeqScore.cpp
+++ b/paddle/gserver/tests/test_KmaxSeqScore.cpp
@@ -96,6 +96,11 @@ TEST(Layer, kmaxSeqScoreLayer) {
   MatrixPtr inValue =
       Matrix::create(subSeqStartPosition.back(), 1, false, false);
 
+  std::vector<bool> mode = {false};
+#ifndef PADDLE_ONLY_CPU
+  mode.push_back(true);
+#endif
+
   for (auto hasSubseq : {false, true}) {
     vector<vector<int>> groundTruth;
     inValue->randomizeUniform();
@@ -104,7 +109,7 @@ TEST(Layer, kmaxSeqScoreLayer) {
                          hasSubseq ? subSeqStartPosition : seqStartPosition,
                          beamSize);
 
-    for (auto useGpu : {false, true}) {
+    for (auto useGpu : mode) {
       TestConfig config;
       config.layerConfig.set_type("kmax_seq_score");
       config.layerConfig.set_beam_size(beamSize);
diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index 7085905c2a7094dfd676135166372986f2eb1ba8..f15fb4f854d361d8a3f056e3f7d1cb5c09bf56fb 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -42,26 +42,25 @@ function(op_library TARGET)
 endfunction()
 
 add_subdirectory(math)
+cc_test(gather_test SRCS gather_test.cc DEPS tensor)
 
 cc_library(net_op SRCS net_op.cc DEPS op_registry)
 cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
 
 op_library(add_op SRCS add_op.cc add_op.cu)
-cc_test(add_op_test SRCS add_op_test.cc DEPS add_op)
 
 op_library(mean_op SRCS mean_op.cc mean_op.cu)
-cc_test(mean_op_test SRCS mean_op_test.cc DEPS mean_op)
 
 op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function)
 op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc)
 
 op_library(sigmoid_op SRCS sigmoid_op.cc sigmoid_op.cu)
 op_library(softmax_op SRCS softmax_op.cc softmax_op.cu)
+op_library(gaussian_random_op SRCS gaussian_random_op.cc gaussian_random_op.cu)
 op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu)
 op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu)
 
 op_library(sgd_op SRCS sgd_op.cc sgd_op.cu)
-cc_test(sgd_op_test SRCS sgd_op_test.cc DEPS sgd_op)
 
 op_library(fc_op
     SRCS fc_op.cc
diff --git a/paddle/operators/add_op_test.cc b/paddle/operators/add_op_test.cc
deleted file mode 100644
index bf529defb20d27200a28666278db8607b986e2d5..0000000000000000000000000000000000000000
--- a/paddle/operators/add_op_test.cc
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <gtest/gtest.h>
-#define private public
-#include "paddle/framework/op_registry.h"
-
-USE_OP(add_two);
-
-TEST(AddOp, GetOpProto) {
-  auto& protos = paddle::framework::OpRegistry::protos();
-  auto it = protos.find("add_two");
-  ASSERT_NE(it, protos.end());
-  auto& op_creators = paddle::framework::OpRegistry::op_creators();
-  auto it1 = op_creators.find("add_two_grad");
-  ASSERT_NE(it1, op_creators.end());
-}
diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h
new file mode 100644
index 0000000000000000000000000000000000000000..0c73717d38aca9f3430e66cafc3ecccdd2eec776
--- /dev/null
+++ b/paddle/operators/gather.h
@@ -0,0 +1,84 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <memory.h>
+#include <cstring>
+
+#include "paddle/framework/ddim.h"
+#include "paddle/framework/tensor.h"
+#include "paddle/platform/place.h"
+
+namespace paddle {
+namespace operators {
+
+/**
+ * Implementation of CPU copy: slice i of output receives slice indices[i] of
+ * params, where a slice is slice_size consecutive elements.
+ *
+ * Nothing is validated here: every index must address a slice that exists in
+ * params, and output must have room for index_size * slice_size elements.
+ */
+template <typename T>
+void CPUGather(const T* params, const int* indices, const int slice_size,
+               const int index_size, T* output) {
+  const size_t slice_bytes = slice_size * sizeof(T);
+
+  // index_size is signed, so the counter is signed as well. Element offsets
+  // are widened to size_t before multiplying so they cannot overflow int.
+  for (int i = 0; i < index_size; ++i) {
+    const size_t dst_offset = static_cast<size_t>(i) * slice_size;
+    const size_t src_offset = static_cast<size_t>(indices[i]) * slice_size;
+    memcpy(output + dst_offset, params + src_offset, slice_bytes);
+  }
+}
+
+// Implementation of GPU copy (declared only; this header does not define it):
+template <typename T>
+void GPUGather(const T* src, const int* index, const int slice_size,
+               const int index_size, T* output);
+
+/**
+ * Gather slices of the source tensor along its first dimension, according to
+ * index, into the output tensor.
+ * input[place]: where the tensors live; only CPU places are implemented
+ * input[src]: type-T source Tensor
+ * input[index]: type-int index Tensor (1-D)
+ * output[output]: type-T Tensor that receives the slices; its memory must
+ *                 already be allocated, it is not resized here
+ */
+template <typename T>
+void Gather(const platform::Place& place, const paddle::framework::Tensor* src,
+            const paddle::framework::Tensor* index,
+            paddle::framework::Tensor* output) {
+  // check index of shape 1-D
+  PADDLE_ENFORCE(index->dims().size() == 1);
+  int index_size = index->dims()[0];
+
+  auto src_dims = src->dims();
+
+  // slice size: product of every dimension of src except the first
+  int slice_size = 1;
+  for (int i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i];
+
+  // Gathering. Only the CPU path exists, so reject any other place instead
+  // of returning with output left untouched.
+  PADDLE_ENFORCE(platform::is_cpu_place(place),
+                 "Gather is only implemented for CPUPlace.");
+  CPUGather<T>(src->data<T>(), index->data<int>(), slice_size, index_size,
+               output->data<T>());
+}
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/gather_test.cc b/paddle/operators/gather_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5de748ec461e4b1a34b75b57c9cd7d5bc9326059
--- /dev/null
+++ b/paddle/operators/gather_test.cc
@@ -0,0 +1,53 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/gather.h"
+#include "paddle/framework/ddim.h"
+#include "paddle/framework/tensor.h"
+#include "paddle/platform/place.h"
+
+#include <gtest/gtest.h>
+#include <iostream>
+#include <string>
+
+// Gathers rows 1 and 0 (in that order) of a 3x4 int tensor on the CPU and
+// checks that the output holds those rows.
+TEST(Gather, GatherData) {
+  using namespace paddle::framework;
+  using namespace paddle::platform;
+  using namespace paddle::operators;
+
+  // Automatic storage: the tensors are destroyed when the test returns, so
+  // nothing has to be deleted by hand.
+  Tensor src;
+  Tensor index;
+  Tensor output;
+
+  int* p_src = src.mutable_data<int>(make_ddim({3, 4}), CPUPlace());
+  int* p_index = index.mutable_data<int>(make_ddim({2}), CPUPlace());
+
+  // src holds 0..11, so row r is {4r, 4r + 1, 4r + 2, 4r + 3}.
+  for (int i = 0; i < 12; ++i) p_src[i] = i;
+  p_index[0] = 1;
+  p_index[1] = 0;
+
+  // The output buffer is allocated here, before Gather writes into it.
+  int* p_output = output.mutable_data<int>(make_ddim({2, 4}), CPUPlace());
+
+  Gather<int>(CPUPlace(), &src, &index, &output);
+
+  // Output row 0 is source row 1 (4..7); output row 1 is source row 0 (0..3).
+  for (int i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4);
+  for (int i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4);
+}
diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ef417ae2f06e8a9f10aed80674015e2ee448f4a3
--- /dev/null
+++ b/paddle/operators/gaussian_random_op.cc
@@ -0,0 +1,91 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include <random>
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+// CPU kernel: fills output 0 with samples of a normal distribution whose
+// parameters are the "mean" and "std" attributes.
+template <typename T>
+class GaussianRandomKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    float mean = context.op_.GetAttr<float>("mean");
+    float stddev = context.op_.GetAttr<float>("std");
+    auto* tensor = context.Output<framework::Tensor>(0);
+    T* data = tensor->mutable_data<T>(context.GetPlace());
+
+    // TODO(dzh): attribute does not support unsigned int.
+    // And we need a global random seed configuration.
+    // Until then a seed of 0 is replaced by one from std::random_device.
+    int seed = context.op_.GetAttr<int>("seed");
+    if (seed == 0) {
+      seed = std::random_device()();
+    }
+    std::mt19937 g(seed);
+    std::normal_distribution<T> distribution(mean, stddev);
+    // The counter has the same signed type as the element count, so the
+    // comparison below never mixes widths.
+    const ssize_t size = framework::product(tensor->dims());
+    for (ssize_t i = 0; i < size; ++i) {
+      data[i] = distribution(g);
+    }
+  }
+};
+
+// Operator definition: the output shape comes from the "dims" attribute.
+class GaussianRandomOp : public framework::OperatorWithKernel {
+ protected:
+  void InferShape(const framework::InferShapeContext& context) const override {
+    auto* tensor = context.Output<framework::Tensor>(0);
+    auto dims = GetAttr<std::vector<int>>("dims");
+    PADDLE_ENFORCE(dims.size() > 0UL,
+                   "dims can be one int or array. dims must be set.");
+    tensor->Resize(framework::make_ddim(dims));
+  }
+};
+
+// Declares the output, the documentation and the attributes of the operator.
+class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  GaussianRandomOpMaker(framework::OpProto* proto,
+                        framework::OpAttrChecker* op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
+    AddOutput("Out", "output matrix of random op");
+    AddComment(R"DOC(
+GaussianRandom operator.
+Use to initialize tensor with gaussian random generator.
+)DOC");
+
+    AddAttr<std::vector<int>>("dims", "The dimension of random tensor.");
+    AddAttr<float>("mean", "mean value of random.").SetDefault(.0f);
+    AddAttr<float>("std", "standard deviation of random.").SetDefault(1.0f);
+    // The two literals below are concatenated, so the first one ends with a
+    // space to keep the sentences apart.
+    AddAttr<int>("seed",
+                 "Random seed of generator. "
+                 "0 means use system wide seed")
+        .SetDefault(0);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP(gaussian_random, ops::GaussianRandomOp, ops::GaussianRandomOpMaker);
+REGISTER_OP_CPU_KERNEL(gaussian_random, ops::GaussianRandomKernel<float>);
diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..54e4ae5d2b255f72582b9826685bfacf6c565fab
--- /dev/null
+++ b/paddle/operators/gaussian_random_op.cu
@@ -0,0 +1,65 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include <memory>
+#include <random>
+#include "paddle/platform/dynload/curand.h"
+#include "paddle/platform/gpu_info.h"
+
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+// GPU kernel: fills output 0 with normally distributed samples produced by
+// cuRAND, parameterised by the "mean" and "std" attributes.
+template <typename T>
+class GaussianRandomKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    float mean = context.op_.GetAttr<float>("mean");
+    float stddev = context.op_.GetAttr<float>("std");
+    auto* tensor = context.Output<framework::Tensor>(0);
+    T* data = tensor->mutable_data<T>(context.GetPlace());
+
+    // A seed of 0 is replaced by one obtained from std::random_device.
+    int seed = context.op_.GetAttr<int>("seed");
+    if (seed == 0) {
+      seed = std::random_device()();
+    }
+    curandGenerator_t g;
+    PADDLE_ENFORCE(platform::dynload::curandCreateGenerator(
+        &g, CURAND_RNG_PSEUDO_DEFAULT));
+    try {
+      PADDLE_ENFORCE(
+          platform::dynload::curandSetPseudoRandomGeneratorSeed(g, seed));
+      // cuRAND reports failures only through the returned status, so it is
+      // checked instead of being discarded.
+      PADDLE_ENFORCE(curandGenerateNormal(
+          g, data, framework::product(tensor->dims()), mean, stddev));
+    } catch (...) {
+      // Release the generator before propagating the failure.
+      curandDestroyGenerator(g);
+      throw;
+    }
+    // The generator is local to this call, so destroy it before returning.
+    PADDLE_ENFORCE(curandDestroyGenerator(g));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_GPU_KERNEL(gaussian_random, ops::GaussianRandomKernel<float>);
\ No newline at end of file
diff --git a/paddle/operators/math/math_function.cc b/paddle/operators/math/math_function.cc
index 7827c213fec953a5c7f403e536199fb3d8b80d7d..10382211436f742b4daffbd05d10db8fd232a218 100644
--- a/paddle/operators/math/math_function.cc
+++ b/paddle/operators/math/math_function.cc
@@ -73,7 +73,7 @@ void matmul<platform::CPUPlace, float>(const framework::Tensor& in1, bool in1_T,
   int K = in1_dim[1];
 
   CBLAS_TRANSPOSE in1_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
-  CBLAS_TRANSPOSE in2_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE in2_Trans = (in2_T == false) ? CblasNoTrans : CblasTrans;
 
   gemm<platform::CPUPlace, float>(in1_Trans, in2_Trans, M, N, K, alpha,
                                   in1.data<float>(), in2.data<float>(), beta,
@@ -106,7 +106,7 @@ void matmul<platform::CPUPlace, double>(const framework::Tensor& in1,
   int N = out_dim[1];
   int K = in1_dim[1];
   CBLAS_TRANSPOSE in1_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
-  CBLAS_TRANSPOSE in2_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE in2_Trans = (in2_T == false) ? CblasNoTrans : CblasTrans;
 
   gemm<platform::CPUPlace, double>(in1_Trans, in2_Trans, M, N, K, alpha,
                                    in1.data<double>(), in2.data<double>(), beta,
diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu
index 50fc9939b1a425a10787a4f2ce5c441b6e2da672..f4d238e8ab8721025e8fabc6933008dc55d3056a 100644
--- a/paddle/operators/math/math_function.cu
+++ b/paddle/operators/math/math_function.cu
@@ -85,7 +85,7 @@ void matmul<platform::GPUPlace, float>(const framework::Tensor& in1, bool in1_T,
   int K = in1_dim[1];
 
   CBLAS_TRANSPOSE in1_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
-  CBLAS_TRANSPOSE in2_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE in2_Trans = (in2_T == false) ? CblasNoTrans : CblasTrans;
 
   gemm<platform::GPUPlace, float>(in1_Trans, in2_Trans, M, N, K, alpha,
                                   in1.data<float>(), in2.data<float>(), beta,
@@ -118,7 +118,7 @@ void matmul<platform::GPUPlace, double>(const framework::Tensor& in1,
   int N = out_dim[1];
   int K = in1_dim[1];
   CBLAS_TRANSPOSE in1_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
-  CBLAS_TRANSPOSE in2_Trans = (in1_T == false) ? CblasNoTrans : CblasTrans;
+  CBLAS_TRANSPOSE in2_Trans = (in2_T == false) ? CblasNoTrans : CblasTrans;
 
   gemm<platform::GPUPlace, double>(in1_Trans, in2_Trans, M, N, K, alpha,
                                    in1.data<double>(), in2.data<double>(), beta,
diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc
index 997b0c514e96467dc9f9027829616c7b16fe43e1..2ea049cb3605f4dedabb992ebc0e8aa276ad5e9a 100644
--- a/paddle/operators/mean_op.cc
+++ b/paddle/operators/mean_op.cc
@@ -41,7 +41,7 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker {
 class MeanGradOp : public framework::OperatorWithKernel {
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    ctx.Output<Tensor>("X" + framework::kGradVarSuffix)
+    ctx.Output<Tensor>(framework::GradVarName("X"))
         ->Resize(ctx.Input<Tensor>("X")->dims());
   }
 };
diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h
index f3db0a29bb234948d180d964fb82057632ec4414..e8595a14faa7c1b03734f814c78f9cbf1819fbb5 100644
--- a/paddle/operators/mean_op.h
+++ b/paddle/operators/mean_op.h
@@ -48,10 +48,10 @@ template <typename Place, typename T>
 class MeanGradKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto OG = context.Input<Tensor>("Out" + framework::kGradVarSuffix);
+    auto OG = context.Input<Tensor>(framework::GradVarName("Out"));
     PADDLE_ENFORCE(framework::product(OG->dims()) == 1,
                    "Mean Gradient should be scalar");
-    auto IG = context.Output<Tensor>("X" + framework::kGradVarSuffix);
+    auto IG = context.Output<Tensor>(framework::GradVarName("X"));
     IG->mutable_data<T>(context.GetPlace());
 
     T ig_size = (T)framework::product(IG->dims());
diff --git a/paddle/operators/mean_op_test.cc b/paddle/operators/mean_op_test.cc
deleted file mode 100644
index 375dcd50e130355c60f82b9d39d1b94fb2c911b0..0000000000000000000000000000000000000000
--- a/paddle/operators/mean_op_test.cc
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <gtest/gtest.h>
-
-#include <paddle/framework/op_registry.h>
-
-USE_OP(mean);
-
-TEST(MeanOp, GetOpProto) {
-  auto& protos = paddle::framework::OpRegistry::protos();
-  auto it = protos.find("mean");
-  ASSERT_NE(it, protos.end());
-}
diff --git a/paddle/operators/sgd_op_test.cc b/paddle/operators/sgd_op_test.cc
deleted file mode 100644
index 75137259f5e608b259b073101353e5818bb17c92..0000000000000000000000000000000000000000
--- a/paddle/operators/sgd_op_test.cc
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <gtest/gtest.h>
-#include <paddle/framework/op_registry.h>
-USE_OP(sgd);
-TEST(SGDOp, GetOpProto) {
-  auto& protos = paddle::framework::OpRegistry::protos();
-  auto it = protos.find("sgd");
-  ASSERT_NE(it, protos.end());
-}
diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h
index d2adb997de8e36922d5056b20f238a82eee74f8c..337a059fb1494d500be0fd2437e59c863ae1563c 100644
--- a/paddle/platform/enforce.h
+++ b/paddle/platform/enforce.h
@@ -15,11 +15,12 @@ limitations under the License. */
 #pragma once
 
 #include <execinfo.h>
-#include <paddle/string/printf.h>
 #include <iomanip>
 #include <sstream>
 #include <stdexcept>
 #include <string>
+#include "paddle/string/printf.h"
+#include "paddle/string/to_string.h"
 
 #ifndef PADDLE_ONLY_CPU
 
@@ -194,8 +195,8 @@ inline void throw_on_error(T e) {
 #define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...)        \
   PADDLE_ENFORCE(__VAL0 __CMP __VAL1,                                         \
                  "enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \
-                 #__VAL0, #__VAL1, std::to_string(__VAL0),                    \
-                 std::to_string(__VAL1),                                      \
+                 #__VAL0, #__VAL1, paddle::string::to_string(__VAL0),         \
+                 paddle::string::to_string(__VAL1),                           \
                  paddle::string::Sprintf("" __VA_ARGS__));
 
 }  // namespace platform
diff --git a/paddle/platform/enforce_test.cc b/paddle/platform/enforce_test.cc
index 5408fce558ab0d9c369aaba22374315fe553ce0f..80bdee3d9dfbe38ef707a6ba60cdb7f7b99714de 100644
--- a/paddle/platform/enforce_test.cc
+++ b/paddle/platform/enforce_test.cc
@@ -9,6 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#include <array>
+#include <iostream>
 #include <memory>
 
 #include "gtest/gtest.h"
@@ -83,7 +85,7 @@ TEST(ENFORCE_NE, FAIL) {
   } catch (paddle::platform::EnforceNotMet error) {
     caught_exception = true;
     EXPECT_TRUE(HasPrefix(StringPiece(error.what()),
-                          "enforce 1.0 != 1UL failed, 1.000000 == 1"))
+                          "enforce 1.0 != 1UL failed, 1 == 1"))
         << error.what() << " does not have expected prefix";
   }
   EXPECT_TRUE(caught_exception);
@@ -176,3 +178,39 @@ TEST(ENFORCE_NOT_NULL, FAIL) {
   }
   EXPECT_TRUE(caught_exception);
 }
+
+struct Dims {
+  size_t dims_[4];
+
+  bool operator==(const Dims& o) const {
+    for (size_t i = 0; i < 4; ++i) {
+      if (dims_[i] != o.dims_[i]) return false;
+    }
+    return true;
+  }
+};
+
+std::ostream& operator<<(std::ostream& os, const Dims& d) {
+  for (size_t i = 0; i < 4; ++i) {
+    if (i == 0) {
+      os << "[";
+    }
+    os << d.dims_[i];
+    if (i == 4 - 1) {
+      os << "]";
+    } else {
+      os << ", ";
+    }
+  }
+  return os;
+}
+
+TEST(ENFORCE_USER_DEFINED_CLASS, EQ) {
+  Dims a{{1, 2, 3, 4}}, b{{1, 2, 3, 4}};
+  PADDLE_ENFORCE_EQ(a, b);
+}
+
+TEST(ENFORCE_USER_DEFINED_CLASS, NE) {
+  Dims a{{1, 2, 3, 4}}, b{{5, 6, 7, 8}};
+  ASSERT_THROW(PADDLE_ENFORCE_EQ(a, b), paddle::platform::EnforceNotMet);
+}
\ No newline at end of file
diff --git a/paddle/string/CMakeLists.txt b/paddle/string/CMakeLists.txt
index 5becf62672d0c606c98ea1a1a4383df97088ab05..60667b72873f9422aec1807972a81ab680de2e64 100644
--- a/paddle/string/CMakeLists.txt
+++ b/paddle/string/CMakeLists.txt
@@ -2,3 +2,4 @@ cc_library(stringpiece SRCS piece.cc)
 cc_test(stringpiece_test SRCS piece_test.cc DEPS stringpiece glog gflags)
 
 cc_test(stringprintf_test SRCS printf_test.cc DEPS glog gflags)
+cc_test(to_string_test SRCS to_string_test.cc)
diff --git a/paddle/string/to_string.h b/paddle/string/to_string.h
new file mode 100644
index 0000000000000000000000000000000000000000..4f478b6a36b23bdba8ef3ddae94b3eadf18716c2
--- /dev/null
+++ b/paddle/string/to_string.h
@@ -0,0 +1,40 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+#include <sstream>
+#include <string>
+
+namespace paddle {
+namespace string {
+template <typename T>
+inline std::string to_string(T v) {
+  std::ostringstream sout;
+  sout << v;
+  return sout.str();
+}
+
+// Specializations for std::string / const char*: skip the ostringstream.
+template <>
+inline std::string to_string(std::string v) {
+  return v;
+}
+
+template <>
+inline std::string to_string(const char* v) {
+  return std::string(v);
+}
+
+}  // namespace string
+}  // namespace paddle
diff --git a/paddle/string/to_string_test.cc b/paddle/string/to_string_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5ff1b007f1875c7b920a08bd13b8d98cdc5138d3
--- /dev/null
+++ b/paddle/string/to_string_test.cc
@@ -0,0 +1,39 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/string/to_string.h"
+#include <gtest/gtest.h>
+
+constexpr char kOutputString[] = "User Defined Output";
+class UserDefinedClass {
+public:
+};
+
+std::ostream& operator<<(std::ostream& s, const UserDefinedClass& ins) {
+  s << kOutputString;
+  return s;
+}
+
+TEST(to_string, normal) {
+  using namespace paddle::string;
+  ASSERT_EQ("10", to_string(10));
+  ASSERT_EQ("abc", to_string("abc"));
+  ASSERT_EQ("1.2", to_string(1.2));
+}
+
+TEST(to_string, user_defined) {
+  using namespace paddle::string;
+  UserDefinedClass instance;
+  ASSERT_EQ(kOutputString, to_string(instance));
+}
\ No newline at end of file
diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt
index 10659caa882fd3d4060f9947413a392c3b681ee8..f6850e06512d196d51e454bc22cfa3cda8bba84a 100644
--- a/python/paddle/v2/framework/tests/CMakeLists.txt
+++ b/python/paddle/v2/framework/tests/CMakeLists.txt
@@ -21,5 +21,8 @@ py_test(gradient_checker SRCS gradient_checker.py)
 py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py)
 
 py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py)
+
 py_test(test_operator SRCS test_operator.py)
+
+py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py)
 py_test(test_uniform_random_op SRCS test_uniform_random_op.py)
diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py
index b73c4869d14a62a951d8e45dafb14b7523355519..aacc5e88feeb65e08093a35ef85837c916cfd39e 100644
--- a/python/paddle/v2/framework/tests/gradient_checker.py
+++ b/python/paddle/v2/framework/tests/gradient_checker.py
@@ -92,15 +92,27 @@ def get_numeric_gradient(op,
 
 
 class GradientChecker(unittest.TestCase):
-    def __is_close(self, numeric_grads, scope, max_relative_error):
+    def assert_is_close(self, numeric_grads, scope, max_relative_error,
+                        msg_prefix):
         for name in numeric_grads:
-            op_grad = numpy.array(
-                scope.find_var(grad_var_name(name)).get_tensor())
-            is_close = numpy.allclose(
-                numeric_grads[name], op_grad, rtol=max_relative_error, atol=100)
-            if not is_close:
-                return False
-        return True
+            b = numpy.array(scope.find_var(grad_var_name(name)).get_tensor())
+            a = numeric_grads[name]
+
+            abs_a = numpy.abs(a)
+            # Where |a| < 1e-3, divide by 1 instead of |a|, so those elements
+            # are judged by absolute error rather than relative error.
+            abs_a[abs_a < 1e-3] = 1
+
+            diff_mat = numpy.abs(a - b) / abs_a
+            max_diff = numpy.max(diff_mat)
+
+            def err_msg():
+                offset = numpy.argmax(diff_mat > max_relative_error)
+                return "%s Variable %s max gradient diff %f over limit %f, the first " \
+                       "error element is %d" % (
+                       msg_prefix, name, max_diff, max_relative_error, offset)
+
+            self.assertLessEqual(max_diff, max_relative_error, err_msg())
 
     def check_grad(self,
                    forward_op,
@@ -145,7 +157,8 @@ class GradientChecker(unittest.TestCase):
         # get numeric gradient
         for check_name in inputs_to_check:
             numeric_grad[check_name] = \
-                get_numeric_gradient(forward_op, input_vars, output_name, check_name)
+                get_numeric_gradient(forward_op, input_vars, output_name,
+                                     check_name)
 
         # get operator gradient according to different device
         for place in places:
@@ -187,15 +200,8 @@ class GradientChecker(unittest.TestCase):
             backward_op.infer_shape(scope)
             backward_op.run(scope, ctx)
 
-            if isinstance(place, core.CPUPlace):
-                msg = "CPU kernel gradient is not close to numeric gradient"
-            else:
-                if isinstance(place, core.GPUPlace):
-                    msg = "GPU kernel gradient is not close to numeric gradient"
-                else:
-                    raise ValueError("unknown place " + type(place))
-            self.assertTrue(
-                self.__is_close(numeric_grad, scope, max_relative_error), msg)
+            self.assert_is_close(numeric_grad, scope, max_relative_error,
+                                 "Gradient Check On %s" % str(place))
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/framework/tests/test_gaussian_random_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..f95ed70b58d611b3233a21d3f2a34c864ae4d1b3
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_gaussian_random_op.py
@@ -0,0 +1,36 @@
+import unittest
+import paddle.v2.framework.core as core
+from paddle.v2.framework.op import Operator
+import numpy
+
+
+class GaussianRandomTest(unittest.TestCase):
+    def test_cpu(self):
+        self.gaussian_random_test(place=core.CPUPlace())
+
+    def test_gpu(self):
+        if core.is_compile_gpu():
+            self.gaussian_random_test(place=core.GPUPlace(0))
+
+    def gaussian_random_test(self, place):
+        scope = core.Scope()
+        scope.new_var("Out").get_tensor()
+
+        op = Operator(
+            "gaussian_random",
+            Out="Out",
+            dims=[1000, 784],
+            mean=.0,
+            std=1.,
+            seed=10)
+
+        op.infer_shape(scope)
+        context = core.DeviceContext.create(place)
+        op.run(scope, context)
+        tensor = numpy.array(scope.find_var("Out").get_tensor())
+        self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1)
+        self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1)
+
+
+if __name__ == '__main__':
+    unittest.main()