diff --git a/paddle/fluid/operators/fill_constant_op.cc b/paddle/fluid/operators/fill_constant_op.cc
index 130f18dde4f979a6a9925ede9cbf745fcec14d48..2826b82117db113d4d8c10095e89f610ca895775 100644
--- a/paddle/fluid/operators/fill_constant_op.cc
+++ b/paddle/fluid/operators/fill_constant_op.cc
@@ -15,7 +15,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/math_function.h"
-#include "paddle/fluid/platform/device_context.h"
 
 namespace paddle {
 namespace operators {
@@ -41,19 +40,33 @@ class FillConstantOp : public framework::OperatorBase {
         static_cast<framework::proto::VarType::Type>(Attr<int>("dtype"));
     auto value = Attr<float>("value");
     auto force_cpu = Attr<bool>("force_cpu");
-    auto &out =
-        *scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensor>();
-    out.Resize(framework::make_ddim(Attr<std::vector<int64_t>>("shape")));
+
+    framework::Tensor *tensor = nullptr;
+
+    auto &out_var = *scope.FindVar(Output("Out"));
+
+    if (out_var.IsType<framework::LoDTensor>()) {
+      tensor = out_var.GetMutable<framework::LoDTensor>();
+      tensor->Resize(framework::make_ddim(Attr<std::vector<int64_t>>("shape")));
+    } else if (out_var.IsType<framework::SelectedRows>()) {
+      tensor = out_var.GetMutable<framework::SelectedRows>()->mutable_value();
+      tensor->Resize(framework::make_ddim(Attr<std::vector<int64_t>>("shape")));
+    } else {
+      PADDLE_THROW(
+          "fill constant op's output only"
+          "supports SelectedRows and LoDTensor");
+    }
+
     if (force_cpu) {
       auto cpu = platform::CPUPlace();
-      out.mutable_data(cpu, framework::ToTypeIndex(data_type));
+      tensor->mutable_data(cpu, framework::ToTypeIndex(data_type));
     } else {
-      out.mutable_data(dev_place, framework::ToTypeIndex(data_type));
+      tensor->mutable_data(dev_place, framework::ToTypeIndex(data_type));
     }
 
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto &dev_ctx = *pool.Get(dev_place);
-    math::set_constant(dev_ctx, &out, value);
+    math::set_constant(dev_ctx, tensor, value);
   }
 };
 
diff --git a/paddle/fluid/operators/uniform_random_op.cc b/paddle/fluid/operators/uniform_random_op.cc
index 5248767c2eeb9388c26d203e64f8b2c68ffe0865..763bb403588d13c15271d26b09813dddf3a5dd8c 100644
--- a/paddle/fluid/operators/uniform_random_op.cc
+++ b/paddle/fluid/operators/uniform_random_op.cc
@@ -37,7 +37,7 @@ class CPUUniformRandomKernel : public framework::OpKernel<T> {
     } else {
       PADDLE_THROW(
           "uniform_random_op's output only"
-          "supports SelectedRows and Tensor");
+          "supports SelectedRows and LoDTensor");
     }
     T* data = tensor->mutable_data<T>(ctx.GetPlace());
     unsigned int seed = static_cast<unsigned int>(ctx.Attr<int>("seed"));
diff --git a/paddle/fluid/operators/uniform_random_op.cu b/paddle/fluid/operators/uniform_random_op.cu
index e1c7323a30233f4ec4f60e46aa6088ee6d8601b7..bbb692b0ddfc18e8a62c0d2a6bac88f9932f6704 100644
--- a/paddle/fluid/operators/uniform_random_op.cu
+++ b/paddle/fluid/operators/uniform_random_op.cu
@@ -54,7 +54,7 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> {
     } else {
       PADDLE_THROW(
           "uniform_random_op's output only"
-          "supports SelectedRows and Tensor");
+          "supports SelectedRows and LoDTensor");
     }
     T* data = tensor->mutable_data<T>(context.GetPlace());
     unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
diff --git a/python/paddle/fluid/tests/unittests/dist_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_simnet_bow.py
index 59fca7073511ea45e790b549515db2b67df6212a..5c2341a2d185fdca3783683596b5db12fc151767 100644
--- a/python/paddle/fluid/tests/unittests/dist_simnet_bow.py
+++ b/python/paddle/fluid/tests/unittests/dist_simnet_bow.py
@@ -91,16 +91,21 @@ def train_network(batch_size, is_distributed=False, is_sparse=False):
         is_distributed=is_distributed,
         size=[dict_dim, emb_dim],
         param_attr=fluid.ParamAttr(
-            name="__emb__", learning_rate=emb_lr),
+            initializer=fluid.initializer.Constant(value=0.01),
+            name="__emb__",
+            learning_rate=emb_lr),
         is_sparse=is_sparse)
     ## vsum
     q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum')
     q_ss = fluid.layers.softsign(q_sum)
     ## fc layer after conv
-    q_fc = fluid.layers.fc(input=q_ss,
-                           size=hid_dim,
-                           param_attr=fluid.ParamAttr(
-                               name="__q_fc__", learning_rate=base_lr))
+    q_fc = fluid.layers.fc(
+        input=q_ss,
+        size=hid_dim,
+        param_attr=fluid.ParamAttr(
+            initializer=fluid.initializer.Constant(value=0.01),
+            name="__q_fc__",
+            learning_rate=base_lr))
     # label data
     label = fluid.layers.data(name="label", shape=[1], dtype="int64")
     # pt
@@ -112,17 +117,22 @@ def train_network(batch_size, is_distributed=False, is_sparse=False):
         is_distributed=is_distributed,
         size=[dict_dim, emb_dim],
         param_attr=fluid.ParamAttr(
-            name="__emb__", learning_rate=emb_lr),
+            initializer=fluid.initializer.Constant(value=0.01),
+            name="__emb__",
+            learning_rate=emb_lr),
         is_sparse=is_sparse)
     ## vsum
     pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum')
     pt_ss = fluid.layers.softsign(pt_sum)
     ## fc layer
-    pt_fc = fluid.layers.fc(input=pt_ss,
-                            size=hid_dim,
-                            param_attr=fluid.ParamAttr(
-                                name="__fc__", learning_rate=base_lr),
-                            bias_attr=fluid.ParamAttr(name="__fc_b__"))
+    pt_fc = fluid.layers.fc(
+        input=pt_ss,
+        size=hid_dim,
+        param_attr=fluid.ParamAttr(
+            initializer=fluid.initializer.Constant(value=0.01),
+            name="__fc__",
+            learning_rate=base_lr),
+        bias_attr=fluid.ParamAttr(name="__fc_b__"))
     # nt
     nt = fluid.layers.data(
         name="neg_title_ids", shape=[1], dtype="int64", lod_level=1)
@@ -132,17 +142,22 @@ def train_network(batch_size, is_distributed=False, is_sparse=False):
         is_distributed=is_distributed,
         size=[dict_dim, emb_dim],
         param_attr=fluid.ParamAttr(
-            name="__emb__", learning_rate=emb_lr),
+            initializer=fluid.initializer.Constant(value=0.01),
+            name="__emb__",
+            learning_rate=emb_lr),
         is_sparse=is_sparse)
     ## vsum
     nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum')
     nt_ss = fluid.layers.softsign(nt_sum)
     ## fc layer
-    nt_fc = fluid.layers.fc(input=nt_ss,
-                            size=hid_dim,
-                            param_attr=fluid.ParamAttr(
-                                name="__fc__", learning_rate=base_lr),
-                            bias_attr=fluid.ParamAttr(name="__fc_b__"))
+    nt_fc = fluid.layers.fc(
+        input=nt_ss,
+        size=hid_dim,
+        param_attr=fluid.ParamAttr(
+            initializer=fluid.initializer.Constant(value=0.01),
+            name="__fc__",
+            learning_rate=base_lr),
+        bias_attr=fluid.ParamAttr(name="__fc_b__"))
     cos_q_pt = fluid.layers.cos_sim(q_fc, pt_fc)
     cos_q_nt = fluid.layers.cos_sim(q_fc, nt_fc)
     # loss
@@ -163,7 +178,6 @@ def get_one_data(file_list):
         with open(file, "r") as fin:
             for i in fin:
                 contents.append(i.strip())
-            random.shuffle(contents)
             for index, q in enumerate(contents):
                 try:
                     one_data = [[int(j) for j in i.split(" ")]
@@ -205,7 +219,8 @@
 class TestDistSimnetBow2x2(TestDistRunnerBase):
     def get_model(self, batch_size=2):
         # Train program
-        avg_cost, acc, predict = train_network(batch_size, False, False)
+        avg_cost, acc, predict = \
+            train_network(batch_size, bool(int(os.environ["IS_DISTRIBUTED"])), bool(int(os.environ["IS_SPARSE"])))
 
         inference_program = fluid.default_main_program().clone()
 
@@ -219,7 +234,15 @@
 
 
 if __name__ == "__main__":
+    paddle.dataset.common.download(DATA_URL, 'simnet', DATA_MD5, "train")
+
     import os
     os.environ['CPU_NUM'] = '1'
-    paddle.dataset.common.download(DATA_URL, 'simnet', DATA_MD5, "train")
+
+    os.environ["IS_DISTRIBUTED"] = '0'
+    os.environ["IS_SPARSE"] = '0'
     runtime_main(TestDistSimnetBow2x2)
+
+# os.environ["IS_DISTRIBUTED"] = '0'
+# os.environ["IS_SPARSE"] = '1'
+# runtime_main(TestDistSimnetBow2x2)
diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py
index 0e815c91446b285ba2c2c5aa9ad18d97f51eae65..d05fd508c57a8e9eba4cbb503b919a4a69aecdfe 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_base.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_base.py
@@ -155,7 +155,7 @@ class TestDistBase(unittest.TestCase):
         self._sync_mode = True
         self._setup_config()
 
-    def start_pserver(self, model_file, check_error_log):
+    def start_pserver(self, model_file, check_error_log, required_envs):
         sync_mode_str = "TRUE" if self._sync_mode else "FALSE"
         ps0_ep, ps1_ep = self._ps_endpoints.split(",")
         ps0_cmd = "%s %s pserver %s 0 %s %d TRUE %s" % \
@@ -168,15 +168,23 @@
         ps0_pipe = subprocess.PIPE
         ps1_pipe = subprocess.PIPE
         if check_error_log:
+            required_envs["GLOG_v"] = "7"
+            required_envs["GLOG_logtostderr"] = "1"
             print("ps0_cmd:", ps0_cmd)
             print("ps1_cmd:", ps1_cmd)
             ps0_pipe = open("/tmp/ps0_err.log", "wb")
             ps1_pipe = open("/tmp/ps1_err.log", "wb")
 
         ps0_proc = subprocess.Popen(
-            ps0_cmd.split(" "), stdout=subprocess.PIPE, stderr=ps0_pipe)
+            ps0_cmd.split(" "),
+            stdout=subprocess.PIPE,
+            stderr=ps0_pipe,
+            env=required_envs)
         ps1_proc = subprocess.Popen(
-            ps1_cmd.split(" "), stdout=subprocess.PIPE, stderr=ps1_pipe)
+            ps1_cmd.split(" "),
+            stdout=subprocess.PIPE,
+            stderr=ps1_pipe,
+            env=required_envs)
 
         if not check_error_log:
             return ps0_proc, ps1_proc, None, None
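
The sketch below is reviewer context, not part of the patch: a minimal way to exercise the SelectedRows branch that fill_constant_op gains above, written in the scope/Operator style of Paddle's fluid operator unit tests. The function name and the shape/value constants are illustrative; dtype is left at its FP32 default.

import numpy as np

import paddle.fluid.core as core
from paddle.fluid.op import Operator


def fill_constant_into_selected_rows(place):
    # Create the "Out" variable as SelectedRows rather than LoDTensor, so the
    # op must take the new GetMutable<SelectedRows>()->mutable_value() path
    # instead of throwing.
    scope = core.Scope()
    out = scope.var("Out").get_selected_rows()

    op = Operator("fill_constant", Out="Out", shape=[123, 92], value=3.8)
    op.run(scope, place)

    # The value tensor of the SelectedRows should now hold the constant.
    result = np.array(out.get_tensor())
    assert result.shape == (123, 92)
    assert np.allclose(result, 3.8)


fill_constant_into_selected_rows(core.CPUPlace())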
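
Likewise, a small sketch of why start_pserver now threads required_envs into subprocess.Popen (the command, log path, and variable names here are placeholders): passing env= replaces the child's entire environment, so the GLOG flags set under check_error_log reach the pserver's C++ runtime rather than only the Python parent.

import os
import subprocess

# Placeholder for the env dict that test_dist_base.py assembles.
required_envs = dict(os.environ)
check_error_log = True
if check_error_log:
    # Verbose glog output from the C++ side, mirrored to stderr.
    required_envs["GLOG_v"] = "7"
    required_envs["GLOG_logtostderr"] = "1"

with open("/tmp/ps0_err.log", "wb") as ps0_pipe:
    ps0_proc = subprocess.Popen(
        ["python", "-c", "import os; print(os.environ['GLOG_v'])"],
        stdout=subprocess.PIPE,
        stderr=ps0_pipe,
        env=required_envs)  # the child sees exactly required_envs
    out, _ = ps0_proc.communicate()
    assert out.strip() == b"7"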