diff --git a/paddle/fluid/operators/hierarchical_sigmoid_op.cc b/paddle/fluid/operators/hierarchical_sigmoid_op.cc index 042d90e72f8dd9e24e6e50038a8a67582aa0ad9c..6d1fb292362a1223cd70ab5cad3c6c84a0c78fc1 100644 --- a/paddle/fluid/operators/hierarchical_sigmoid_op.cc +++ b/paddle/fluid/operators/hierarchical_sigmoid_op.cc @@ -193,7 +193,7 @@ class HierarchicalSigmoidGradOpGradVarTypeInference block->Var(out_W_var_name) ->SetType(framework::proto::VarType::LOD_TENSOR); VLOG(3) << "hierarchical_sigmoid_grad op " - << framework::GradVarName("Bias") << " is set to SelectedRows"; + << framework::GradVarName("Bias") << " is set to LoDTensor"; block->Var(out_Bias_var_name) ->SetType(framework::proto::VarType::LOD_TENSOR); } diff --git a/paddle/fluid/operators/math/matrix_bit_code.cc b/paddle/fluid/operators/math/matrix_bit_code.cc index 0c1aa29a18dcc86533097a77d481dcac5f844ce9..e283320bcc8fe8b8ddcc4f1e0ee863b2b4c32abb 100644 --- a/paddle/fluid/operators/math/matrix_bit_code.cc +++ b/paddle/fluid/operators/math/matrix_bit_code.cc @@ -120,8 +120,6 @@ void MatrixBitCodeFunctor::MulGradWeight(const framework::LoDTensor& tmat, size_t input_width = input.dims()[1]; size_t tmat_width = tmat.dims()[1]; size_t weight_width = weight->dims()[1]; - VLOG(30) << "sparse w_grad dims is [" << weight->dims()[0] << " ," - << weight->dims()[1] << " ]"; auto tmat_value = tmat.data(); auto weight_value = weight->data(); auto input_value = input.data(); @@ -147,8 +145,6 @@ void MatrixBitCodeFunctor::MulGradWeight(const framework::LoDTensor& tmat, size_t input_width = input.dims()[1]; size_t tmat_width = tmat.dims()[1]; size_t weight_width = weight->value().dims()[1]; - VLOG(30) << "sparse w_grad dims is: [" << weight->value().dims()[0] << " ," - << weight->value().dims()[1] << " ]"; auto tmat_value = tmat.data(); auto weight_value = weight->mutable_value()->data(); auto input_value = input.data(); @@ -157,11 +153,9 @@ void MatrixBitCodeFunctor::MulGradWeight(const framework::LoDTensor& tmat, 
int code_length = code->get_length(); for (int j = 0; j < code_length; ++j) { size_t index = code->calc_index(j); - for (size_t k = 0; k < input_width; ++k) { int64_t row_index = weight->AutoGrownIndex(static_cast(index), false, true); - weight_value[row_index * weight_width + k] += tmat_value[i * tmat_width + j] * input_value[input_width * i + k]; } diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 8170ccf08272f1bf31244216ab81be4de98dd95f..e98989f5bddd2ec34a9be691b56eedef0264509c 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -4581,7 +4581,8 @@ def hsigmoid(input, is not set, the bias is initialized zero. Default: None. name (str|None): A name for this layer(optional). If set None, the layer will be named automatically. Default: None. - is_costum: (bool|False)using user defined binary tree instead of default complete binary tree + is_costum: (bool|False)using user defined binary tree instead of default complete binary tree, if is_costum is + set you need to set ptable/pcode/non_leaf_num, otherwise num_classes should be set is_sparse: (bool|False)using sparse update instead of dense update Returns: