diff --git a/paddle/fluid/operators/hierarchical_sigmoid_op.h b/paddle/fluid/operators/hierarchical_sigmoid_op.h
index 2d500a03df87f5a05ec524d4c2993a8d7b5aa992..90bdb47311fce1a6be6324cc2d470089f8dade1b 100644
--- a/paddle/fluid/operators/hierarchical_sigmoid_op.h
+++ b/paddle/fluid/operators/hierarchical_sigmoid_op.h
@@ -86,6 +86,7 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel<T> {
     trans(ctx.template device_context<DeviceContext>(), pre_out_data,
           pre_out_data + pre_out->numel(), pre_out_data,
           ClipFunctor<T>(static_cast<T>(-40.0), static_cast<T>(40.0)));
+    pre_out_mat = -1 * pre_out_mat;
     bit_code->Sum(*pre_out, out, static_cast<T>(-1));
     // use softrelu to calculate cross entropy
     pre_out_mat.device(place) = (static_cast<T>(1.0) + pre_out_mat.exp()).log();
@@ -146,6 +147,7 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
     auto pre_out_mat = EigenMatrix<T>::From(*pre_out);
     auto pre_out_grad_mat = EigenMatrix<T>::From(pre_out_grad);
     auto out_grad_mat = EigenMatrix<T>::From(*out_grad);
+    Eigen::array<int, 2> bcast({{1, static_cast<int>(pre_out_grad.dims()[1])}});
 
     // softrelu derivative
@@ -160,9 +162,16 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel<T> {
       bias_grad->mutable_data<T>(ctx.GetPlace());
       zero(dev_ctx, bias_grad, static_cast<T>(0.0));
       bit_code->AddGrad(pre_out_grad, bias_grad);
+      auto bias_grad_mat = EigenMatrix<T>::From(*bias_grad);
+      bias_grad_mat = -1 * bias_grad_mat;
     }
     bit_code->MulGradWeight(pre_out_grad, w_grad, *in);
     bit_code->MulGradError(pre_out_grad, *w, in_grad);
+    auto w_grad_mat = EigenMatrix<T>::From(*w_grad);
+    auto in_grad_mat = EigenMatrix<T>::From(*in_grad);
+
+    w_grad_mat = -1 * w_grad_mat;
+    in_grad_mat = -1 * in_grad_mat;
   }
 };
 
diff --git a/paddle/fluid/operators/math/matrix_bit_code.h b/paddle/fluid/operators/math/matrix_bit_code.h
index f03c8d3689c8ebfb04f61219bca8708fe43cf3e1..1e2abd1e697a212772f8f757bd5669263e84832f 100644
--- a/paddle/fluid/operators/math/matrix_bit_code.h
+++ b/paddle/fluid/operators/math/matrix_bit_code.h
@@ -157,7 +157,7 @@ class CustomCode : public Code {
   int get_length() const {
     int length = 0;
 
-    for (int i = 0; i < ptable_->dims()[1]; i++) {
+    for (int i = 0; i < static_cast<int>(ptable_->dims()[1]); i++) {
       if (ptable_->data<T>()[index_ * static_cast<size_t>(ptable_->dims()[1]) +
                              i] != -1) {
         length++;
diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py
index fb521e86a3189f0189c5ea51bee9b81e2d1524a6..e97643cddef22465436051a41ef4b825e9634d23 100644
--- a/python/paddle/fluid/tests/unittests/op_test.py
+++ b/python/paddle/fluid/tests/unittests/op_test.py
@@ -138,11 +138,8 @@ class OpTest(unittest.TestCase):
         cls.dtype = "float32"
         cls.outputs = {}
 
-        # np.random.seed(123)
-        # random.seed(124)
-
-        np.random.seed(190)
-        random.seed(200)
+        np.random.seed(123)
+        random.seed(124)
 
     @classmethod
     def tearDownClass(cls):
diff --git a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
index 4beeed01311bc36023cbbe8ce4c14680f5eec667..0a16f5a39c596713121903a3c88190c8a68d83bd 100644
--- a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
+++ b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
@@ -17,6 +17,9 @@ from __future__ import print_function
 import unittest
 import numpy as np
 import math
+# import paddle.fluid as fluid
+# import paddle.fluid.core as core
+# from op_builder import OpBuilder
 from op_test import OpTest
 
 np.random.seed(100)
@@ -51,7 +54,7 @@ class CodeTableWithCustomTree(object):
     def get_length(self):
         length = 0
-        for ele in self.ptable_[self.index_]:
+        for ele in self.ptable_[self.index_]:  # find the first -1 to stop trace
             if ele >= 0:
                 length = length + 1
             else:
                 return length
@@ -71,12 +74,10 @@ def hsigmoid(x, w, label, bias, num_classes):
     pre_sum = np.zeros((batch_size, 1))
     out = np.zeros((batch_size, 1)).astype("float32")
     for i in range(batch_size):
-        #print("\n leaf {leaf}: \n".format(leaf = label[i]))
         code_table = CodeTable(num_classes, label[i])
         length = code_table.get_length()
         for j in range(length):
             idx = code_table.cal_index(j)
-            #print("index {index} ".format(index = j))
             pre_output[i][j] += bias[0][idx]
     for i in range(batch_size):
         code_table = CodeTable(num_classes, label[i])
@@ -87,13 +88,12 @@ def hsigmoid(x, w, label, bias, num_classes):
     # clip[-40.0, 40.0]
     pre_output = np.clip(pre_output, -40.0, 40.0)
     # out(i, 0) = \sum_j bit(i, j) * preout(i, j)
+    pre_output = -1 * pre_output
     for i in range(batch_size):
-        #print("\n leaf {leaf}: \n".format(leaf = label[i]))
         code_table = CodeTable(num_classes, label[i])
         length = code_table.get_length()
         sum = 0.0
         for j in range(length):
-            #print("bit {bit} ".format(bit = code_table.cal_bit(j)))
             if code_table.cal_bit(j):
                 sum += pre_output[i][j]
         out[i] = -1.0 * sum
@@ -108,6 +108,7 @@ def hsigmoidWithCustomTree(x, w, ptable, pcode, label, bias, num_classes):
     batch_size = x.shape[0]
     code_length = len(ptable[0])
     code_table = [0 for _ in range(code_length)]
+    # init pre_out with shape [N, code_length]
     pre_output = np.zeros((batch_size, code_length))
     pre_sum = np.zeros((batch_size, 1))
     out = np.zeros((batch_size, 1)).astype("float32")
@@ -125,6 +126,7 @@ def hsigmoidWithCustomTree(x, w, ptable, pcode, label, bias, num_classes):
             pre_output[i][j] += np.dot(w[idx], x[i])
     # clip[-40.0, 40.0]
     pre_output = np.clip(pre_output, -40.0, 40.0)
+    pre_output = -1 * pre_output
     # out(i, 0) = \sum_j bit(i, j) * preout(i, j)
     for i in range(batch_size):
         code_table = CodeTableWithCustomTree(ptable, pcode, i)
@@ -141,26 +143,27 @@ def hsigmoidWithCustomTree(x, w, ptable, pcode, label, bias, num_classes):
     return pre_output, out
 
 
-# class TestHSigmoidOp(OpTest):
-#     def setUp(self):
-#         self.op_type = "hierarchical_sigmoid"
-#         num_classes = 6
-#         feature_size = 8
-#         batch_size = 7
-#         x = np.random.random((batch_size, feature_size)).astype("float32")
-#         w = np.random.random((num_classes - 1, feature_size)).astype("float32")
-#         label = np.random.randint(0, num_classes, (batch_size, 1))
-#         bias = np.random.random((1, num_classes - 1)).astype("float32")
-#         self.attrs = {'num_classes': num_classes}
-#         self.inputs = {'X': x, 'W': w, 'Label': label, 'Bias': bias}
-#         pre_output, out = hsigmoid(x, w, label, bias, num_classes)
-#         self.outputs = {'PreOut': pre_output, 'Out': out}
+class TestHSigmoidOp(OpTest):
+    def setUp(self):
+        self.op_type = "hierarchical_sigmoid"
+        num_classes = 6
+        feature_size = 8
+        batch_size = 4
+        x = np.random.random((batch_size, feature_size)).astype("float32") * 2
+        w = np.random.random(
+            (num_classes - 1, feature_size)).astype("float32") * 2
+        label = np.random.randint(0, num_classes, (batch_size, 1))
+        bias = np.random.random((1, num_classes - 1)).astype("float32")
+        self.attrs = {'num_classes': num_classes}
+        self.inputs = {'X': x, 'W': w, 'Label': label, 'Bias': bias}
+        pre_output, out = hsigmoid(x, w, label, bias, num_classes)
+        self.outputs = {'PreOut': pre_output, 'Out': out}
 
-#     def test_check_output(self):
-#         self.check_output()
+    def test_check_output(self):
+        self.check_output()
 
-#     def test_check_grad(self):
-#         self.check_grad(['Bias', 'X', 'W'], ['Out'], no_grad_set=set('Label'))
+    def test_check_grad(self):
+        self.check_grad(['Bias', 'X', 'W'], ['Out'], no_grad_set=set('Label'))
 
 
 class TestHSigmoidOpWithCostumTree(OpTest):
@@ -169,9 +172,9 @@ class TestHSigmoidOpWithCostumTree(OpTest):
         num_classes = 6  #using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample
         feature_size = 8
         batch_size = 4
-        x = np.random.random((batch_size, feature_size)).astype("float32") * 10
+        x = np.random.random((batch_size, feature_size)).astype("float32") * 2
         w = np.random.random(
-            (num_classes - 1, feature_size)).astype("float32") * 10
+            (num_classes - 1, feature_size)).astype("float32") * 2
         label = np.array([0, 1, 4, 5])
         ptable = np.array(
             [(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1),
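
Note on the sign flips above: negating pre_out before the softrelu/cross-entropy step (and negating the bias, weight, and input gradients to match) flips the sign convention of the per-node sigmoid, so a set path bit now scores sigmoid(-preout) instead of sigmoid(preout). The following minimal NumPy sketch (not part of the patch; p, bit, and sigmoid are illustrative names, not the operator's API) checks that the patched formulation is still a per-node binary cross entropy, just under the flipped bit convention:

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    p = np.array([0.3, -1.2, 2.0])  # clipped pre-activations for three tree nodes
    bit = np.array([1, 0, 1])       # path code bits of the target leaf

    # Patched reference: softrelu of the negated logits plus the bit-selected
    # sum (out = -1 * sum of bit * (-p), i.e. +sum of bit * p).
    loss_kernel = np.log(1.0 + np.exp(-p)).sum() + p[bit == 1].sum()

    # Per-node binary cross entropy under the flipped convention.
    loss_bce = -np.log(np.where(bit == 1, sigmoid(-p), sigmoid(p))).sum()

    assert np.allclose(loss_kernel, loss_bce)

Since softplus(-p) + p equals softplus(p), the pre-patch code satisfies the same identity with sigmoid(p) for a set bit, so the change amounts to flipping the bit convention rather than introducing a new loss.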