Commit a08dc83e authored by JiabinYang

remove arg 'non_leaf_num', test=develop

Parent 7594787d
paddle/fluid/API.spec
@@ -97,8 +97,8 @@ paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_ti
 paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
-paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'non_leaf_num', 'ptable', 'pcode', 'is_costum', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, False, False))
 paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False))
+paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False))
 paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'name'], varargs=None, keywords=None, defaults=(0, None))
 paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
...
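For reference, the ArgSpec change above makes num_classes a required positional argument and renames ptable/pcode/is_costum to path_table/path_code/is_custom. A minimal sketch of a call against the new signature with the default complete binary tree (tensor names and shapes here are illustrative, not part of this commit):

import paddle.fluid as fluid

# Feature and label tensors; shapes are made up for the example.
x = fluid.layers.data(name='x', shape=[2], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='int64')

# num_classes no longer defaults to None, so it must always be passed;
# with the default tree it is the total number of classes (>= 2).
out = fluid.layers.hsigmoid(input=x, label=y, num_classes=6)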
paddle/fluid/operators/hierarchical_sigmoid_op.cc
@@ -108,7 +108,7 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
         .AsDispensable();
     AddInput("Bias",
              "(LoDTensor, optional), The bias is a tensor with shape or "
-             "[non_leaf_num, 1]"
+             "[num_classes, 1]"
              "[num_classes - 1, 1].")
         .AsDispensable();
     AddOutput(
...
python/paddle/fluid/layers/nn.py
@@ -4584,11 +4584,10 @@ def nce(input,
 def hsigmoid(input,
              label,
-             num_classes=None,
+             num_classes,
              param_attr=None,
              bias_attr=None,
              name=None,
-             non_leaf_num=None,
              path_table=None,
              path_code=None,
              is_custom=False,
@@ -4622,7 +4621,9 @@ def hsigmoid(input,
             and :math:`D` is the feature size.
         label (Variable): The tensor variable contains labels of training data.
             It's a tensor with shape is :math:`[N \\times 1]`.
-        num_classes: (int), The number of classes, must not be less than 2. with default tree this has to be set
+        num_classes: (int), The number of classes, must not be less than 2. With the default tree it has to be set
+            and must never be None when is_custom=False; when is_custom is True, it should be the number of
+            non-leaf nodes, i.e. the number of classes used by the binary classifiers.
         param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
             of hsigmoid. If it is set to None or one attribute of ParamAttr, hsigmoid
             will create ParamAttr as param_attr. If the Initializer of the param_attr
@@ -4634,7 +4635,6 @@ def hsigmoid(input,
             is not set, the bias is initialized zero. Default: None.
         name (str|None): A name for this layer(optional). If set None, the layer
             will be named automatically. Default: None.
-        non_leaf_num: this defines the number of non-leaf nodes in costumed tree
         path_table: (Variable|None) this variable can store each batch of samples' path to root,
             it should be in leaf -> root order
             path_table should have the same shape with path_code, and for each sample i path_table[i] indicates a np.array like
@@ -4642,7 +4642,7 @@ def hsigmoid(input,
         path_code: (Variable|None) this variable can store each batch of samples' code,
             each code consist with every code of parent nodes. it should be in leaf -> root order
         is_custom: (bool|False)using user defined binary tree instead of default complete binary tree, if costum is
-            set you need to set path_table/path_code/non_leaf_num, otherwise num_classes should be set
+            set you need to set path_table/path_code/num_classes, otherwise num_classes should be set
         is_sparse: (bool|False)using sparse update instead of dense update, if set, the gradient
             of W and input will be sparse.
@@ -4671,8 +4671,8 @@ def hsigmoid(input,
         raise ValueError("path_code should not be None with costum tree")
     elif (is_custom) and (path_table is None):
         raise ValueError("path_table should not be None with costum tree")
-    elif (is_custom) and (non_leaf_num is None):
-        raise ValueError("non_leaf_num should not be None with costum tree")
+    elif (is_custom) and (num_classes is None):
+        raise ValueError("num_classes should not be None with costum tree")
     else:
         pass
@@ -4687,7 +4687,7 @@ def hsigmoid(input,
     else:
         weights = helper.create_parameter(
             attr=helper.param_attr,
-            shape=[non_leaf_num, dim],
+            shape=[num_classes, dim],
             is_bias=False,
             dtype=input.dtype)
     inputs = {
@@ -4708,7 +4708,7 @@ def hsigmoid(input,
     else:
         bias = helper.create_parameter(
             attr=helper.bias_attr,
-            shape=[non_leaf_num, 1],
+            shape=[num_classes, 1],
             is_bias=True,
             dtype=input.dtype)
         inputs['Bias'] = bias
...
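For the custom-tree path exercised by the validation and parameter-creation code above, num_classes now carries what non_leaf_num used to: the number of non-leaf nodes in the user-defined tree, which sizes the [num_classes, dim] weight and [num_classes, 1] bias. A minimal sketch, with the path length and node counts made up for illustration:

import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[2], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='int64')
# Per-sample leaf-to-root paths and their binary codes;
# both are required when is_custom=True.
path_table = fluid.layers.data(name='path_table', shape=[3], dtype='int64')
path_code = fluid.layers.data(name='path_code', shape=[3], dtype='int64')

# num_classes here is the non-leaf node count of the custom tree,
# not the total class count used with the default tree.
out = fluid.layers.hsigmoid(
    input=x,
    label=y,
    num_classes=3,
    path_table=path_table,
    path_code=path_code,
    is_custom=True)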
python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
@@ -220,7 +220,7 @@ class TestHSigmoidOpWithSparseGrad(unittest.TestCase):
             input=emb,
             label=label,
             bias_attr=True,
-            non_leaf_num=3,
+            num_classes=3,
             path_table=path_table,
             path_code=path_code,
             is_custom=True,
...
python/paddle/fluid/tests/unittests/test_layers.py
@@ -198,7 +198,7 @@ class TestBook(unittest.TestCase):
                 layers.hsigmoid(
                     input=x2,
                     label=y2,
-                    non_leaf_num=6,
+                    num_classes=6,
                     path_table=path_table,
                     path_code=path_code,
                     is_custom=True))
...