diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 894d8dda3ddb078e29931705bca4745848d71f91..c40f6033419a2425d9996eb9a4584fc9cd1a70e3 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -97,8 +97,8 @@ paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_ti
 paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
-paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'non_leaf_num', 'ptable', 'pcode', 'is_costum', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, False, False))
 paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False))
+paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False))
 paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'name'], varargs=None, keywords=None, defaults=(0, None))
 paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
diff --git a/paddle/fluid/operators/hierarchical_sigmoid_op.cc b/paddle/fluid/operators/hierarchical_sigmoid_op.cc
index 5b09958e73bdd5379b2c79721622d0c56d08bd0f..972dcf5494e9acd47e7ff615db45f056a43724a6 100644
--- a/paddle/fluid/operators/hierarchical_sigmoid_op.cc
+++ b/paddle/fluid/operators/hierarchical_sigmoid_op.cc
@@ -108,7 +108,7 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
         .AsDispensable();
     AddInput("Bias",
              "(LoDTensor, optional), The bias is a tensor with shape or "
-             "[non_leaf_num, 1]"
+             "[num_classes, 1]"
              "[num_classes - 1, 1].")
         .AsDispensable();
     AddOutput(
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index b22e9715b8d1f49da1cfa07b488054e84e437863..4df74edfcebe4e8da7172c89f3958f3df2fd2c1f 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -4584,11 +4584,10 @@ def nce(input,
 
 def hsigmoid(input,
              label,
-             num_classes=None,
+             num_classes,
              param_attr=None,
              bias_attr=None,
              name=None,
-             non_leaf_num=None,
              path_table=None,
              path_code=None,
              is_custom=False,
@@ -4622,7 +4621,9 @@ def hsigmoid(input,
         and :math:`D` is the feature size.
         label (Variable): The tensor variable contains labels of training data.
             It's a tensor with shape is :math:`[N \\times 1]`.
-        num_classes: (int), The number of classes, must not be less than 2. with default tree this has to be set
+        num_classes: (int), The number of classes, must not be less than 2. With the default
+            tree it has to be set and must not be None when is_custom is False; when is_custom is
+            True, it should be the number of non-leaf nodes, i.e. the number of classes used by the binary classifiers.
         param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
             of hsigmoid. If it is set to None or one attribute of ParamAttr, hsigmoid
             will create ParamAttr as param_attr. If the Initializer of the param_attr
@@ -4634,7 +4635,6 @@ def hsigmoid(input,
             is not set, the bias is initialized zero. Default: None.
         name (str|None): A name for this layer(optional). If set None, the layer
             will be named automatically. Default: None.
-        non_leaf_num: this defines the number of non-leaf nodes in costumed tree
         path_table: (Variable|None) this variable can store each batch of samples' path to root,
             it should be in leaf -> root order
             path_table should have the same shape with path_code, and for each sample i path_table[i] indicates a np.array like
@@ -4642,7 +4642,7 @@ def hsigmoid(input,
         path_code: (Variable|None) this variable can store each batch of samples' code,
             each code consist with every code of parent nodes. it should be in leaf -> root order
         is_custom: (bool|False)using user defined binary tree instead of default complete binary tree, if costum is
-            set you need to set path_table/path_code/non_leaf_num, otherwise num_classes should be set
+            set you need to set path_table/path_code/num_classes, otherwise num_classes should be set
         is_sparse: (bool|False)using sparse update instead of dense update, if set, the gradient
             of W and input will be sparse.
 
@@ -4671,8 +4671,8 @@ def hsigmoid(input,
         raise ValueError("path_code should not be None with costum tree")
     elif (is_custom) and (path_table is None):
         raise ValueError("path_table should not be None with costum tree")
-    elif (is_custom) and (non_leaf_num is None):
-        raise ValueError("non_leaf_num should not be None with costum tree")
+    elif (is_custom) and (num_classes is None):
+        raise ValueError("num_classes should not be None with custom tree")
     else:
         pass
 
@@ -4687,7 +4687,7 @@ def hsigmoid(input,
     else:
         weights = helper.create_parameter(
             attr=helper.param_attr,
-            shape=[non_leaf_num, dim],
+            shape=[num_classes, dim],
             is_bias=False,
             dtype=input.dtype)
     inputs = {
@@ -4708,7 +4708,7 @@ def hsigmoid(input,
         else:
             bias = helper.create_parameter(
                 attr=helper.bias_attr,
-                shape=[non_leaf_num, 1],
+                shape=[num_classes, 1],
                 is_bias=True,
                 dtype=input.dtype)
         inputs['Bias'] = bias
diff --git a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
index 4254c3bb250487f03fdb162f279642901098dbb2..2a6c93f75fad53440a2db64e4f34c9a5c22c654e 100644
--- a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
+++ b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py
@@ -220,7 +220,7 @@ class TestHSigmoidOpWithSparseGrad(unittest.TestCase):
             input=emb,
             label=label,
             bias_attr=True,
-            non_leaf_num=3,
+            num_classes=3,
             path_table=path_table,
             path_code=path_code,
             is_custom=True,
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index b8477820eeb97a93ebf0aeb3a1c12895a66cb2c7..541160771152dd2ebc8a782863bb4ad3643892e5 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -198,7 +198,7 @@ class TestBook(unittest.TestCase):
                 layers.hsigmoid(
                     input=x2,
                     label=y2,
-                    non_leaf_num=6,
+                    num_classes=6,
                     path_table=path_table,
                     path_code=path_code,
                     is_custom=True))
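
For reference, a minimal usage sketch of the renamed interface (illustrative only, not part of the patch; the tensor names and shapes below are made up for the example):

import paddle.fluid as fluid

# Default complete binary tree: num_classes is now a required argument.
x = fluid.layers.data(name='x', shape=[8], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='int64')
out = fluid.layers.hsigmoid(input=x, label=y, num_classes=6)

# Custom tree: pass path_table/path_code, set is_custom=True, and use
# num_classes for what the old API called non_leaf_num (the number of
# non-leaf nodes, i.e. the number of binary classifiers).
path_table = fluid.layers.data(name='path_table', shape=[3], dtype='int64')
path_code = fluid.layers.data(name='path_code', shape=[3], dtype='int64')
out_custom = fluid.layers.hsigmoid(
    input=x,
    label=y,
    num_classes=3,
    path_table=path_table,
    path_code=path_code,
    is_custom=True)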