diff --git a/paddle/fluid/framework/selected_rows.h b/paddle/fluid/framework/selected_rows.h index 6c31dada686b9493f85063039781dc46324d4fe4..bc5726382f81c3a3058a5ac4120f741e788704ac 100644 --- a/paddle/fluid/framework/selected_rows.h +++ b/paddle/fluid/framework/selected_rows.h @@ -118,7 +118,8 @@ class SelectedRows { * * @return index of the key. */ - int64_t AutoGrownIndex(int64_t key, bool auto_grown, bool is_test = false) { + inline int64_t AutoGrownIndex(int64_t key, bool auto_grown, + bool is_test = false) { if (is_test) { auto iter = id_to_index_.find(key); if (iter == id_to_index_.end()) { diff --git a/paddle/fluid/operators/hierarchical_sigmoid_op.cc b/paddle/fluid/operators/hierarchical_sigmoid_op.cc index 6d1fb292362a1223cd70ab5cad3c6c84a0c78fc1..f3329c4855589b551bc924518a46f4f196668af9 100644 --- a/paddle/fluid/operators/hierarchical_sigmoid_op.cc +++ b/paddle/fluid/operators/hierarchical_sigmoid_op.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/hierarchical_sigmoid_op.h" +#include #include - namespace paddle { namespace operators { @@ -109,7 +109,8 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("Bias", "(LoDTensor, optional), The bias is a tensor with shape or " "[non_leaf_num, 1]" - "[num_classes - 1, 1]."); + "[num_classes - 1, 1].") + .AsDispensable(); AddOutput( "Out", "(LoDTensor, required) The output of hierarchical sigmoid operator." @@ -173,31 +174,42 @@ class HierarchicalSigmoidGradOpGradVarTypeInference public: void operator()(const framework::OpDesc& op_desc, framework::BlockDesc* block) const override { - auto out_W_var_name = op_desc.Output(framework::GradVarName("W")).front(); - auto out_Bias_var_name = - op_desc.Output(framework::GradVarName("Bias")).front(); + auto w_grad_var_name = op_desc.Output(framework::GradVarName("W")).front(); + auto bias_grad_var_name_vec = + op_desc.Output(framework::GradVarName("Bias")); + std::string bias_grad_var_name; + bool hasBias = false; + if (bias_grad_var_name_vec.size()) { + hasBias = true; + bias_grad_var_name = + op_desc.Output(framework::GradVarName("Bias")).front(); + } auto attr = op_desc.GetAttr("is_sparse"); bool is_sparse = boost::get(attr); if (is_sparse) { - VLOG(3) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W") - << " is set to SelectedRows"; - block->Var(out_W_var_name) - ->SetType(framework::proto::VarType::SELECTED_ROWS); - VLOG(3) << "hierarchical_sigmoid_grad op " - << framework::GradVarName("Bias") << " is set to SelectedRows"; - block->Var(out_Bias_var_name) + VLOG(30) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W") + << " is set to SelectedRows"; + block->Var(w_grad_var_name) ->SetType(framework::proto::VarType::SELECTED_ROWS); + if (hasBias) { + VLOG(30) << "hierarchical_sigmoid_grad op " + << framework::GradVarName("Bias") << " is set to SelectedRows"; + block->Var(bias_grad_var_name) + ->SetType(framework::proto::VarType::SELECTED_ROWS); + } } else { - VLOG(3) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W") - << " is set to LoDTensor"; - block->Var(out_W_var_name) - ->SetType(framework::proto::VarType::LOD_TENSOR); - VLOG(3) << "hierarchical_sigmoid_grad op " - << framework::GradVarName("Bias") << " is set to LoDTensor"; - block->Var(out_Bias_var_name) + VLOG(30) << "hierarchical_sigmoid_grad op " << framework::GradVarName("W") + << " is set to LoDTensor"; + block->Var(w_grad_var_name) ->SetType(framework::proto::VarType::LOD_TENSOR); + if (hasBias) { + VLOG(30) << "hierarchical_sigmoid_grad op " + << framework::GradVarName("Bias") << " is set to LoDTensor"; + block->Var(bias_grad_var_name) + ->SetType(framework::proto::VarType::LOD_TENSOR); + } } - block->Var(out_W_var_name)->SetDataType(block->Var("W")->GetDataType()); + block->Var(w_grad_var_name)->SetDataType(block->Var("W")->GetDataType()); } }; diff --git a/paddle/fluid/operators/hierarchical_sigmoid_op.h b/paddle/fluid/operators/hierarchical_sigmoid_op.h index f046fba7fc21951ce98e75bce5a89cfd7bb845ab..de219bacddc28d5d7ca654780751fb67a5934748 100644 --- a/paddle/fluid/operators/hierarchical_sigmoid_op.h +++ b/paddle/fluid/operators/hierarchical_sigmoid_op.h @@ -33,7 +33,6 @@ using platform::Transform; std::vector cal_rows(const framework::LoDTensor& path) { std::set tmp; std::vector rows; - rows.clear(); for (size_t i = 0; i < static_cast(path.dims()[0]); i++) { for (size_t j = 0; j < static_cast(path.dims()[1]); j++) { int64_t temp = @@ -63,8 +62,6 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel { bool is_custom = false; if (path) { is_custom = true; - } else { - is_custom = false; } int64_t code_length = path ? path->dims()[1] : math::FindLastSet(num_classes - 1); @@ -96,7 +93,7 @@ class HierarchicalSigmoidOpKernel : public framework::OpKernel { out->mutable_data(ctx.GetPlace()); auto out_mat = framework::EigenVector::Flatten(*out); if (bias) { - bit_code->Add(pre_out, *bias); + bit_code->Add(*bias, pre_out); } bit_code->Mul(pre_out, *w, *in); // clip to [-40, 40] @@ -145,8 +142,6 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel { bool is_custom = false; if (path) { is_custom = true; - } else { - is_custom = false; } std::unique_ptr> bit_code; @@ -192,7 +187,7 @@ class HierarchicalSigmoidGradOpKernel : public framework::OpKernel { auto* w_grad = ctx.Output(framework::GradVarName("W")); w_grad->set_rows(real_rows); - // build ids -> rows index map + // Build a map of id -> row_index to speed up finding the index of one id w_grad->SyncIndex(); w_grad->set_height(w->dims()[0]); auto* w_grad_value = w_grad->mutable_value(); diff --git a/paddle/fluid/operators/math/matrix_bit_code.cc b/paddle/fluid/operators/math/matrix_bit_code.cc index e283320bcc8fe8b8ddcc4f1e0ee863b2b4c32abb..297e8d850b2b444289f3927db128f205d516f96a 100644 --- a/paddle/fluid/operators/math/matrix_bit_code.cc +++ b/paddle/fluid/operators/math/matrix_bit_code.cc @@ -19,8 +19,8 @@ namespace operators { namespace math { template -void MatrixBitCodeFunctor::Add(framework::LoDTensor* tmat, - const framework::LoDTensor& vec) { +void MatrixBitCodeFunctor::Add(const framework::LoDTensor& vec, + framework::LoDTensor* tmat) { size_t batch_size = tmat->dims()[0]; size_t width = tmat->dims()[1]; for (size_t i = 0; i < batch_size; ++i) { diff --git a/paddle/fluid/operators/math/matrix_bit_code.h b/paddle/fluid/operators/math/matrix_bit_code.h index 673fcb65c81e07a5391a1518e330d95979f7a452..3add06cb635e18304907c789924cf841b8e8c86a 100644 --- a/paddle/fluid/operators/math/matrix_bit_code.h +++ b/paddle/fluid/operators/math/matrix_bit_code.h @@ -234,7 +234,7 @@ class MatrixBitCodeFunctor { /* For j < code_length tmat(i, j) += vec(0, index(i, j)) */ - void Add(framework::LoDTensor* tmat, const framework::LoDTensor& vec); + void Add(const framework::LoDTensor& vec, framework::LoDTensor* tmat); /* For j < code_length vec(0, index(i, j)) += tmat(i, j) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index e98989f5bddd2ec34a9be691b56eedef0264509c..7da3a9b4fb598339ea766a77889ce18901f53928 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -4535,12 +4535,12 @@ def nce(input, def hsigmoid(input, label, num_classes=None, - non_leaf_num=None, - ptable=None, - pcode=None, param_attr=None, bias_attr=None, name=None, + non_leaf_num=None, + ptable=None, + pcode=None, is_costum=False, is_sparse=False): """ @@ -4583,7 +4583,8 @@ def hsigmoid(input, will be named automatically. Default: None. is_costum: (bool|False)using user defined binary tree instead of default complete binary tree, if costum is set you need to set ptable/pcode/non_leaf_num, otherwise num_classes should be set - is_sparse: (bool|False)using sparse update instead of dense update + is_sparse: (bool|False)using sparse update instead of dense update, if set, the gradient + of W and input will be sparse. Returns: Out: (LodTensor) The cost of hierarchical sigmoid operator. the shape is [N, 1] diff --git a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py index a3024dded6ef37ca38600e45ced77e1c84d703bf..955fc51d57d43ede1c139e433fdaea22a65ed2e6 100644 --- a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py +++ b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py @@ -110,12 +110,13 @@ def hsigmoidWithCustomTree(x, w, ptable, pcode, label, bias, num_classes): pre_output = np.zeros((batch_size, code_length)) pre_sum = np.zeros((batch_size, 1)) out = np.zeros((batch_size, 1)).astype("float32") - for i in range(batch_size): - code_table = CodeTableWithCustomTree(ptable, pcode, i) - length = code_table.get_length() - for j in range(length): - idx = code_table.cal_index(j) - pre_output[i][j] += bias[idx][0] + if isinstance(bias, np.ndarray): + for i in range(batch_size): + code_table = CodeTableWithCustomTree(ptable, pcode, i) + length = code_table.get_length() + for j in range(length): + idx = code_table.cal_index(j) + pre_output[i][j] += bias[idx][0] for i in range(batch_size): code_table = CodeTableWithCustomTree(ptable, pcode, i) length = code_table.get_length() @@ -215,11 +216,11 @@ class TestHSigmoidOpWithSparseGrad(unittest.TestCase): cost = fluid.layers.hsigmoid( input=emb, label=label, + bias_attr=True, non_leaf_num=3, ptable=ptable, pcode=pcode, is_costum=True, - bias_attr=True, is_sparse=is_sparse) avg_cost = fluid.layers.reduce_mean(cost) @@ -299,5 +300,47 @@ class TestHSigmoidOpWithCostumTree(OpTest): self.check_grad(['Bias', 'X', 'W'], ['Out'], no_grad_set=set('Label')) +class TestHSigmoidOpWithCostumTreeWithoutBias(OpTest): + def setUp(self): + self.op_type = "hierarchical_sigmoid" + num_classes = 6 #using 1,2,3,4,5,6 to build a huffman tree and select 1,2,5,6 as sample + feature_size = 8 + batch_size = 4 + x = np.random.random((batch_size, feature_size)).astype("float32") * 2 + w = np.random.random( + (num_classes - 1, feature_size)).astype("float32") * 2 + label = np.array([0, 1, 4, 5]) + ptable = np.array( + [(0, 2, -1, -1, -1), (0, 1, 3, -1, -1), (0, 1, 4, -1, -1), + (0, 2, -1, -1, + -1)]) #np.array to store 1,2,5,6s' non-leaf path(root -> leaf) + pcode = np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1), ( + 1, 0, 0, -1, -1), (0, 1, -1, -1, -1)]) #np.array to store + # bias = np.random.random((num_classes - 1, 1)).astype("float32") + self.attrs = {'num_classes': num_classes, 'is_sparse': False} + self.inputs = { + 'X': x, + 'W': w, + 'PTable': ptable, + 'PCode': pcode, + 'Label': label, + } + pre_output, out = hsigmoidWithCustomTree( + x=x, + w=w, + ptable=ptable, + pcode=pcode, + label=label, + bias=None, + num_classes=num_classes) + self.outputs = {'PreOut': pre_output, 'Out': out} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X', 'W'], ['Out'], no_grad_set=set('Label')) + + if __name__ == '__main__': unittest.main()