From e3b28d5bd125310fe584ee15ff76308485fdda35 Mon Sep 17 00:00:00 2001 From: yaoxuefeng Date: Wed, 18 Mar 2020 17:18:04 +0800 Subject: [PATCH] Fix instag (#22632) (#22991) --- paddle/fluid/operators/filter_by_instag_op.cc | 3 + paddle/fluid/operators/filter_by_instag_op.h | 11 ++- paddle/fluid/operators/metrics/auc_op.h | 8 +- python/paddle/fluid/layers/nn.py | 7 +- .../unittests/test_filter_by_instag_op.py | 83 ++++++++++++++++++- 5 files changed, 101 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/operators/filter_by_instag_op.cc b/paddle/fluid/operators/filter_by_instag_op.cc index b156246f04..fde6db79ec 100644 --- a/paddle/fluid/operators/filter_by_instag_op.cc +++ b/paddle/fluid/operators/filter_by_instag_op.cc @@ -60,6 +60,9 @@ class FilterByInstagOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("Ins_tag", "(LoDTensor) ins tag list"); AddInput("Filter_tag", "(1D Tensor) filter tag list"); AddAttr("is_lod", "is Ins with LoD info or not, default True"); + AddAttr("out_val_if_empty", + "if the output after filter is empty, the output value") + .SetDefault(0); AddOutput("Out", "(LoDTensor) embeded tensor filtered by instag"); AddOutput("LossWeight", "(Tensor) loss weight."); AddOutput("IndexMap", "(LoDTensor) mapping from Out rows to X1 rows"); diff --git a/paddle/fluid/operators/filter_by_instag_op.h b/paddle/fluid/operators/filter_by_instag_op.h index 2f45d5417c..9234f9be47 100644 --- a/paddle/fluid/operators/filter_by_instag_op.h +++ b/paddle/fluid/operators/filter_by_instag_op.h @@ -47,6 +47,7 @@ class FilterByInstagKernel : public framework::OpKernel { // Dim [batch size, embedding size] auto* x1 = context.Input("Ins"); bool is_x1_lod = context.Attr("is_lod"); + int64_t out_val_if_empty = context.Attr("out_val_if_empty"); // X2 is ins tag list // LoD [[0, Sum(ins1), Sum(ins1, ins2), ... 
]] auto* x2 = context.Input("Ins_tag"); @@ -157,7 +158,15 @@ class FilterByInstagKernel : public framework::OpKernel { std::vector> out_lod_info; out_lod_info.push_back(out_lods); out->set_lod(out_lod_info); - memset(out_data, 0, out->numel() * sizeof(T)); + for (int64_t oi = 0; oi < out->numel(); ++oi) { + if (std::is_same::value) { + out_data[oi] = (int32_t)out_val_if_empty; + } else if (std::is_same::value) { + out_data[oi] = (int64_t)out_val_if_empty; + } else { + out_data[oi] = static_cast(out_val_if_empty); + } + } loss_weight_data[0] = 0; } } diff --git a/paddle/fluid/operators/metrics/auc_op.h b/paddle/fluid/operators/metrics/auc_op.h index 2dfcdaa5db..10403472c6 100644 --- a/paddle/fluid/operators/metrics/auc_op.h +++ b/paddle/fluid/operators/metrics/auc_op.h @@ -102,9 +102,9 @@ class AucKernel : public framework::OpKernel { "The predict data must gather or equal 0.")); uint32_t binIdx = static_cast(predict_data * num_thresholds); - if (label_data[i]) { + if (label_data[i] > 0) { origin_stat_pos[binIdx] += 1; - } else { + } else if (label_data[i] == 0) { origin_stat_neg[binIdx] += 1; } } @@ -142,9 +142,9 @@ class AucKernel : public framework::OpKernel { "The predict data must gather or equal 0.")); uint32_t binIdx = static_cast(predict_data * num_thresholds); - if (label_data[i]) { + if (label_data[i] > 0) { origin_stat_pos[cur_step_begin + binIdx] += 1; - } else { + } else if (label_data[i] == 0) { origin_stat_neg[cur_step_begin + binIdx] += 1; } } diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index b05261b4c6..782a868f2a 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -9169,7 +9169,7 @@ def stack(x, axis=0): @templatedoc(op_type="filter_by_instag") -def filter_by_instag(ins, ins_tag, filter_tag, is_lod): +def filter_by_instag(ins, ins_tag, filter_tag, is_lod, out_val_if_empty=0): """ **Filter By Instag Layer** @@ -9206,6 +9206,8 @@ def filter_by_instag(ins, ins_tag, filter_tag, 
is_lod): filter_tag (Variable): Input Variable (1D Tensor/List), usually it is list that holds the tags. is_lod (Bool): Boolean value to indicate ins is lod tensor or not. + out_val_if_empty (Int64): If the output is empty after filtering, the + output tensor is filled with this value. Returns: Variable: filtered ins (LoDTensor) and loss weight (Tensor) @@ -9233,7 +9235,8 @@ def filter_by_instag(ins, ins_tag, filter_tag, is_lod): outputs={'Out': out, 'LossWeight': loss_weight, 'IndexMap': mmap}, - attrs={'is_lod': is_lod}) + attrs={'is_lod': is_lod, + 'out_val_if_empty': out_val_if_empty}) return [out, loss_weight] diff --git a/python/paddle/fluid/tests/unittests/test_filter_by_instag_op.py b/python/paddle/fluid/tests/unittests/test_filter_by_instag_op.py index 87540071f7..ecd2e2cd6c 100644 --- a/python/paddle/fluid/tests/unittests/test_filter_by_instag_op.py +++ b/python/paddle/fluid/tests/unittests/test_filter_by_instag_op.py @@ -23,6 +23,7 @@ import paddle.fluid.layers as layers from op_test import OpTest import random from decorator_helper import prog_scope +from paddle.fluid.op import Operator """This is Test Case 1""" @@ -71,7 +72,7 @@ class TestFilterByInstagOp(OpTest): 'IndexMap': (mmap, mmap_lod) } - self.attrs = {'is_lod': True} + self.attrs = {'is_lod': True, 'out_val_if_empty': 0} def test_check_output(self): self.check_output() @@ -116,7 +117,7 @@ class TestFilterByInstagOp2(OpTest): 'LossWeight': (loss_weight, mmap_lod), 'IndexMap': (mmap, mmap_lod) } - self.attrs = {'is_lod': True, } + self.attrs = {'is_lod': True, 'out_val_if_empty': 0} def test_check_output(self): self.check_output() @@ -158,7 +159,7 @@ class TestFilterByInstagOp3(OpTest): 'LossWeight': (loss_weight, mmap_lod), 'IndexMap': (mmap, mmap_lod) } - self.attrs = {'is_lod': True, } + self.attrs = {'is_lod': True, 'out_val_if_empty': 0} def test_check_output(self): self.check_output() @@ -199,7 +200,7 @@ class TestFilterByInstagOp4(OpTest): 'LossWeight': (loss_weight, mmap_lod), 'IndexMap': (mmap, 
mmap_lod) } - self.attrs = {'is_lod': False, } + self.attrs = {'is_lod': False, 'out_val_if_empty': 0} def test_check_output(self): self.check_output() @@ -209,5 +210,79 @@ class TestFilterByInstagOp4(OpTest): ['Ins'], 'Out', no_grad_set=set(['Ins_tag', 'Filter_tag'])) +class TestFilterByInstagOp6(OpTest): + def setUp(self): + self.op_type = 'filter_by_instag' + + x1 = np.random.random((4, 36)).astype('int64') + + x2 = np.array([[2], [1], [2], [1]]).astype('int64') + x2_lod = [[1, 1, 1, 1]] + + x3 = np.array([3]).astype('int64') + + out = np.zeros((1, 36)).astype('double') + out_lod = [[1]] + + mmap = np.array([[0, 1, 1]]).astype('int64') + mmap_lod = [[1]] + + loss_weight = np.array([[0]]).astype('double') + self.inputs = { + 'Ins': x1, + 'Ins_tag': (x2, x2_lod), + 'Filter_tag': x3, + } + self.outputs = { + 'Out': (out, out_lod), + 'LossWeight': (loss_weight, mmap_lod), + 'IndexMap': (mmap, mmap_lod) + } + self.attrs = {'is_lod': False, 'out_val_if_empty': 0} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + pass + + +class TestFilterByInstagOp7(OpTest): + def setUp(self): + self.op_type = 'filter_by_instag' + + x1 = np.random.random((4, 36)).astype('int32') + + x2 = np.array([[2], [1], [2], [1]]).astype('int64') + x2_lod = [[1, 1, 1, 1]] + + x3 = np.array([3]).astype('int64') + + out = np.zeros((1, 36)).astype('double') + out_lod = [[1]] + + mmap = np.array([[0, 1, 1]]).astype('int64') + mmap_lod = [[1]] + + loss_weight = np.array([[0]]).astype('double') + self.inputs = { + 'Ins': x1, + 'Ins_tag': (x2, x2_lod), + 'Filter_tag': x3, + } + self.outputs = { + 'Out': (out, out_lod), + 'LossWeight': (loss_weight, mmap_lod), + 'IndexMap': (mmap, mmap_lod) + } + self.attrs = {'is_lod': False, 'out_val_if_empty': 0} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + pass + + if __name__ == '__main__': unittest.main() -- GitLab