diff --git a/paddle/fluid/operators/nce_op.cc b/paddle/fluid/operators/nce_op.cc
index ce382389aa0b340bea256b425ec4371499eb5207..0a206366db49b34951266ab74b96dc56fa419a96 100644
--- a/paddle/fluid/operators/nce_op.cc
+++ b/paddle/fluid/operators/nce_op.cc
@@ -33,10 +33,13 @@ class NCEOp : public framework::OperatorWithKernel {
     OP_INOUT_CHECK(ctx->HasInput("Label"), "Input", "Label", "nce");
     OP_INOUT_CHECK(ctx->HasInput("Weight"), "Input", "Weight", "nce");
     OP_INOUT_CHECK(ctx->HasOutput("Cost"), "Output", "Cost", "nce");
-    OP_INOUT_CHECK(ctx->HasOutput("SampleLogits"), "Output", "SampleLogits",
-                   "nce");
-    OP_INOUT_CHECK(ctx->HasOutput("SampleLabels"), "Output", "SampleLabels",
-                   "nce");
+    bool is_test = ctx->Attrs().Get<bool>("is_test");
+    if (!is_test) {
+      OP_INOUT_CHECK(ctx->HasOutput("SampleLogits"), "Output", "SampleLogits",
+                     "nce");
+      OP_INOUT_CHECK(ctx->HasOutput("SampleLabels"), "Output", "SampleLabels",
+                     "nce");
+    }
 
     auto x_dims = ctx->GetInputDim("Input");
     auto label_dims = ctx->GetInputDim("Label");
@@ -89,13 +92,15 @@ class NCEOp : public framework::OperatorWithKernel {
     out_dims.push_back(1);
     ctx->SetOutputDim("Cost", framework::make_ddim(out_dims));
 
-    // set dims of output(SampleOut)
-    std::vector<int64_t> sample_out_dims;
-    sample_out_dims.push_back(x_dims[0]);
-    sample_out_dims.push_back(
-        (num_true_classes == -1) ? -1 : (num_neg_samples + num_true_classes));
-    ctx->SetOutputDim("SampleLogits", framework::make_ddim(sample_out_dims));
-    ctx->SetOutputDim("SampleLabels", framework::make_ddim(sample_out_dims));
+    if (!is_test) {
+      // set dims of output(SampleOut)
+      std::vector<int64_t> sample_out_dims;
+      sample_out_dims.push_back(x_dims[0]);
+      sample_out_dims.push_back(
+          (num_true_classes == -1) ? -1 : (num_neg_samples + num_true_classes));
+      ctx->SetOutputDim("SampleLogits", framework::make_ddim(sample_out_dims));
+      ctx->SetOutputDim("SampleLabels", framework::make_ddim(sample_out_dims));
+    }
   }
 
 protected:
@@ -162,14 +167,16 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
               "Given X is the dot product of input tensor and sampled labels' "
               "weights."
               "Then 'SampleLogits' is sigmoid(X).")
-        .AsIntermediate();
+        .AsIntermediate()
+        .AsExtra();
     AddOutput("SampleLabels",
               "An intermediate tensor of shape[batch_size, num_neg_samples + "
               "num_pos_samples]."
              "This tensor is output of forward kernel and used in backward "
               "kernel to compute grads."
               "")
-        .AsIntermediate();
+        .AsIntermediate()
+        .AsExtra();
 
     AddAttr<int>("num_total_classes",
                  "Total number of classes in all samples.");
@@ -189,28 +196,38 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<bool>("is_sparse", "(boolean, default false) Sparse update.")
         .SetDefault(false);
     // for parameter prefetch
     AddAttr<bool>("remote_prefetch", "").SetDefault(false);
-    AddAttr<int>("trainer_id", "trainer id from 0 ~ worker_num.").SetDefault(0);
+    AddAttr<int>("trainer_id", "trainer id from 0 ~ worker_num.")
+        .SetDefault(0)
+        .AsExtra();
     AddAttr<std::vector<int64_t>>("height_sections",
                                   "Height for each output SelectedRows.")
-        .SetDefault(std::vector<int64_t>({}));
+        .SetDefault(std::vector<int64_t>({}))
+        .AsExtra();
     AddAttr<std::vector<std::string>>(
         "epmap",
         "(string vector, default 127.0.0.1:6164)"
         "Server endpoints in the order of input variables for mapping")
-        .SetDefault({});
+        .SetDefault({})
+        .AsExtra();
     AddAttr<std::vector<std::string>>(
         "table_names",
         "(string vector, the split table names that will be fetched from "
         "parameter server)"
         "in the order of input variables for mapping")
-        .SetDefault({});
+        .SetDefault({})
+        .AsExtra();
     AddAttr<std::vector<int>>("custom_neg_classes",
                               "This attribute only be used in unitest. Classes "
                               "in this list wiil be used as negative classes "
                               "for every samples. Under normal conditions, "
                               "user should avoid setting this attribute.")
-        .SetDefault({});
+        .SetDefault({})
+        .AsExtra();
+    AddAttr<bool>("is_test",
+                  "(bool, default false) Set to true for inference "
+                  "only, false for training.")
+        .SetDefault(false);
     AddComment(R"DOC(
 Compute and return the noise-contrastive estimation training loss. See
 `Noise-contrastive estimation: A new estimation principle for unnormalized
diff --git a/paddle/fluid/operators/nce_op.h b/paddle/fluid/operators/nce_op.h
index 74fda426e92ea61801cb66d4d32f864c16653dfd..364a0f02e3ab70d14f8e87d3da06f50f7ee991e6 100644
--- a/paddle/fluid/operators/nce_op.h
+++ b/paddle/fluid/operators/nce_op.h
@@ -41,7 +41,7 @@ using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
 
 template <typename DeviceContext, typename T>
 void PrepareSamples(const framework::ExecutionContext &context,
-                    Sampler *sampler) {
+                    Sampler *sampler, Tensor *sample_labels) {
   auto label = context.Input<Tensor>("Label");
   const int64_t *label_data = label->data<int64_t>();
   auto label_dims = label->dims();
@@ -49,7 +49,6 @@ void PrepareSamples(const framework::ExecutionContext &context,
   std::vector<int> custom_neg_classes =
       context.Attr<std::vector<int>>("custom_neg_classes");
 
-  auto sample_labels = context.Output<Tensor>("SampleLabels");
   auto sample_labels_dims = sample_labels->dims();
   int64_t *sample_labels_data =
       sample_labels->mutable_data<int64_t>(context.GetPlace());
@@ -82,6 +81,7 @@ class NCEKernel : public framework::OpKernel<T> {
     int seed = context.Attr<int>("seed");
     int num_total_classes = context.Attr<int>("num_total_classes");
     int num_neg_samples = context.Attr<int>("num_neg_samples");
+    bool is_test = context.Attr<bool>("is_test");
 
     Sampler *sampler;
     switch (sampler_type) {
@@ -139,8 +139,29 @@ class NCEKernel : public framework::OpKernel<T> {
       }
     }
 
-    PrepareSamples<DeviceContext, T>(context, sampler);
-    auto sample_labels = context.Output<Tensor>("SampleLabels");
+    std::vector<int64_t> sample_out_dims;
+    auto label = context.Input<Tensor>("Label");
+    Tensor *sample_labels;
+    Tensor *sample_out;
+    Tensor sample_labels_tmp, sample_out_tmp;
+    if (is_test) {
+      // set dims of output(SampleOut)
+      int num_true_classes = label->dims().size() == 2 ? label->dims()[1] : 1;
+      sample_out_dims.push_back((context.Input<Tensor>("Input"))->dims()[0]);
+      sample_out_dims.push_back(
+          (num_true_classes == -1) ? -1 : (num_neg_samples + num_true_classes));
+
+      sample_labels = &sample_labels_tmp;
+      sample_labels->Resize(framework::make_ddim(sample_out_dims));
+
+      sample_out = &sample_out_tmp;
+      sample_out->Resize(framework::make_ddim(sample_out_dims));
+    } else {
+      sample_labels = context.Output<Tensor>("SampleLabels");
+      sample_out = context.Output<Tensor>("SampleLogits");
+    }
+
+    PrepareSamples<DeviceContext, T>(context, sampler, sample_labels);
     const int64_t *sample_labels_data = sample_labels->data<int64_t>();
 
     for (int x = 0; x < sample_labels->numel(); x++) {
@@ -152,9 +173,7 @@ class NCEKernel : public framework::OpKernel<T> {
               x, sample_labels_data[x]));
     }
 
-    auto sample_out = context.Output<Tensor>("SampleLogits");
     T *sample_out_data = sample_out->mutable_data<T>(context.GetPlace());
-    auto label = context.Input<Tensor>("Label");
     auto sample_weight = context.Input<Tensor>("SampleWeight");
     const T *sample_weight_data = nullptr;
     if (sample_weight != nullptr) {
diff --git a/python/paddle/fluid/tests/unittests/test_nce.py b/python/paddle/fluid/tests/unittests/test_nce.py
index d98eb2aa19da216e47650ea3a1b424090c0e1db4..6c2fc4d84292845d895733122352aae8736fbc0e 100644
--- a/python/paddle/fluid/tests/unittests/test_nce.py
+++ b/python/paddle/fluid/tests/unittests/test_nce.py
@@ -77,7 +77,8 @@ class TestNCE(OpTest):
             'custom_neg_classes': list(range(num_neg_samples)),
             'seed': 0,
             'sampler': 0,
-            'is_sparse': is_sparse
+            'is_sparse': is_sparse,
+            'is_test': self.is_test
         }
         self.inputs = {
             'Input': input,
@@ -87,6 +88,9 @@ class TestNCE(OpTest):
             'SampleWeight': sample_weight
         }
 
+    def set_is_test(self):
+        self.is_test = False
+
     def set_data(self):
         self.generate_data(5, 25, 100, 1, 2, False)
 
@@ -95,14 +99,18 @@ class TestNCE(OpTest):
             self.inputs['Bias'], self.inputs['SampleWeight'],
             self.inputs['Label'], self.attrs['num_total_classes'],
             self.attrs['num_neg_samples'])
-        self.outputs = {
-            'Cost': out[0],
-            'SampleLogits': out[1],
-            'SampleLabels': out[2]
-        }
+        if self.is_test:
+            self.outputs = {'Cost': out[0]}
+        else:
+            self.outputs = {
+                'Cost': out[0],
+                'SampleLogits': out[1],
+                'SampleLabels': out[2]
+            }
 
     def setUp(self):
         self.op_type = 'nce'
+        self.set_is_test()
         self.set_data()
         self.compute()
 
@@ -119,6 +127,15 @@ class TestNCECase1Tensor(TestNCE):
     def set_data(self):
         self.generate_data(10, 20, 100, 2, 5, False)
 
 
+class TestNCETensorIsTest(TestNCE):
+    # if is_test = True, there's no need to calculate grad
+    def set_is_test(self):
+        self.is_test = True
+
+    def test_check_grad(self):
+        pass
+
+
 class TestNCECase1SelectedRows(unittest.TestCase):
     def setUp(self):
         self.base_lr = 0.0001