Commit 4c98c2cc authored by xuezhong

remove debug print

Parent 58ad40cc
@@ -112,33 +112,6 @@ int UniqSampler(const Sampler& sampler, const std::size_t num_samples,
   }
   return num_tries;
 }
-/*
-template <typename T>
-void Print(Tensor & t, std::string name) {
-  if (!FLAGS_debug_print) {
-    return;
-  }
-  VLOG(1) << "qxz print "<< name;
-  VLOG(1) << name << "size = " << t.numel();
-  size_t size = t.numel();
-  type *d = t.data<type>();
-#ifdef PADDLE_WITH_CUDA
-  std::vector<type> vec;
-  platform::DeviceContextPool::Instance().Get(t.place())->Wait();
-  if (platform::is_gpu_place(t.place())) {
-    vec.resize(size);
-    cudaMemcpy(vec.data(), d, sizeof(T) * size, cudaMemcpyDeviceToHost);
-    d = vec.data();
-  }
-#endif
-  VLOG(1) << name << " data_ptr = " << static_cast<void*>(d);
-  std::string out;
-  for (size_t i = 0; i < size; i++) {
-    out += std::to_string(d[i]);
-    out += ",";
-  }
-  VLOG(1) << out;
-}*/
 template <typename T>
 void GPUSampleWithProb<T>::operator()(
...
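A side note on the block deleted above: even uncommented, the helper would not have compiled as written, since it mixes `T` and `type` as its template parameter name. A minimal corrected sketch of what it was doing, staging a possibly GPU-resident tensor on the host before logging it, assuming the `framework::Tensor`, `VLOG`, and `platform` facilities visible elsewhere in this diff:

```cpp
// Corrected sketch of the deleted debug helper (illustration only, not part
// of this commit). Uses T consistently where the original mixed T and type.
template <typename T>
void Print(const framework::Tensor& t, const std::string& name) {
  if (!FLAGS_debug_print) {
    return;
  }
  VLOG(1) << name << " size = " << t.numel();
  size_t size = t.numel();
  const T* d = t.data<T>();
#ifdef PADDLE_WITH_CUDA
  std::vector<T> vec;
  // Finish pending GPU work, then copy the buffer to the host for printing.
  platform::DeviceContextPool::Instance().Get(t.place())->Wait();
  if (platform::is_gpu_place(t.place())) {
    vec.resize(size);
    cudaMemcpy(vec.data(), d, sizeof(T) * size, cudaMemcpyDeviceToHost);
    d = vec.data();
  }
#endif
  std::string out;
  for (size_t i = 0; i < size; i++) {
    out += std::to_string(d[i]);
    out += ",";
  }
  VLOG(1) << out;
}
```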
@@ -64,12 +64,13 @@ class SampleLogitsOpMaker : public framework::OpProtoAndCheckerMaker {
         .AsIntermediate();
     AddOutput("SampledLogits",
               "(Tensor, default: Tensor<float>), A 2-D tensor with shape"
-              "[N x S+NT]. The outputs value of sampled softmax, which will be"
+              "[N x S+NT]. The outputs value of sample logits, which will be"
               "used in backward calculation.")
         .AsIntermediate();
-    AddOutput("SampledLabel",
-              "(Tensor, default: Tensor<int64>), A 2-D tensor. The cross "
-              "entropy loss with shape [N x NT].");
+    AddOutput(
+        "SampledLabel",
+        "(Tensor, default: Tensor<int64>), A 2-D tensor. The sampled label"
+        "with shape [N x S + NT].");
     AddAttr<bool>(
         "use_custom_samples",
         "An indicator whether to use custom samples with probabilities, if True"
@@ -81,7 +82,7 @@ class SampleLogitsOpMaker : public framework::OpProtoAndCheckerMaker {
         "An indicator whether to sample non-repetitive negtive labels, if True"
         "the operator will sample negtive labels without replacement."
         "otherwise, the operator will sample negtive labels with replacement.")
-        .SetDefault(false);
+        .SetDefault(true);
     AddAttr<bool>(
         "remove_accidental_hits",
         "An indicator whether to remove accidental hits when samples hits true"
@@ -92,35 +93,11 @@ class SampleLogitsOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<int>("seed", "Random seed for generating samples").SetDefault(0);
     AddComment(R"DOC(
-TODO(chenfeiyu): Write documentation for this Operator.
-Sampled Softmax With Cross Entropy Operator.
-
-Cross entropy loss with sampled softmax is used as the output layer extensively.
-This operator computes the softmax normalized values for each row of the input
-tensor, after which cross-entropy loss is computed. This provides a more
-numerically stable gradient.
-
-Because this operator performs a softmax on logits internally, it expects
-unscaled logits. This operator should not be used with the output of
-softmax operator since that would produce incorrect results.
-
-When the attribute soft_label is set false, this operators expects mutually
-exclusive hard labels, each sample in a batch is in exactly one class with a
-probability of 1.0. Each sample in the batch will have a single label.
-
-The equation is as follows:
-
-1) Hard label (one-hot label, so every sample has exactly one class)
-
-$$Loss_j = -\text{Logit}_{Label_j} +
-\log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right),
-j = 1,..., K$$
-
-2) Soft label (each sample can have a distribution over all classes)
-
-$$Loss_j = -\sum_{i=0}^{K}\text{Label}_i \left(\text{Logit}_i -
-\log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right)\right),
-j = 1,...,K$$
+  """
+  Computes sampled output training logits and labels suitable for implementing
+  sampled softmax.
+  """
 )DOC");
   }
...
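For context on the `uniq` attribute whose default flips to `true` above: it asks the operator to draw negative labels without replacement, which is what the `UniqSampler` routine in the first hunk implements by retrying until enough distinct classes have been collected and returning the number of draws (`num_tries`) for the probability computation. A minimal sketch of that rejection loop, with the sampler interface reduced to a hypothetical `Sample()` that returns a class id:

```cpp
#include <cstdint>
#include <unordered_set>

// Sketch of sampling without replacement as the uniq attribute implies:
// keep drawing until num_samples distinct classes are collected, and
// report how many draws that took (used to estimate sample probabilities).
template <typename Sampler>
int UniqSampleSketch(Sampler& sampler, std::size_t num_samples,
                     std::unordered_set<std::int64_t>* out) {
  int num_tries = 0;
  while (out->size() < num_samples) {
    ++num_tries;               // count every draw, including rejected duplicates
    out->insert(sampler.Sample());
  }
  return num_tries;
}
```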
@@ -248,8 +248,7 @@ class SampleLogitsGradCUDAKernel : public framework::OpKernel<T> {
   if (!FLAGS_debug_print) {
     return;
   }
-  VLOG(1) << "qxz print " << name;
-  VLOG(1) << name << "size = " << t.numel();
+  VLOG(1) << name << " size = " << t.numel();
   size_t size = t.numel();
   const type* d = t.data<type>();
 #ifdef PADDLE_WITH_CUDA
...
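The `FLAGS_debug_print` switch gating this helper is presumably an ordinary gflags boolean; its declaration is not part of this diff, but it would look something like the following sketch:

```cpp
#include "gflags/gflags.h"

// Hypothetical definition implied by the FLAGS_debug_print checks above;
// the actual declaration site is not shown in this commit.
DEFINE_bool(debug_print, false,
            "If true, kernels dump tensor names, sizes and contents via VLOG(1).");
```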
@@ -207,37 +207,6 @@ class SampleLogitsKernel : public framework::OpKernel<T> {
                                         num_true);
     }
-    /* Debug
-    const auto num_sampled_classes = samples_dim[1];
-    std::cout << "Sampled Logits" << std::endl;
-    const auto sampled_logits_data = sampled_logits->data<T>();
-    for (int i = 0; i < sampled_logits->numel(); ++i) {
-      std::cout << sampled_logits_data[i] << ", ";
-      if ((i + 1) % num_sampled_classes == 0)
-        std::cout << std::endl;
-    }
-    std::cout << std::endl;
-    */
-    /* Debug
-    std::cout << "Samples" << std::endl;
-    const auto samples_data = samples->data<int64_t>();
-    for (int i = 0; i < samples->numel(); ++i) {
-      std::cout << samples_data[i] << ", ";
-      if ((i + 1) % num_sampled_classes == 0)
-        std::cout << std::endl;
-    }
-    std::cout << std::endl;
-    */
-    /* Debug
-    std::cout << "Probabilities" << std::endl;
-    const auto probabilities_data = probabilities->data<T>();
-    for (int i = 0; i < probabilities->numel(); ++i) {
-      std::cout << probabilities_data[i] << ", ";
-      if ((i + 1) % num_sampled_classes == 0)
-        std::cout << std::endl;
-    }
-    std::cout << std::endl;
-    */
     // subtracted sampled logits with logQ(y|x)
     auto probs = EigenMatrix<T>::From(*probabilities);
     auto smp_logits = EigenMatrix<T>::From(*sampled_logits);
...
@@ -263,9 +232,6 @@ class SampleLogitsGradKernel : public framework::OpKernel<T> {
     math::SetConstant<platform::CPUDeviceContext, T> set_zero;
     set_zero(dev_ctx, logits_grad, static_cast<T>(0));
-    // const bool remove_accidental_hits =
-    //     context.Attr<bool>("remove_accidental_hits");
-
     // UNDERSTAND: scatter it back to logit_grad
     CPUPutAlongD1<T>(dev_ctx, logits_grad, *samples, *sampled_logits_grad);
   }
...
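The `// subtracted sampled logits with logQ(y|x)` context line above is the core sampled-softmax correction: each sampled logit is shifted by the log of the probability with which that class was drawn, so the softmax over the sampled subset remains a faithful stand-in for the full softmax during training. Stripped of the Eigen expression the kernel actually uses, the correction amounts to this sketch:

```cpp
#include <cmath>
#include <vector>

// Sketch of the logQ(y|x) correction applied to sampled logits: each logit is
// shifted by -log(q), where q is the probability with which that class was
// sampled. Both buffers are laid out row-major, [N x (S + NT)] as in the diff.
void SubtractLogQ(std::vector<float>* sampled_logits,
                  const std::vector<float>& probabilities) {
  for (size_t i = 0; i < sampled_logits->size(); ++i) {
    (*sampled_logits)[i] -= std::log(probabilities[i]);
  }
}
```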