Unverified commit b97fc16d authored by Zeng Jinle, committed by GitHub

fix lod_reset bug, test=develop (#21392)
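Summary of the diff below: ShareDataWith-based output aliasing is replaced with explicit copies or checks. lod_reset, pad_constant_like, and scatter now TensorCopy into their outputs (with inplace inferers registered for lod_reset and scatter so buffer reuse stays possible), sample_logits now requires the customized tensors to already be the output tensors instead of aliasing them, and TensorCopy/TensorCopySync skip self-copies up front.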

Parent 89966525
@@ -36,14 +36,15 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
   auto dst_ptr = dst->mutable_data(dst_place, src.type());
+  if (src_ptr == dst_ptr && src_place == dst_place) {
+    VLOG(3) << "Skip copy the same data async from " << src_place << " to "
+            << dst_place;
+    return;
+  }
   auto size = src.numel() * SizeOfType(src.type());
   if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
-    if (src_ptr == dst_ptr) {
-      VLOG(3) << "Skip copy the same data async from " << src_place << " to "
-              << dst_place;
-      return;
-    }
     memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
                  boost::get<platform::CPUPlace>(src_place), src_ptr, size);
   }
@@ -79,11 +80,6 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     if (platform::is_same_place(src_place, dst_place)) {
-      if (src_ptr == dst_ptr) {
-        VLOG(3) << "Skip copy the same data async from " << src_place << " to "
-                << dst_place;
-        return;
-      }
       memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
                    stream);
     } else {
@@ -127,13 +123,15 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   auto src_place = src.place();
   auto src_ptr = src.data<void>();
   auto dst_ptr = dst->mutable_data(dst_place, src.type());
+  if (src_ptr == dst_ptr && src_place == dst_place) {
+    VLOG(3) << "Skip copy the same data from " << src_place << " to "
+            << dst_place;
+    return;
+  }
   auto size = src.numel() * SizeOfType(src.type());
   if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
-    if (src_ptr == dst_ptr) {
-      VLOG(3) << "Skip copy the same data from " << src_place << " to "
-              << dst_place;
-      return;
-    }
     memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
                  boost::get<platform::CPUPlace>(src_place), src_ptr, size);
   }
@@ -153,11 +151,6 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   } else if (platform::is_gpu_place(src_place) &&
              platform::is_gpu_place(dst_place)) {
     platform::RecordEvent record_event("TensorCopy:GPU->GPU");
-    if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
-      VLOG(3) << "Skip copy the same data from " << src_place << " to "
-              << dst_place;
-      return;
-    }
     auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
     auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
     memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr);
...
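The four hunks above deduplicate the self-copy guard: instead of re-checking src_ptr == dst_ptr inside each place-specific branch, TensorCopy and TensorCopySync now return early once, before any branch, whenever source and destination alias the same memory in the same place. A minimal standalone sketch of that pattern (hypothetical names; plain memcpy standing in for memory::Copy):

```cpp
#include <cstdio>
#include <cstring>

// Hypothetical stand-in for the hoisted guard: a self-copy (same pointer,
// same place) is skipped once, before any per-place branch.
void CopyBuffer(const void* src, void* dst, bool same_place, size_t bytes) {
  if (src == dst && same_place) {
    std::puts("skip copy: src and dst alias the same memory");
    return;  // memcpy over exactly overlapping ranges is undefined behavior
  }
  std::memcpy(dst, src, bytes);  // stand-in for the per-place memory::Copy
}

int main() {
  char buf[8] = "paddle";
  CopyBuffer(buf, buf, /*same_place=*/true, sizeof(buf));  // skipped
  char out[8];
  CopyBuffer(buf, out, /*same_place=*/true, sizeof(buf));  // real copy
  std::printf("%s\n", out);
  return 0;
}
```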
@@ -205,6 +205,11 @@ class LoDResetGradMaker : public framework::SingleGradOpMaker<T> {
   }
 };
 
+DECLARE_INPLACE_OP_INFERER(LodResetInplaceInferer, {"X", "Out"});
+DECLARE_INPLACE_OP_INFERER(LodResetGradInplaceInferer,
+                           {framework::GradVarName("Out"),
+                            framework::GradVarName("X")});
+
 DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(LoDResetGradNoNeedBufferVarInference,
                                       "X");
@@ -215,9 +220,10 @@ namespace ops = paddle::operators;
 REGISTER_OPERATOR(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker,
                   ops::LoDResetGradMaker<paddle::framework::OpDesc>,
                   ops::LoDResetGradMaker<paddle::imperative::OpBase>,
-                  ops::LoDResetOpVarTypeInference);
+                  ops::LoDResetOpVarTypeInference, ops::LodResetInplaceInferer);
 REGISTER_OPERATOR(lod_reset_grad, ops::LoDResetGradOp,
-                  ops::LoDResetGradNoNeedBufferVarInference);
+                  ops::LoDResetGradNoNeedBufferVarInference,
+                  ops::LodResetGradInplaceInferer);
 REGISTER_OP_CPU_KERNEL(
     lod_reset, ops::LoDResetKernel<paddle::platform::CPUPlace, float>,
...
@@ -31,7 +31,7 @@ class LoDResetKernel : public framework::OpKernel<T> {
     auto* lod_t = ctx.Input<framework::LoDTensor>("Y");
     bool append = ctx.Attr<bool>("append");
-    out->ShareDataWith(*in);
+    framework::TensorCopy(*in, in->place(), out);
     std::vector<int> level0;
     if (lod_t) {
@@ -45,8 +45,8 @@ class LoDResetKernel : public framework::OpKernel<T> {
       return;  // early return, since lod already set
     } else {
       auto* lod = lod_t->data<int>();
+      framework::Tensor lod_cpu;
       if (platform::is_gpu_place(lod_t->place())) {
-        framework::Tensor lod_cpu;
         framework::TensorCopySync(*lod_t, platform::CPUPlace(), &lod_cpu);
         lod = lod_cpu.data<int>();
       }
@@ -90,7 +90,7 @@ class LoDResetGradKernel : public framework::OpKernel<T> {
     auto* d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
     auto* d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
-    d_x->ShareDataWith(*d_out);
+    framework::TensorCopy(*d_out, d_out->place(), d_x);
   }
 };
 }  // namespace operators
...
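Besides swapping ShareDataWith for TensorCopy, the second hunk above hoists framework::Tensor lod_cpu out of the GPU-only block: lod keeps pointing into lod_cpu after the block ends, so the staging tensor must outlive that pointer. A compilable sketch of the lifetime rule, with hypothetical names and a std::vector standing in for the staging tensor:

```cpp
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> device_lod = {0, 2, 5};
  bool on_gpu = true;

  const int* lod = device_lod.data();
  std::vector<int> lod_cpu;  // hoisted: must outlive every use of `lod`
  if (on_gpu) {
    lod_cpu = device_lod;    // stand-in for TensorCopySync to CPUPlace
    lod = lod_cpu.data();    // stays valid: lod_cpu lives to end of scope
    // before the fix, lod_cpu was declared inside this block, so `lod`
    // dangled as soon as the block closed
  }
  std::printf("first offset: %d\n", lod[0]);
  return 0;
}
```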
@@ -34,8 +34,7 @@ class PadConstantLikeKernel : public framework::OpKernel<T> {
     auto* out = context.Output<framework::Tensor>("Out");
 
     if (in_x->dims() == in_y->dims()) {
-      // TensorCopy(in_y, context.GetPlace(), context, out);
-      out->ShareDataWith(*in_y);
+      framework::TensorCopy(*in_y, context.GetPlace(), out);
       return;
     }
@@ -70,8 +69,7 @@ class PadConstantLikeGradKernel : public framework::OpKernel<T> {
     }
 
     if (in_dout->dims() == in_y->dims()) {
-      // TensorCopy(in_dout, context.GetPlace(), context, d_y);
-      d_y->ShareDataWith(*in_dout);
+      framework::TensorCopy(*in_dout, context.GetPlace(), d_y);
       return;
     }
...
@@ -155,8 +155,15 @@ class SampleLogitsCUDAKernel : public framework::OpKernel<T> {
           context.Input<Tensor>("CustomizedSamples");
       const Tensor* customized_probabilities =
           context.Input<Tensor>("CustomizedProbabilities");
-      samples->ShareDataWith(*customized_samples);
-      probabilities->ShareDataWith(*customized_probabilities);
+      PADDLE_ENFORCE_EQ(customized_samples, samples,
+                        platform::errors::InvalidArgument(
+                            "CustomizedSamples must be the same Tensor with "
+                            "Samples when use_customized_samples = True"));
+      PADDLE_ENFORCE_EQ(
+          customized_probabilities, probabilities,
+          platform::errors::InvalidArgument(
+              "CustomizedProbabilities must be the same Tensor with "
+              "Probabilities when use_customized_samples = True"));
     } else {
       samples->mutable_data<int64_t>(context.GetPlace());
       probabilities->mutable_data<T>(samples_dim, context.GetPlace());
...
@@ -195,8 +195,15 @@ class SampleLogitsKernel : public framework::OpKernel<T> {
           context.Input<Tensor>("CustomizedSamples");
       const Tensor* customized_probabilities =
           context.Input<Tensor>("CustomizedProbabilities");
-      samples->ShareDataWith(*customized_samples);
-      probabilities->ShareDataWith(*customized_probabilities);
+      PADDLE_ENFORCE_EQ(customized_samples, samples,
+                        platform::errors::InvalidArgument(
+                            "CustomizedSamples must be the same Tensor with "
+                            "Samples when use_customized_samples = True"));
+      PADDLE_ENFORCE_EQ(
+          customized_probabilities, probabilities,
+          platform::errors::InvalidArgument(
+              "CustomizedProbabilities must be the same Tensor with "
+              "Probabilities when use_customized_samples = True"));
     } else {
       samples->mutable_data<int64_t>(context.GetPlace());
       probabilities->mutable_data<T>(samples_dim, context.GetPlace());
...
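Both sample_logits kernels replace the ShareDataWith aliasing with hard identity checks on the tensor pointers. A compilable sketch of that contract, with hypothetical, vastly simplified types and a plain assert standing in for PADDLE_ENFORCE_EQ:

```cpp
#include <cassert>
#include <cstdio>

struct Tensor { const char* name; };  // hypothetical, vastly simplified

// The kernel no longer aliases its output onto the user tensor; it now
// demands the caller passed the *same* object for both slots.
void SampleWithCustomized(const Tensor* customized_samples, Tensor* samples) {
  // stand-in for PADDLE_ENFORCE_EQ(customized_samples, samples, ...)
  assert(customized_samples == samples &&
         "CustomizedSamples must be the same Tensor with Samples");
  std::printf("writing results in place into '%s'\n", samples->name);
}

int main() {
  Tensor t{"customized_samples"};
  SampleWithCustomized(&t, &t);  // satisfies the identity precondition
  return 0;
}
```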
@@ -130,14 +130,21 @@ class ScatterGradMaker : public framework::SingleGradOpMaker<T> {
 DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(ScatterGradNoNeedBufferVarsInference,
                                       "Updates");
 
+DECLARE_INPLACE_OP_INFERER(ScatterInplaceInferer, {"X", "Out"});
+DECLARE_INPLACE_OP_INFERER(ScatterGradInplaceInferer,
+                           {framework::GradVarName("Out"),
+                            framework::GradVarName("X")});
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
 REGISTER_OPERATOR(scatter, ops::ScatterOp, ops::ScatterOpMaker,
                   ops::ScatterGradMaker<paddle::framework::OpDesc>,
-                  ops::ScatterGradMaker<paddle::imperative::OpBase>);
+                  ops::ScatterGradMaker<paddle::imperative::OpBase>,
+                  ops::ScatterInplaceInferer);
 REGISTER_OPERATOR(scatter_grad, ops::ScatterGradOp,
-                  ops::ScatterGradNoNeedBufferVarsInference);
+                  ops::ScatterGradNoNeedBufferVarsInference,
+                  ops::ScatterGradInplaceInferer);
 REGISTER_OP_CPU_KERNEL(scatter, ops::ScatterOpKernel<float>);
 REGISTER_OP_CPU_KERNEL(scatter_grad, ops::ScatterGradientOpKernel<float>);
@@ -32,7 +32,7 @@ class ScatterOpCUDAKernel : public framework::OpKernel<T> {
     auto *Out = ctx.Output<Tensor>("Out");
     bool overwrite = ctx.Attr<bool>("overwrite");
-    Out->ShareDataWith(*X);
+    framework::TensorCopy(*X, ctx.GetPlace(), Out);
     // use template class to support int32_t and int64_t
     const auto &index_type = Ids->type();
     bool index_type_match = index_type == framework::proto::VarType::INT32 ||
...
@@ -36,7 +36,7 @@ class ScatterOpKernel : public framework::OpKernel<T> {
     double overwrite = ctx.Attr<bool>("overwrite");
     // In place output: Out = X, Out[Ids] = Updates
-    framework::TensorCopySync(*X, ctx.GetPlace(), Out);
+    framework::TensorCopy(*X, ctx.GetPlace(), Out);
     // Apply ScatterUpdate: Out[index] = Updates[:]
     const auto &index_type = Ids->type();
     bool index_type_match = index_type == framework::proto::VarType::INT32 ||
@@ -76,7 +76,7 @@ class ScatterGradientOpKernel : public framework::OpKernel<T> {
     if (dX) {
       // In place gradient: dX = dO
-      framework::TensorCopySync(*dOut, ctx.GetPlace(), dX);
+      framework::TensorCopy(*dOut, ctx.GetPlace(), dX);
     }
     if (dUpdates) {
       dUpdates->mutable_data<T>(ctx.GetPlace());
...
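Note that the scatter kernels above also switch from the synchronous TensorCopySync to TensorCopy. With the self-copy guard now at the top of TensorCopy, the copy degenerates to a logged no-op whenever the new ScatterInplaceInferer has already made Out alias X, which appears to be the point of pairing the inplace inferers with the hoisted guard.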
@@ -1060,8 +1060,9 @@ def sampled_softmax_with_cross_entropy(logits,
             logits=fc, label=label, num_samples=25)
     """
     helper = LayerHelper('sample_logits', **locals())
-    samples = helper.create_variable_for_type_inference(dtype='int64')
-    probabilities = helper.create_variable_for_type_inference(
+    samples = customized_samples if use_customized_samples else helper.create_variable_for_type_inference(
+        dtype='int64')
+    probabilities = customized_probabilities if use_customized_samples else helper.create_variable_for_type_inference(
         dtype=logits.dtype)
     sampled_logits \
         = helper.create_variable_for_type_inference(dtype=logits.dtype)
...
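With the layer change above, when use_customized_samples is enabled the Python side now passes customized_samples and customized_probabilities directly as the op's Samples and Probabilities output variables, which is exactly the identity that the new PADDLE_ENFORCE_EQ checks in the C++ kernels verify.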