fix stride legacy inplace bug (#56418)

ed9ec699 · wanghuancoder · GitHub · e5b71671 · ed9ec699 · ed9ec699
3 changed files
--- a/paddle/fluid/imperative/tracer.cc
+++ b/paddle/fluid/imperative/tracer.cc
@@ -410,6 +410,9 @@ void Tracer::TraceOp(const std::string& type,
  VLOG(6) << "Running On Eager TraceOp with use_default_attr_map: "
          << use_default_attr_map;
  std::map<phi::DenseTensor*, phi::DenseTensor*> need_backup_inputs2outputs;
+  std::map<phi::DenseTensor*, std::shared_ptr<phi::Allocation>>
+      need_backup_inputs2holder;
+  std::map<phi::DenseTensor*, phi::DDim> need_backup_inputs2strides;
  if (FLAGS_use_stride_kernel) {
    for (auto& iter : inplace_map) {
      auto inputs_iter = ins.find(iter.first);
@@ -426,11 +429,12 @@ void Tracer::TraceOp(const std::string& type,
                outputs_iter->second[i]
                    ->MutableVar()
                    ->GetMutable<phi::DenseTensor>();
+            need_backup_inputs2holder[dense_tensor] = dense_tensor->Holder();
+            need_backup_inputs2strides[dense_tensor] = dense_tensor->strides();
          }
        }
      }
    }
-
    TraceOpImpl<egr::EagerVariable>(type,
                                    ins,
                                    outs,
@@ -443,7 +447,11 @@ void Tracer::TraceOp(const std::string& type,

    auto dev_ctx = paddle::platform::DeviceContextPool::Instance().Get(place);
    for (auto& iter : need_backup_inputs2outputs) {
-      paddle::experimental::TransStride(dev_ctx, iter.second, iter.first);
+      iter.first->ResetHolder(need_backup_inputs2holder[iter.first]);
+      iter.first->set_strides(need_backup_inputs2strides[iter.first]);
+      paddle::experimental::TransStrideLegacy(dev_ctx, iter.second, iter.first);
+      iter.second->ResetHolder(need_backup_inputs2holder[iter.first]);
+      iter.second->set_strides(need_backup_inputs2strides[iter.first]);
    }
  } else {
    TraceOpImpl<egr::EagerVariable>(type,

--- a/paddle/phi/api/lib/api_gen_utils.cc
+++ b/paddle/phi/api/lib/api_gen_utils.cc
@@ -423,6 +423,56 @@ void TransStride(phi::DeviceContext* dev_ctx,
  }
 }

+void TransStrideLegacy(phi::DeviceContext* dev_ctx,  // tried as CPU/GPU/XPU
+                       phi::DenseTensor* from,  // source of the strided copy
+                       phi::DenseTensor* to) {  // destination; null => no-op
+  if (to) {
+    auto* cpu_ctx = dynamic_cast<phi::CPUContext*>(dev_ctx);
+    if (cpu_ctx) {  // CPU path: kernel gets to's own dims/strides/offset
+      PD_VISIT_ALL_TYPES(to->dtype(), "StridedCopyKernel", ([&] {
+                           phi::StridedCopyKernel<data_t, phi::CPUContext>(
+                               *cpu_ctx,
+                               *from,
+                               phi::vectorize<int64_t>(to->dims()),
+                               phi::vectorize<int64_t>(to->strides()),
+                               to->offset(),
+                               to);
+                         }));
+      return;  // from is not deleted by this function
+    }
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    auto* gpu_ctx = dynamic_cast<phi::GPUContext*>(dev_ctx);
+    if (gpu_ctx) {  // GPU path: same call, GPUContext instantiation
+      PD_VISIT_ALL_TYPES(to->dtype(), "StridedCopyKernel", ([&] {
+                           phi::StridedCopyKernel<data_t, phi::GPUContext>(
+                               *gpu_ctx,
+                               *from,
+                               phi::vectorize<int64_t>(to->dims()),
+                               phi::vectorize<int64_t>(to->strides()),
+                               to->offset(),
+                               to);
+                         }));
+      return;
+    }
+#endif  // PADDLE_WITH_CUDA || PADDLE_WITH_HIP
+#ifdef PADDLE_WITH_XPU
+    auto* xpu_ctx = dynamic_cast<phi::XPUContext*>(dev_ctx);
+    if (xpu_ctx) {  // XPU path: same call, XPUContext instantiation
+      PD_VISIT_ALL_TYPES(to->dtype(), "StridedCopyKernel", ([&] {
+                           phi::StridedCopyKernel<data_t, phi::XPUContext>(
+                               *xpu_ctx,
+                               *from,
+                               phi::vectorize<int64_t>(to->dims()),
+                               phi::vectorize<int64_t>(to->strides()),
+                               to->offset(),
+                               to);
+                         }));
+      return;
+    }
+#endif  // PADDLE_WITH_XPU
+  }  // reached when to is null or no context cast matched: nothing copied
+}
+
 void TransStride(phi::DeviceContext* dev_ctx,
                 const std::vector<phi::DenseTensor*>& from,
                 const std::vector<phi::DenseTensor*>& to) {

--- a/paddle/phi/api/lib/api_gen_utils.h
+++ b/paddle/phi/api/lib/api_gen_utils.h
@@ -133,6 +133,10 @@ void TransStride(phi::DeviceContext* dev_ctx,
                 phi::SelectedRows* from,
                 phi::SelectedRows* to);

+void TransStrideLegacy(phi::DeviceContext* dev_ctx,
+                       phi::DenseTensor* from,
+                       phi::DenseTensor* to);  // does nothing if to is null
+
 #ifdef PADDLE_WITH_DISTRIBUTE
 /* ------------------ for auto parallel ----------------------- */