Unverified commit 787980b1, authored by Weilong Wu, committed by GitHub

[Eager] Support imperative selected_rows_to_lod_tensor and the opposite case (#39223)

* Added selected_rows and rw_lock to pten

* Renamed the unit test target to fix CI

* Removed class SelectedRows from Fluid, updated the include/CMake relationships, and switched Fluid to pten::SelectedRows

* Removed rw_lock.h and rw_lock_test.cc from fluid

* Use pten::RWLock and pten::AutoRDLock, fix CI

* Use pten::SelectedRows

* Use pten::SelectedRows

* Fix to pass NPU CI

* SelectedRows inherits from TensorBase

* Use pten::SelectedRows, to pass NPU CI

* To fix NPU CI

* To fix NPU CI again

* Use paddle/pten/core/enforce and polish code

* Support imperative selected_rows_to_lod_tensor

* Polish code
Parent 71634a61
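The scenario this commit enables: in eager (dygraph) mode, a sparse embedding lookup produces its weight gradient as pten::SelectedRows, while any dense use of the same weight produces a LoDTensor gradient, and the gradient accumulator must add the two representations together in either order. A minimal sketch, assuming a Paddle development build containing this commit; paddle.nn.Embedding(..., sparse=True) and _test_eager_guard come from the Paddle API of that period and are not part of this diff:

import paddle
from paddle.fluid.framework import _test_eager_guard

# Run under the experimental eager mode this commit targets.
with _test_eager_guard():
    # sparse=True makes the lookup emit a SelectedRows gradient for the
    # weight; using the weight directly yields a dense LoDTensor gradient,
    # so backward() has to accumulate across both representations.
    emb = paddle.nn.Embedding(10, 4, sparse=True)
    x = paddle.to_tensor([0, 1, 1, 2], dtype='int64')
    loss = emb(x).sum() + (emb.weight * 0.1).sum()
    loss.backward()
    print(emb.weight.grad.shape)  # expected: [10, 4]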
@@ -19,6 +19,7 @@
 #include "paddle/fluid/eager/eager_tensor.h"
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/imperative/gradient_accumulator.h"
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/math_function_impl.h"
@@ -259,80 +260,32 @@ void TensorAdd(const egr::EagerTensor& src, egr::EagerTensor* dst) {
       paddle::framework::DataTypeToString(data_type), place));
 }
 
-void VariableAdd(const egr::EagerTensor& src, egr::EagerTensor* dst) {
-  // TODO(jiabin): Support other tensor type later
-  auto* dst_tensor =
-      dst->MutableVar()->GetMutable<paddle::framework::LoDTensor>();
-  auto& src_tensor = src.Var().Get<paddle::framework::LoDTensor>();
-
-  auto numel = src_tensor.numel();
-
-  // FIXME(minqiyang): loss_grad op will pass a zero grad of label
-  // ugly fix for it
-  if (numel == 0) {
-    return;
-  }
-
-  PADDLE_ENFORCE_EQ(
-      dst_tensor->numel(), numel,
-      paddle::platform::errors::PreconditionNotMet(
-          "The number of elements of source tensor and destination tensor "
-          "should be equal, but got the number of elements of source tensor is "
-          "%zu and the number of elements of destination tensor is %zu.",
-          numel, dst_tensor->numel()));
-
-  auto data_type = src_tensor.type();
-  auto place = src_tensor.place();
-
-  PADDLE_ENFORCE_EQ(dst_tensor->type(), data_type,
-                    paddle::platform::errors::PreconditionNotMet(
-                        "The data type of source tensor and destination tensor "
-                        "should be equal, Otherwise, the calculation results "
-                        "will be incorrect."));
-
-#define PADDLE_TENSOR_ADD(cpp_type)                                          \
-  if (data_type == paddle::framework::DataTypeTrait<cpp_type>::DataType()) { \
-    TensorAddFunctor<cpp_type> func(                                         \
-        numel, src_tensor.data<cpp_type>(),                                  \
-        dst_tensor->mutable_data<cpp_type>(place));                          \
-    paddle::platform::VisitPlace(place, func);                               \
-    return;                                                                  \
-  }
-
-  // TODO(jiabin): Support NPU here
-  PADDLE_TENSOR_ADD(float);
-// NOTE(phlrain): xpu only support float
-#ifndef PADDLE_WITH_XPU
-  PADDLE_TENSOR_ADD(double);
-  // NOTE(chenweihang): only support complex grad tensor accumulated,
-  // support selected rows if needed in the future
-  PADDLE_TENSOR_ADD(paddle::platform::complex<float>);
-  PADDLE_TENSOR_ADD(paddle::platform::complex<double>);
-#endif
-#undef PADDLE_TENSOR_ADD
-
-  if (data_type == paddle::framework::proto::VarType::FP16) {
-    if (paddle::platform::is_gpu_place(place)) {
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-      return TensorAddImpl<paddle::platform::CUDADeviceContext,
-                           paddle::platform::float16>(src_tensor, dst_tensor,
-                                                      place);
-#else
-      PADDLE_THROW(paddle::platform::errors::Unimplemented(
-          "Gradient accumulation of data type (%s) on place (%s) is not "
-          "supported in imperative mode",
-          paddle::framework::DataTypeToString(data_type), place));
-#endif
-    } else if (paddle::platform::is_cpu_place(place)) {
-      return TensorAddImpl<paddle::platform::CPUDeviceContext,
-                           paddle::platform::float16>(src_tensor, dst_tensor,
-                                                      place);
-    }
-  }
-
-  PADDLE_THROW(paddle::platform::errors::Unimplemented(
-      "Gradient accumulation of data type (%s) on place (%s) is not "
-      "supported in imperative mode",
-      paddle::framework::DataTypeToString(data_type), place));
-}
+void VariableAdd(const egr::EagerTensor& src_tensor,
+                 egr::EagerTensor* dst_tensor) {
+  auto& src = src_tensor.Var();
+  auto* dst = dst_tensor->MutableVar();
+
+  if (dst->IsType<paddle::framework::LoDTensor>()) {
+    if (src.IsType<paddle::framework::LoDTensor>()) {
+      paddle::imperative::TensorAdd(src, dst);
+    } else if (src.IsType<pten::SelectedRows>()) {
+      paddle::imperative::SelectedRowsAddToTensor(src, dst);
+    } else {
+      PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+          "Unexpected branch, output variable type is %s",
+          paddle::framework::ToTypeName(dst->Type())));
+    }
+  } else {
+    if (src.IsType<paddle::framework::LoDTensor>()) {
+      paddle::framework::Variable new_dst;
+      paddle::imperative::SelectedRowsAddTensor(*dst, src, &new_dst);
+      *dst = std::move(new_dst);
+    } else {
+      PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+          "Unexpected branch, output variable type is %s",
+          paddle::framework::ToTypeName(dst->Type())));
+    }
+  }
+}
 
 }  // namespace egr
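The rewritten VariableAdd dispatches on the runtime types of both operands: dense += dense goes through TensorAdd, dense += SelectedRows through SelectedRowsAddToTensor, and a SelectedRows destination plus a dense source through SelectedRowsAddTensor, which writes a fresh dense Variable that then replaces the destination; any other combination throws. A rough numpy sketch of the accumulation semantics only (not the actual kernels), modeling a SelectedRows value as a (rows, values) pair over a fixed dense height:

import numpy as np

def selected_rows_add_to_tensor(rows, values, dense):
    # dense += SelectedRows: scatter-add every stored row into the matching
    # row of the dense destination (the SelectedRowsAddToTensor branch).
    out = dense.copy()
    for r, v in zip(rows, values):
        out[r] += v
    return out

def selected_rows_add_tensor(rows, values, dense):
    # SelectedRows destination + dense source -> dense result: the same
    # scatter-add, but the result replaces the SelectedRows variable
    # (the SelectedRowsAddTensor branch with *dst = std::move(new_dst)).
    return selected_rows_add_to_tensor(rows, values, dense)

dense = np.zeros((4, 3))
rows = [1, 1, 3]              # duplicate row ids simply sum
values = np.ones((3, 3))
print(selected_rows_add_to_tensor(rows, values, dense))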
@@ -137,6 +137,10 @@ class TensorRuntimeInferVarTypeContext
       out->MutableVar()->GetMutable<paddle::framework::LoDTensor>();
       break;
     }
+    case paddle::framework::proto::VarType::SELECTED_ROWS: {
+      out->MutableVar()->GetMutable<pten::SelectedRows>();
+      break;
+    }
     default: {
       PADDLE_THROW(paddle::platform::errors::NotFound(
           "Cannot found var type: %s while running runtime InferVarType",
...
@@ -373,8 +373,7 @@ void SelectedRowsAddToTensor(const framework::Variable& src,
           framework::DataTypeToString(data_type)));
 }
 
-static void SelectedRowsAddTensor(
-    const framework::Variable& src_selected_rows_var,
+void SelectedRowsAddTensor(const framework::Variable& src_selected_rows_var,
                            const framework::Variable& src_tensor_var,
                            framework::Variable* dst_tensor_var) {
   const auto& src_selected_rows =
...
@@ -163,5 +163,14 @@ class SortedGradientAccumulator : public GradientAccumulator {
   std::vector<SavedVarInfo> tmp_grad_vars_;
 };
 
+void SelectedRowsAddToTensor(const framework::Variable& src,
+                             framework::Variable* dst);
+
+void SelectedRowsAddTensor(const framework::Variable& src_selected_rows_var,
+                           const framework::Variable& src_tensor_var,
+                           framework::Variable* dst_tensor_var);
+
+void TensorAdd(const framework::Variable& src, framework::Variable* dst);
+
 }  // namespace imperative
 }  // namespace paddle
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -26,6 +26,7 @@ from test_imperative_base import new_program_scope
 import numpy as np
 import six
 from utils import DyGraphProgramDescTracerTestHelper
+from paddle.fluid.framework import _test_eager_guard
 
 
 class SimpleNet(fluid.Layer):
@@ -74,7 +75,7 @@ class SimpleNet(fluid.Layer):
 
 class TestDygraphSimpleNet(unittest.TestCase):
-    def test_simple_net(self):
+    def func_simple_net(self):
         for is_sparse in [True, False]:
             dtype_list = ["float32"]
             if not core.is_compiled_with_rocm():
@@ -82,6 +83,11 @@ class TestDygraphSimpleNet(unittest.TestCase):
             for dtype in dtype_list:
                 self.simple_net_float32(is_sparse, dtype)
 
+    def test_simple_net(self):
+        with _test_eager_guard():
+            self.func_simple_net()
+        self.func_simple_net()
+
     def simple_net_float32(self, is_sparse, dtype):
         places = [fluid.CPUPlace()]
         if core.is_compiled_with_cuda():
...
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -25,6 +25,7 @@ from paddle.fluid.dygraph.base import to_variable
 from test_imperative_base import new_program_scope
 import numpy as np
 import six
+from paddle.fluid.framework import _test_eager_guard
 
 
 class SimpleNet(fluid.Layer):
@@ -80,7 +81,7 @@ class SimpleNet(fluid.Layer):
 
 class TestDygraphSimpleNet(unittest.TestCase):
-    def test_simple_net(self):
+    def func_simple_net(self):
         for is_sparse in [True, False]:
             dtype_list = ["float32"]
             if not core.is_compiled_with_rocm():
@@ -88,6 +89,11 @@ class TestDygraphSimpleNet(unittest.TestCase):
             for dtype in dtype_list:
                 self.simple_net_float(is_sparse, dtype)
 
+    def test_simple_net(self):
+        with _test_eager_guard():
+            self.func_simple_net()
+        self.func_simple_net()
+
     def simple_net_float(self, is_sparse, dtype):
         places = [fluid.CPUPlace()]
         if core.is_compiled_with_cuda():
...
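In both test files the pattern is the same: the original test body is renamed to func_simple_net, and a new test_simple_net runs it twice, once inside _test_eager_guard() to exercise the new eager-mode SelectedRows/LoDTensor accumulation paths, and once more in legacy dygraph mode to guard against regressions.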