Add sparse.coalesce (#44256)

* add sparse api coalesce

Add sparse.coalesce (#44256)
* add sparse api coalesce
fd6b1a02 · zhangkaihuo · GitHub · 77c010a0 · fd6b1a02 · fd6b1a02
11 changed files
--- a/paddle/phi/api/yaml/sparse_api.yaml
+++ b/paddle/phi/api/yaml/sparse_api.yaml
@@ -266,6 +266,13 @@
    layout : x
  backward : values_grad
+- api: coalesce
+  args : (Tensor x)
+  output : Tensor(out)
+  kernel :
+    func: coalesce{sparse_coo -> sparse_coo}
+    layout : x
 - api: full_like
  args : (Tensor x, Scalar value, DataType dtype=DataType::UNDEFINED)
  output : Tensor(out)

--- a/paddle/phi/kernels/sparse/coalesced_kernel.h
+++ b/paddle/phi/kernels/sparse/coalesced_kernel.h
@@ -22,9 +22,16 @@ namespace phi {
 namespace sparse {
 template <typename T, typename Context>
-void CoalescedKernel(const Context& dev_ctx,
+void CoalesceKernel(const Context& dev_ctx,
                    const SparseCooTensor& x,
                    SparseCooTensor* out);
+template <typename T, typename Context>
+SparseCooTensor Coalesce(const Context& dev_ctx, const SparseCooTensor& x) {
+  SparseCooTensor coo;
+  CoalesceKernel<T, Context>(dev_ctx, x, &coo);
+  return coo;
+}
 }  // namespace sparse
 }  // namespace phi
--- a/paddle/phi/kernels/sparse/cpu/coalesced_kernel.cc
+++ b/paddle/phi/kernels/sparse/cpu/coalesced_kernel.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/phi/kernels/sparse/coalesced_kernel.h"
+#include "paddle/phi/kernels/sparse/coalesce_kernel.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/visit_type.h"
@@ -22,7 +22,7 @@ namespace phi {
 namespace sparse {
 template <typename T, typename IntT>
-void CoalescedCPUKernel(const CPUContext& dev_ctx,
+void CoalesceCPUKernel(const CPUContext& dev_ctx,
                       const SparseCooTensor& x,
                       SparseCooTensor* out) {
  const DenseTensor& x_indices = x.non_zero_indices();
@@ -95,22 +95,22 @@ void CoalescedCPUKernel(const CPUContext& dev_ctx,
 }
 template <typename T, typename Context>
-void CoalescedKernel(const Context& dev_ctx,
+void CoalesceKernel(const Context& dev_ctx,
                    const SparseCooTensor& x,
                    SparseCooTensor* out) {
  PD_VISIT_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "CoalescedCPUKernel", ([&] {
+      x.non_zero_indices().dtype(), "CoalesceCPUKernel", ([&] {
-        CoalescedCPUKernel<T, data_t>(dev_ctx, x, out);
+        CoalesceCPUKernel<T, data_t>(dev_ctx, x, out);
      }));
 }
 }  // namespace sparse
 }  // namespace phi
-PD_REGISTER_KERNEL(sort,
+PD_REGISTER_KERNEL(coalesce,
                   CPU,
                   ALL_LAYOUT,
-                   phi::sparse::CoalescedKernel,
+                   phi::sparse::CoalesceKernel,
                   float,
                   double,
                   phi::dtype::float16,

--- a/paddle/phi/kernels/sparse/gpu/coalesced_kernel.cu
+++ b/paddle/phi/kernels/sparse/gpu/coalesced_kernel.cu
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/phi/kernels/sparse/coalesced_kernel.h"
+#include "paddle/phi/kernels/sparse/coalesce_kernel.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
@@ -27,7 +27,7 @@ namespace phi {
 namespace sparse {
 template <typename T, typename IntT>
-void CoalescedGPUKernel(const GPUContext& dev_ctx,
+void CoalesceGPUKernel(const GPUContext& dev_ctx,
                       const SparseCooTensor& x,
                       SparseCooTensor* out) {
  const DenseTensor& x_indices = x.non_zero_indices();
@@ -55,11 +55,7 @@ void CoalescedGPUKernel(const GPUContext& dev_ctx,
  phi::backends::gpu::GpuMemcpyAsync(d_sparse_offsets.data<IntT>(),
                                     sparse_offsets.data(),
                                     sizeof(IntT) * sparse_dim,
-#ifdef PADDLE_WITH_HIP
+                                     gpuMemcpyHostToDevice,
-                                     hipMemcpyHostToDevice,
-#else
-                                     cudaMemcpyHostToDevice,
-#endif
                                     dev_ctx.stream());
  // 1. flatten indices
@@ -117,11 +113,7 @@ void CoalescedGPUKernel(const GPUContext& dev_ctx,
  phi::backends::gpu::GpuMemcpyAsync(&out_nnz,
                                     out_indices.data<IntT>(),
                                     sizeof(IntT),
-#ifdef PADDLE_WITH_HIP
+                                     gpuMemcpyDeviceToHost,
-                                     hipMemcpyDeviceToHost,
-#else
-                                     cudaMemcpyDeviceToHost,
-#endif
                                     dev_ctx.stream());
  dev_ctx.Wait();
@@ -161,22 +153,21 @@ void CoalescedGPUKernel(const GPUContext& dev_ctx,
 }
 template <typename T, typename Context>
-void CoalescedKernel(const Context& dev_ctx,
+void CoalesceKernel(const Context& dev_ctx,
                    const SparseCooTensor& x,
                    SparseCooTensor* out) {
  PD_VISIT_INTEGRAL_TYPES(
-      x.non_zero_indices().dtype(), "CoalescedGPUKernel", ([&] {
+      x.non_zero_indices().dtype(), "CoalesceGPUKernel", ([&] {
-        CoalescedGPUKernel<T, data_t>(dev_ctx, x, out);
+        CoalesceGPUKernel<T, data_t>(dev_ctx, x, out);
      }));
 }
 }  // namespace sparse
 }  // namespace phi
-PD_REGISTER_KERNEL(sort,
+PD_REGISTER_KERNEL(coalesce,
                   GPU,
                   ALL_LAYOUT,
-                   phi::sparse::CoalescedKernel,
+                   phi::sparse::CoalesceKernel,
                   float,
                   double,
                   phi::dtype::float16,

--- a/paddle/phi/kernels/sparse/sparse_utils_kernel.h
+++ b/paddle/phi/kernels/sparse/sparse_utils_kernel.h
@@ -19,7 +19,6 @@ limitations under the License. */
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/core/sparse_csr_tensor.h"
 #include "paddle/phi/kernels/empty_kernel.h"
-#include "paddle/phi/kernels/sparse/coalesced_kernel.h"
 namespace phi {
 namespace sparse {
@@ -154,9 +153,8 @@ void SparseCooTensorKernel(const Context& dev_ctx,
                           const DenseTensor& indices,
                           const IntArray& dense_shape,
                           SparseCooTensor* out) {
-  SparseCooTensor before_coalesced(
+  *out =
-      indices, values, phi::make_ddim(dense_shape.GetData()));
+      SparseCooTensor(indices, values, phi::make_ddim(dense_shape.GetData()));
-  CoalescedKernel<T, Context>(dev_ctx, before_coalesced, out);
 }
 }  // namespace sparse

--- a/paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc
+++ b/paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc
@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
+#include "paddle/phi/kernels/sparse/coalesce_kernel.h"
 #include "paddle/phi/kernels/sparse/convolution_grad_kernel.h"
 #include "paddle/phi/kernels/sparse/convolution_kernel.h"
@@ -207,6 +208,8 @@ void TestConv3dBase(const std::vector<IntT>& indices,
                                            subm,
                                            &d_rulebook);
+  SparseCooTensor tmp_d_out = sparse::Coalesce<T>(dev_ctx_gpu, d_out);
  ASSERT_EQ(correct_out_dims.size(), d_out.dims().size());
  ASSERT_EQ((int64_t)correct_out_features.size() / out_channels, d_out.nnz());
  for (int i = 0; i < correct_out_dims.size(); i++) {
@@ -217,7 +220,7 @@ void TestConv3dBase(const std::vector<IntT>& indices,
      dev_ctx_cpu,
      DenseTensorMeta(indices_dtype, {4, d_out.nnz()}, DataLayout::NCHW));
  phi::Copy(dev_ctx_gpu,
-            d_out.non_zero_indices(),
+            tmp_d_out.non_zero_indices(),
            phi::CPUPlace(),
            true,
            &h_indices_tensor);
@@ -231,7 +234,7 @@ void TestConv3dBase(const std::vector<IntT>& indices,
      phi::EmptyLike<T>(dev_ctx_cpu, d_out.non_zero_elements());
  phi::Copy(dev_ctx_gpu,
-            d_out.non_zero_elements(),
+            tmp_d_out.non_zero_elements(),
            phi::CPUPlace(),
            true,
            &h_features_tensor);

--- a/paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc
+++ b/paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc
@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
+#include "paddle/phi/kernels/sparse/coalesce_kernel.h"
 #include "paddle/phi/kernels/sparse/sparse_pool_grad_kernel.h"
 #include "paddle/phi/kernels/sparse/sparse_pool_kernel.h"
@@ -157,6 +158,7 @@ void TestMaxPoolBase(const std::vector<IntT>& indices,
                                             dilations,
                                             strides,
                                             &d_rulebook);
+  SparseCooTensor tmp_d_out = sparse::Coalesce<T>(dev_ctx_gpu, d_out);
  ASSERT_EQ(correct_out_dims.size(), d_out.dims().size());
  ASSERT_EQ((int64_t)correct_out_features.size() / out_channels, d_out.nnz());
@@ -168,7 +170,7 @@ void TestMaxPoolBase(const std::vector<IntT>& indices,
      dev_ctx_cpu,
      DenseTensorMeta(indices_dtype, {4, d_out.nnz()}, DataLayout::NCHW));
  phi::Copy(dev_ctx_gpu,
-            d_out.non_zero_indices(),
+            tmp_d_out.non_zero_indices(),
            phi::CPUPlace(),
            true,
            &h_indices_tensor);
@@ -182,7 +184,7 @@ void TestMaxPoolBase(const std::vector<IntT>& indices,
      phi::EmptyLike<T>(dev_ctx_cpu, d_out.non_zero_elements());
  phi::Copy(dev_ctx_gpu,
-            d_out.non_zero_elements(),
+            tmp_d_out.non_zero_elements(),
            phi::CPUPlace(),
            true,
            &h_features_tensor);

--- a/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py
@@ -53,6 +53,7 @@ class TestSparseConv(unittest.TestCase):
                groups=1,
                data_format="NDHWC")
            out.backward(out)
+            out = paddle.incubate.sparse.coalesce(out)
            assert np.array_equal(correct_out_values, out.values().numpy())
    def test_subm_conv3d(self):

--- a/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sparse_utils_op.py
@@ -298,6 +298,7 @@ class TestSparseConvert(unittest.TestCase):
                    values = paddle.to_tensor(values, dtype='float32')
                    sparse_x = paddle.incubate.sparse.sparse_coo_tensor(
                        indices, values)
+                    sparse_x = paddle.incubate.sparse.coalesce(sparse_x)
                    indices_sorted = [[0, 1], [1, 0]]
                    values_sorted = [5.0, 1.0]
                    assert np.array_equal(indices_sorted,
@@ -310,6 +311,7 @@ class TestSparseConvert(unittest.TestCase):
                    values = paddle.to_tensor(values, dtype='float32')
                    sparse_x = paddle.incubate.sparse.sparse_coo_tensor(
                        indices, values)
+                    sparse_x = paddle.incubate.sparse.coalesce(sparse_x)
                    values_sorted = [[5.0, 5.0], [1.0, 1.0]]
                    assert np.array_equal(indices_sorted,
                                          sparse_x.indices().numpy())

--- a/python/paddle/incubate/sparse/__init__.py
+++ b/python/paddle/incubate/sparse/__init__.py
@@ -30,6 +30,7 @@ from .unary import abs
 from .unary import pow
 from .unary import cast
 from .unary import neg
+from .unary import coalesce
 from .binary import mv
 from .binary import matmul
@@ -66,4 +67,5 @@ __all__ = [
    'subtract',
    'multiply',
    'divide',
+    'coalesce',
 ]
--- a/python/paddle/incubate/sparse/unary.py
+++ b/python/paddle/incubate/sparse/unary.py
@@ -472,3 +472,34 @@ def abs(x, name=None):
    """
    return _C_ops.final_state_sparse_abs(x)
+@dygraph_only
+def coalesce(x):
+    r"""
+    The coalesce operator sorts the indices of x and merges entries that share the same index; after coalescing, the indices of x are sorted and unique.
+    Parameters:
+        x (Tensor): the input SparseCooTensor.
+    Returns:
+        Tensor: the coalesced SparseCooTensor.
+    Examples:
+        .. code-block:: python
+            import paddle
+            from paddle.incubate import sparse
+            from paddle.fluid.framework import _test_eager_guard
+            with _test_eager_guard():
+                indices = [[0, 0, 1], [1, 1, 2]]
+                values = [1.0, 2.0, 3.0]
+                sp_x = sparse.sparse_coo_tensor(indices, values)
+                sp_x = sparse.coalesce(sp_x)
+                print(sp_x.indices())
+                #[[0, 1], [1, 2]]
+                print(sp_x.values())
+                #[3.0, 3.0]
+    """
+    return _C_ops.final_state_sparse_coalesce(x)