From 5cd6a707befd4970aed93340043a7555e97af578 Mon Sep 17 00:00:00 2001
From: zhangkaihuo <zhangkaihuo@baidu.com>
Date: Sun, 9 Oct 2022 17:55:35 +0800
Subject: [PATCH] add sync_batch_norm_kernel (#46430)

---
 paddle/fluid/operators/sparse_manual_op.cc    |  2 +-
 paddle/phi/api/yaml/sparse_backward.yaml      | 12 +++
 paddle/phi/api/yaml/sparse_ops.yaml           | 10 +++
 .../sparse/gpu/sync_batch_norm_grad_kernel.cu | 85 +++++++++++++++++++
 .../sparse/gpu/sync_batch_norm_kernel.cu      | 84 ++++++++++++++++++
 .../sparse/sync_batch_norm_grad_kernel.h      | 47 ++++++++++
 .../kernels/sparse/sync_batch_norm_kernel.h   | 47 ++++++++++
 .../paddle/incubate/sparse/nn/layer/norm.py   | 11 +--
 8 files changed, 292 insertions(+), 6 deletions(-)
 create mode 100644 paddle/phi/kernels/sparse/gpu/sync_batch_norm_grad_kernel.cu
 create mode 100644 paddle/phi/kernels/sparse/gpu/sync_batch_norm_kernel.cu
 create mode 100644 paddle/phi/kernels/sparse/sync_batch_norm_grad_kernel.h
 create mode 100644 paddle/phi/kernels/sparse/sync_batch_norm_kernel.h
diff --git a/paddle/fluid/operators/sparse_manual_op.cc b/paddle/fluid/operators/sparse_manual_op.cc
index 04e12391b4..327e03af80 100644
--- a/paddle/fluid/operators/sparse_manual_op.cc
+++ b/paddle/fluid/operators/sparse_manual_op.cc
@@ -213,7 +213,7 @@ class SparseBatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<bool>("fuse_with_relu",
                   "(bool), attribute 4 for sparse_batch_norm op.");
     AddComment(R"DOC(
-TODO: Documentation of sparse_conv3d op.
+TODO: Documentation of sparse_batch_norm op.
 )DOC");
   }
 };
diff --git a/paddle/phi/api/yaml/sparse_backward.yaml b/paddle/phi/api/yaml/sparse_backward.yaml
index 86346ad681..5850acb3c3 100644
--- a/paddle/phi/api/yaml/sparse_backward.yaml
+++ b/paddle/phi/api/yaml/sparse_backward.yaml
@@ -356,6 +356,18 @@
     func : subtract_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
            subtract_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}
 
+- backward_op : sync_batch_norm_grad  # sparse variant; only a COO kernel is listed under kernel.func
+  forward : sync_batch_norm(Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
+  args : (Tensor x, Tensor scale, Tensor bias, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
+  output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
+  infer_meta :
+    func : GeneralTernaryGradInferMeta
+    param : [x, scale, bias]
+  kernel :
+    func : sync_batch_norm_coo_grad{sparse_coo, dense, dense, dense, dense, dense, sparse_coo -> sparse_coo, dense, dense}
+    data_type : out_grad
+  optional : reserve_space
+
 - backward_op : tan_grad
   forward : tan(Tensor x) -> Tensor(out)
   args : (Tensor x, Tensor out_grad)
diff --git a/paddle/phi/api/yaml/sparse_ops.yaml b/paddle/phi/api/yaml/sparse_ops.yaml
index 17460fc69e..43f8688fb8 100644
--- a/paddle/phi/api/yaml/sparse_ops.yaml
+++ b/paddle/phi/api/yaml/sparse_ops.yaml
@@ -479,3 +479,13 @@
            transpose_csr{sparse_csr -> sparse_csr}
     layout : x
   backward : transpose_grad
+
+- op : sync_batch_norm  # sparse variant; only a COO kernel is listed under kernel.func
+  args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
+  output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
+  infer_meta :
+    func : BatchNormInferMeta
+  kernel :
+    func : sync_batch_norm_coo{sparse_coo, dense, dense, dense, dense -> sparse_coo, dense, dense, dense, dense, dense}
+    data_type : x
+  backward : sync_batch_norm_grad
diff --git a/paddle/phi/kernels/sparse/gpu/sync_batch_norm_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/sync_batch_norm_grad_kernel.cu
new file mode 100644
index 0000000000..e0805578a0
--- /dev/null
+++ b/paddle/phi/kernels/sparse/gpu/sync_batch_norm_grad_kernel.cu
@@ -0,0 +1,101 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/kernels/sparse/sync_batch_norm_grad_kernel.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/empty_kernel.h"
+#include "paddle/phi/kernels/sparse/empty_kernel.h"
+#include "paddle/phi/kernels/sync_batch_norm_grad_kernel.h"
+
+namespace phi {
+namespace sparse {
+
+// Gradient kernel of sync batch norm for a SparseCooTensor input.
+//
+// Creates x_grad from x with EmptyLikeCooKernel, allocates scale_grad and
+// bias_grad with EmptyLike when those outputs are present, and then calls the
+// dense phi::SyncBatchNormGradKernel with x.values() and y_grad.values() in
+// place of the dense input and output gradient. Every other argument is
+// forwarded unchanged, with x_grad->mutable_values() as the dense dx output.
+template <typename T, typename Context>
+void SyncBatchNormCooGradKernel(
+    const Context& dev_ctx,
+    const SparseCooTensor& x,
+    const DenseTensor& scale,
+    const DenseTensor& bias,
+    const DenseTensor& saved_mean,
+    const DenseTensor& saved_variance,
+    const paddle::optional<DenseTensor>& reserve_space,
+    const SparseCooTensor& y_grad,
+    float momentum,
+    float epsilon,
+    const std::string& data_layout,
+    bool is_test,
+    bool use_global_stats,
+    bool trainable_statistics,
+    bool fuse_with_relu,
+    SparseCooTensor* x_grad,
+    DenseTensor* scale_grad,
+    DenseTensor* bias_grad) {
+  EmptyLikeCooKernel<T, Context>(dev_ctx, x, x_grad);
+  // NOTE(review): scale_grad / bias_grad are assumed to arrive as nullptr when
+  // the corresponding gradient is not requested (the dense kernel is expected
+  // to tolerate that) -- confirm. Only allocate the outputs that exist.
+  if (scale_grad != nullptr) {
+    *scale_grad = phi::EmptyLike<T, Context>(dev_ctx, scale);
+  }
+  if (bias_grad != nullptr) {
+    *bias_grad = phi::EmptyLike<T, Context>(dev_ctx, bias);
+  }
+  phi::SyncBatchNormGradKernel<T, Context>(dev_ctx,
+                                           x.values(),
+                                           scale,
+                                           bias,
+                                           saved_mean,
+                                           saved_variance,
+                                           reserve_space,
+                                           y_grad.values(),
+                                           momentum,
+                                           epsilon,
+                                           data_layout,
+                                           is_test,
+                                           use_global_stats,
+                                           trainable_statistics,
+                                           fuse_with_relu,
+                                           x_grad->mutable_values(),
+                                           scale_grad,
+                                           bias_grad);
+}
+
+}  // namespace sparse
+}  // namespace phi
+
+// HIP builds register float and float16 only; all other GPU builds also
+// register double.
+#ifdef PADDLE_WITH_HIP
+PD_REGISTER_KERNEL(sync_batch_norm_coo_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SyncBatchNormCooGradKernel,
+                   float,
+                   phi::dtype::float16) {}
+#else
+PD_REGISTER_KERNEL(sync_batch_norm_coo_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SyncBatchNormCooGradKernel,
+                   float,
+                   double,
+                   phi::dtype::float16) {}
+#endif
diff --git a/paddle/phi/kernels/sparse/gpu/sync_batch_norm_kernel.cu b/paddle/phi/kernels/sparse/gpu/sync_batch_norm_kernel.cu
new file mode 100644
index 0000000000..a518148f2c
--- /dev/null
+++ b/paddle/phi/kernels/sparse/gpu/sync_batch_norm_kernel.cu
@@ -0,0 +1,84 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/kernels/sparse/sync_batch_norm_kernel.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/sparse/empty_kernel.h"
+#include "paddle/phi/kernels/sync_batch_norm_kernel.h"
+
+namespace phi {
+namespace sparse {
+// Runs the dense phi::SyncBatchNormKernel on x.values(), writing y's values.
+template <typename T, typename Context>
+void SyncBatchNormCooKernel(const Context& dev_ctx,
+                            const SparseCooTensor& x,
+                            const DenseTensor& scale,
+                            const DenseTensor& bias,
+                            const DenseTensor& mean,
+                            const DenseTensor& variance,
+                            float momentum,
+                            float epsilon,
+                            const std::string& data_layout,
+                            bool is_test,
+                            bool use_global_stats,
+                            bool trainable_statistics,
+                            bool fuse_with_relu,
+                            SparseCooTensor* y,
+                            DenseTensor* mean_out,
+                            DenseTensor* variance_out,
+                            DenseTensor* saved_mean,
+                            DenseTensor* saved_variance,
+                            DenseTensor* reserve_space) {
+  EmptyLikeCooKernel<T, Context>(dev_ctx, x, y);  // create y from x
+  phi::SyncBatchNormKernel<T, Context>(dev_ctx,
+                                       x.values(),
+                                       scale,
+                                       bias,
+                                       mean,
+                                       variance,
+                                       momentum,
+                                       epsilon,
+                                       data_layout,
+                                       is_test,
+                                       use_global_stats,
+                                       trainable_statistics,
+                                       fuse_with_relu,
+                                       y->mutable_values(),
+                                       mean_out,
+                                       variance_out,
+                                       saved_mean,
+                                       saved_variance,
+                                       reserve_space);
+  y->SetIndicesDict(x.GetIndicesDict());  // y reuses x's indices dict
+}
+
+}  // namespace sparse
+}  // namespace phi
+
+#ifdef PADDLE_WITH_HIP  // HIP: float and float16 only; otherwise double too
+PD_REGISTER_KERNEL(sync_batch_norm_coo,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SyncBatchNormCooKernel,
+                   float,
+                   phi::dtype::float16) {}
+#else
+PD_REGISTER_KERNEL(sync_batch_norm_coo,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::sparse::SyncBatchNormCooKernel,
+                   float,
+                   double,
+                   phi::dtype::float16) {}
+#endif
diff --git a/paddle/phi/kernels/sparse/sync_batch_norm_grad_kernel.h b/paddle/phi/kernels/sparse/sync_batch_norm_grad_kernel.h
new file mode 100644
index 0000000000..9591e6f035
--- /dev/null
+++ b/paddle/phi/kernels/sparse/sync_batch_norm_grad_kernel.h
@@ -0,0 +1,47 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <string>
+
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/sparse_coo_tensor.h"
+
+namespace phi {
+namespace sparse {
+// Sync batch norm backward for sparse COO x / y_grad; x_grad is COO as well.
+template <typename T, typename Context>
+void SyncBatchNormCooGradKernel(
+    const Context& dev_ctx,
+    const SparseCooTensor& x,
+    const DenseTensor& scale,
+    const DenseTensor& bias,
+    const DenseTensor& saved_mean,
+    const DenseTensor& saved_variance,
+    const paddle::optional<DenseTensor>& reserve_space,
+    const SparseCooTensor& y_grad,
+    float momentum,
+    float epsilon,
+    const std::string& data_layout,
+    bool is_test,
+    bool use_global_stats,
+    bool trainable_statistics,
+    bool fuse_with_relu,
+    SparseCooTensor* x_grad,
+    DenseTensor* scale_grad,
+    DenseTensor* bias_grad);
+
+}  // namespace sparse
+}  // namespace phi
diff --git a/paddle/phi/kernels/sparse/sync_batch_norm_kernel.h b/paddle/phi/kernels/sparse/sync_batch_norm_kernel.h
new file mode 100644
index 0000000000..7ee4baa107
--- /dev/null
+++ b/paddle/phi/kernels/sparse/sync_batch_norm_kernel.h
@@ -0,0 +1,47 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <string>
+
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/sparse_coo_tensor.h"
+
+namespace phi {
+namespace sparse {
+// Sync batch norm forward for a SparseCooTensor x; the result is written to y.
+template <typename T, typename Context>
+void SyncBatchNormCooKernel(const Context& dev_ctx,
+                            const SparseCooTensor& x,
+                            const DenseTensor& scale,
+                            const DenseTensor& bias,
+                            const DenseTensor& mean,
+                            const DenseTensor& variance,
+                            float momentum,
+                            float epsilon,
+                            const std::string& data_layout,
+                            bool is_test,
+                            bool use_global_stats,
+                            bool trainable_statistics,
+                            bool fuse_with_relu,
+                            SparseCooTensor* y,
+                            DenseTensor* mean_out,
+                            DenseTensor* variance_out,
+                            DenseTensor* saved_mean,
+                            DenseTensor* saved_variance,
+                            DenseTensor* reserve_space);
+
+}  // namespace sparse
+}  // namespace phi
diff --git a/python/paddle/incubate/sparse/nn/layer/norm.py b/python/paddle/incubate/sparse/nn/layer/norm.py
index 3334cdab84..51dedaf5d9 100644
--- a/python/paddle/incubate/sparse/nn/layer/norm.py
+++ b/python/paddle/incubate/sparse/nn/layer/norm.py
@@ -296,11 +296,25 @@ class SyncBatchNorm(paddle.nn.SyncBatchNorm):
                              bias_attr, data_format, name)
 
     def forward(self, x):
+        """Run sync batch norm on a sparse COO tensor.
+
+        Args:
+            x: input Tensor; must be in sparse COO format.
+
+        Returns:
+            The first output of ``sparse_sync_batch_norm`` (the normalized
+            tensor); the remaining five outputs are discarded.
+        """
         assert x.is_sparse_coo(
         ), "SyncBatchNorm only support SparseTensor in COO format."
-        out = super(SyncBatchNorm, self).forward(x.values())
-        return paddle.incubate.sparse.sparse_coo_tensor(
-            x.indices(), out, shape=x.shape, stop_gradient=x.stop_gradient)
+        self._check_data_format()
+        # Positional attrs after the tensors: momentum, epsilon, data_layout,
+        # is_test, use_global_stats, trainable_statistics, fuse_with_relu.
+        sync_batch_norm_out, _, _, _, _, _ = _C_ops.sparse_sync_batch_norm(
+            x, self.weight, self.bias, self._mean, self._variance,
+            self._momentum, self._epsilon, self._data_format, not self.training,
+            False, False, False)
+        return sync_batch_norm_out
 
     @classmethod
     def convert_sync_batchnorm(cls, layer):
-- 
GitLab