From 7316018d372d8bfdf0bd5f4f5784d817ad8ff66a Mon Sep 17 00:00:00 2001
From: ronnywang <524019753@qq.com>
Date: Sun, 15 Aug 2021 21:01:01 -0500
Subject: [PATCH] [NPU] add p_norm_op_npu (#34695)

* add p_norm_op_npu

* remove p_norm_grad op

* update
---
 paddle/fluid/operators/p_norm_op_npu.cc       |  92 ++++++++++
 .../tests/unittests/npu/test_p_norm_op_npu.py | 160 ++++++++++++++++++
 2 files changed, 252 insertions(+)
 create mode 100644 paddle/fluid/operators/p_norm_op_npu.cc
 create mode 100644 python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py

diff --git a/paddle/fluid/operators/p_norm_op_npu.cc b/paddle/fluid/operators/p_norm_op_npu.cc
new file mode 100644
index 0000000000..3c5d1a36e9
--- /dev/null
+++ b/paddle/fluid/operators/p_norm_op_npu.cc
@@ -0,0 +1,92 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/p_norm_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename DeviceContext, typename T>
+class PnormNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* in_x = ctx.Input<framework::Tensor>("X");
+    auto* out_norm = ctx.Output<framework::Tensor>("Out");
+    out_norm->mutable_data<T>(ctx.GetPlace());
+
+    float porder = ctx.Attr<float>("porder");
+    int axis = ctx.Attr<int>("axis");
+    bool keepdim = ctx.Attr<bool>("keepdim");
+
+    auto xdim = in_x->dims();
+    if (axis < 0) axis = xdim.size() + axis;
+
+    auto stream =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
+
+    int p = 0;
+    bool combine_op =
+        !(porder == 0 || porder == INFINITY || porder == -INFINITY);
+    if (porder == INFINITY) {
+      p = INT_MAX;
+    } else if (porder == -INFINITY) {
+      p = INT_MIN;
+    } else {
+      p = static_cast<int>(porder);
+      float t = 0;
+      float diff = abs(std::modf(porder, &t));
+      if (diff < 1e-5) {
+        combine_op = false;
+      }
+    }
+
+    if (!combine_op) {
+      const auto& runner = NpuOpRunner("LpNorm", {*in_x}, {*out_norm},
+                                       {{"p", p},
+                                        {"axes", std::vector<int32_t>({axis})},
+                                        {"keep_dims", keepdim}});
+      runner.Run(stream);
+    } else {
+      Tensor tmp_x;
+      tmp_x.mutable_data<T>(xdim, ctx.GetPlace());
+
+      const auto& power_runner1 =
+          NpuOpRunner("Power", {*in_x}, {tmp_x},
+                      {{"power", porder}, {"scale", 1.0f}, {"shift", 0.0f}});
+      power_runner1.Run(stream);
+
+      const auto& reduce_runner = NpuOpRunner(
+          "ReduceSumD", {tmp_x}, {*out_norm},
+          {{"axes", std::vector<int32_t>({axis})}, {"keep_dims", keepdim}});
+      reduce_runner.Run(stream);
+
+      const auto& power_runner2 = NpuOpRunner(
+          "Power", {*out_norm}, {*out_norm},
+          {{"power", 1 / porder}, {"scale", 1.0f}, {"shift", 0.0f}});
+      power_runner2.Run(stream);
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+REGISTER_OP_NPU_KERNEL(
+    p_norm, ops::PnormNPUKernel<plat::NPUDeviceContext, float>,
+    ops::PnormNPUKernel<plat::NPUDeviceContext, plat::float16>);
diff --git a/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py
new file mode 100644
index 0000000000..9f990c0e29
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py
@@ -0,0 +1,160 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import unittest
+import numpy as np
+sys.path.append("..")
+
+import paddle
+from op_test import OpTest
+from test_norm_all import p_norm
+
+paddle.enable_static()
+
+
+class TestPnormOp(OpTest):
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.__class__.no_need_check_grad = True
+
+    def setUp(self):
+        self.set_npu()
+        self.op_type = "p_norm"
+        self.init_test_case()
+        x = (np.random.random(self.shape) + 0.5).astype(self.dtype)
+        norm = p_norm(x, self.axis, self.porder, self.keepdim)
+        self.inputs = {'X': x}
+        self.attrs = {
+            'epsilon': self.epsilon,
+            'axis': self.axis,
+            'keepdim': self.keepdim,
+            'porder': float(self.porder)
+        }
+        self.outputs = {'Out': norm}
+        self.gradient = self.calc_gradient()
+
+    def test_check_output(self):
+        if self.dtype == "float16":
+            self.check_output_with_place(paddle.NPUPlace(0), atol=5e-3)
+        else:
+            self.check_output_with_place(paddle.NPUPlace(0))
+
+    def init_test_case(self):
+        self.shape = [2, 3, 4, 5]
+        self.axis = 1
+        self.epsilon = 1e-12
+        self.porder = 2.0
+        self.keepdim = False
+        self.init_dtype()
+
+    def init_dtype(self):
+        self.dtype = "float32"
+
+    def calc_gradient(self):
+        self.attrs = {
+            'epsilon': self.epsilon,
+            'axis': self.axis,
+            'keepdim': self.keepdim,
+            'porder': float(self.porder)
+        }
+        x = self.inputs["X"]
+        porder = self.attrs["porder"]
+        axis = self.attrs["axis"]
+        if porder == 0:
+            grad = np.zeros(x.shape).astype(x.dtype)
+        elif porder in [float("inf"), float("-inf")]:
+            norm = p_norm(x, axis=axis, porder=porder, keepdims=True)
+            x_abs = np.abs(x)
+            grad = np.sign(x)
+            grad[x_abs != norm] = 0.0
+        else:
+            norm = p_norm(x, axis=axis, porder=porder, keepdims=True)
+            grad = np.power(norm, 1 - porder) * np.power(
+                np.abs(x), porder - 1) * np.sign(x)
+
+        numel = 1
+        for s in x.shape:
+            numel *= s
+        numel /= x.shape[axis]
+        return [grad.astype(x.dtype) * 1 / numel]
+
+
+class TestPnormOp2(TestPnormOp):
+    def init_test_case(self):
+        self.shape = [3, 20, 3]
+        self.axis = 2
+        self.epsilon = 1e-12
+        self.porder = 2.0
+        self.keepdim = True
+        self.init_dtype()
+
+
+class TestPnormOp3(TestPnormOp):
+    def init_test_case(self):
+        self.shape = [3, 20, 3]
+        self.axis = 2
+        self.epsilon = 1e-12
+        self.porder = np.inf
+        self.keepdim = True
+        self.init_dtype()
+
+
+class TestPnormOp4(TestPnormOp3):
+    def init_test_case(self):
+        self.shape = [3, 20, 3]
+        self.axis = 2
+        self.epsilon = 1e-12
+        self.porder = -np.inf
+        self.keepdim = True
+        self.init_dtype()
+
+
+class TestPnormOp5(TestPnormOp3):
+    def init_test_case(self):
+        self.shape = [3, 20, 3]
+        self.axis = 2
+        self.epsilon = 1e-12
+        self.porder = 0
+        self.keepdim = True
+        self.init_dtype()
+
+
+class TestPnormOpfp16(TestPnormOp):
+    def init_dtype(self):
+        self.dtype = "float16"
+
+
+class TestPnormOp2fp16(TestPnormOp2):
+    def init_dtype(self):
+        self.dtype = "float16"
+
+
+class TestPnormOp3fp16(TestPnormOp3):
+    def init_dtype(self):
+        self.dtype = "float16"
+
+
+class TestPnormOp4fp16(TestPnormOp4):
+    def init_dtype(self):
+        self.dtype = "float16"
+
+
+class TestPnormOp5fp16(TestPnormOp5):
+    def init_dtype(self):
+        self.dtype = "float16"
+
+
+if __name__ == "__main__":
+    unittest.main()
--
GitLab
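
Note (not part of the patch): for a finite, non-integer porder the kernel falls back to a three-step decomposition, Power -> ReduceSumD -> Power, i.e. out = (sum x^p)^(1/p) along `axis`. The first Power is applied to `x` itself rather than |x|, which is safe for these tests because the inputs are strictly positive (np.random.random(...) + 0.5). Below is a minimal NumPy sketch of that decomposition; the helper name `p_norm_combined` and the sample shape/porder values are chosen purely for illustration.

    import numpy as np

    def p_norm_combined(x, porder, axis, keepdim=False):
        # Mirror the NPU fallback path: elementwise power, reduce-sum, then the 1/p power.
        powered = np.power(x, porder)                           # "Power" step (assumes x > 0)
        summed = np.sum(powered, axis=axis, keepdims=keepdim)   # "ReduceSumD" step
        return np.power(summed, 1.0 / porder)                   # final "Power" step

    x = (np.random.random((3, 20, 3)) + 0.5).astype("float32")
    out = p_norm_combined(x, porder=2.5, axis=2, keepdim=True)
    print(out.shape)  # (3, 20, 1)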