From b64312fcbaaac5d9c22fd072bc1518a6e5857389 Mon Sep 17 00:00:00 2001
From: Aganlengzi
Date: Tue, 10 Aug 2021 18:44:07 +0800
Subject: [PATCH] [NPU] add squared_l2_norm squared_l2_norm_grad and tests
 (#34708)

* [NPU] add squared_l2_norm squared_l2_norm_grad and tests
* [NPU] replace Square&ReduceSumD with SquareSumV1

---
 .../fluid/operators/squared_l2_norm_op_npu.cc | 99 +++++++++++++++++++
 .../npu/test_squared_l2_norm_op_npu.py        | 57 +++++++++++
 2 files changed, 156 insertions(+)
 create mode 100644 paddle/fluid/operators/squared_l2_norm_op_npu.cc
 create mode 100644 python/paddle/fluid/tests/unittests/npu/test_squared_l2_norm_op_npu.py
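For reference: the forward kernel reduces over every axis, so SquareSumV1
yields a scalar sum of squares, and the grad kernel realizes dx = 2 * x * dout
through the BroadcastToD -> Mul -> Mul chain. A minimal NumPy sketch of that
math (the helper names below are illustrative only, not Paddle API):

import numpy as np

def squared_l2_norm(x):
    # Forward: SquareSumV1 with axis = [0, ..., rank) and keep_dims=False
    # collapses the input to a single scalar sum of squared elements.
    return np.sum(np.square(x))

def squared_l2_norm_grad(x, dout):
    # Backward: broadcast the scalar dout to x's shape, multiply by x,
    # then by the constant 2, i.e. dx = 2 * x * dout.
    return 2.0 * np.broadcast_to(dout, x.shape) * x

x = np.random.uniform(-1, 1, (13, 19)).astype("float32")
assert np.isclose(squared_l2_norm(x), np.square(np.linalg.norm(x)))
assert np.allclose(squared_l2_norm_grad(x, np.float32(1.0)), 2.0 * x)
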
diff --git a/paddle/fluid/operators/squared_l2_norm_op_npu.cc b/paddle/fluid/operators/squared_l2_norm_op_npu.cc
new file mode 100644
index 00000000000..fb4d8fefda7
--- /dev/null
+++ b/paddle/fluid/operators/squared_l2_norm_op_npu.cc
@@ -0,0 +1,99 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/squared_l2_norm_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename DeviceContext, typename T>
+class SquaredL2NormNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext &context) const override {
+    auto *x = context.Input<Tensor>("X");
+    auto *out = context.Output<Tensor>("Out");
+
+    auto place = context.GetPlace();
+    auto stream =
+        context.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
+
+    std::vector<int> axis;
+    for (int i = 0; i < x->dims().size(); ++i) {
+      axis.push_back(i);
+    }
+    out->mutable_data<T>(place);
+    const auto &runner = NpuOpRunner("SquareSumV1", {*x}, {*out},
+                                     {{"axis", axis}, {"keep_dims", false}});
+    runner.Run(stream);
+  }
+};
+
+template <typename DeviceContext, typename T>
+class SquaredL2NormGradNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext &context) const override {
+    auto *x = context.Input<Tensor>("X");
+    auto *x_grad = context.Output<Tensor>(framework::GradVarName("X"));
+    auto *out_grad = context.Input<Tensor>(framework::GradVarName("Out"));
+
+    PADDLE_ENFORCE_EQ(
+        out_grad->numel(), 1,
+        platform::errors::InvalidArgument(
+            "Input(GRAD@Out) of SquaredL2NormGradOP should be a scalar."));
+
+    auto place = context.GetPlace();
+    auto stream =
+        context.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
+
+    // broadcast out_grad
+    Tensor broadcasted_out_grad;
+    broadcasted_out_grad.mutable_data<T>(x_grad->dims(), place);
+    const auto &broadcast_runner =
+        NpuOpRunner("BroadcastToD", {*out_grad}, {broadcasted_out_grad},
+                    {{"shape", framework::vectorize(x_grad->dims())}});
+    broadcast_runner.Run(stream);
+    // mul x
+    Tensor tmp_x_grad;
+    tmp_x_grad.mutable_data<T>(x_grad->dims(), place);
+    const auto &mul_x_runner =
+        NpuOpRunner("Mul", {broadcasted_out_grad, *x}, {tmp_x_grad}, {});
+    mul_x_runner.Run(stream);
+    // mul coefficient:2
+    Tensor coefficient;
+    coefficient.mutable_data<T>({1}, place);
+    FillNpuTensorWithConstant<T>(&coefficient, static_cast<T>(2.0));
+    x_grad->mutable_data<T>(place);
+    const auto &mul_coefficient_runner =
+        NpuOpRunner("Mul", {tmp_x_grad, coefficient}, {*x_grad}, {});
+    mul_coefficient_runner.Run(stream);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+REGISTER_OP_NPU_KERNEL(
+    squared_l2_norm,
+    ops::SquaredL2NormNPUKernel<plat::NPUDeviceContext, float>);
+REGISTER_OP_NPU_KERNEL(
+    squared_l2_norm_grad,
+    ops::SquaredL2NormGradNPUKernel<plat::NPUDeviceContext, float>);
diff --git a/python/paddle/fluid/tests/unittests/npu/test_squared_l2_norm_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_squared_l2_norm_op_npu.py
new file mode 100644
index 00000000000..d3ee8df1cd1
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_squared_l2_norm_op_npu.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import unittest
+from numpy import linalg as LA
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle
+
+paddle.enable_static()
+
+
+class TestL2LossOp(OpTest):
+    """Test npu squared_l2_norm
+    """
+
+    def setUp(self):
+        self.set_npu()
+        self.place = paddle.NPUPlace(0)
+        self.op_type = "squared_l2_norm"
+        self.max_relative_error = 0.05
+
+        X = np.random.uniform(-1, 1, (13, 19)).astype("float32")
+        X[np.abs(X) < self.max_relative_error] = 0.1
+        self.inputs = {'X': X}
+        self.outputs = {'Out': np.square(LA.norm(X))}
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+
+    def test_check_output(self):
+        self.check_output_with_place(place=self.place)
+
+    def test_check_grad(self):
+        self.check_grad_with_place(
+            self.place, ['X'],
+            'Out',
+            max_relative_error=self.max_relative_error)
+
+
+if __name__ == "__main__":
+    unittest.main()
-- 
GitLab