From d082955e1c464d2a8a1912965b6ba823258e0fc0 Mon Sep 17 00:00:00 2001
From: zhaoyingli <86812880+lili0826@users.noreply.github.com>
Date: Fri, 20 Aug 2021 13:30:07 +0800
Subject: [PATCH] [NPU] Support npu op where and where grad (#34587)

* [NPU] Support npu op where and where grad

* fix use const_cast

* delete a test
---
 paddle/fluid/operators/where_op_npu.cc        |  96 ++++++++++
 .../tests/unittests/npu/test_where_op_npu.py  | 165 ++++++++++++++++++
 2 files changed, 261 insertions(+)
 create mode 100755 paddle/fluid/operators/where_op_npu.cc
 create mode 100755 python/paddle/fluid/tests/unittests/npu/test_where_op_npu.py
diff --git a/paddle/fluid/operators/where_op_npu.cc b/paddle/fluid/operators/where_op_npu.cc
new file mode 100755
index 0000000000..6b7f5b1dd5
--- /dev/null
+++ b/paddle/fluid/operators/where_op_npu.cc
@@ -0,0 +1,96 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/operators/where_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
+
+namespace paddle {
+namespace operators {
+
+// NPU forward kernel for `where`; selection is delegated to the "Select" op.
+template <typename DeviceContext, typename T>
+class WhereNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* cond_t = ctx.Input<framework::Tensor>("Condition");
+    auto* x_t = ctx.Input<framework::Tensor>("X");
+    auto* y_t = ctx.Input<framework::Tensor>("Y");
+    auto* out_t = ctx.Output<framework::Tensor>("Out");
+    // Allocate the output buffer on the executing place before the launch.
+    out_t->mutable_data<T>(ctx.GetPlace());
+
+    const auto& dev_ctx =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>();
+    const auto& select_runner =
+        NpuOpRunner("Select", {*cond_t, *x_t, *y_t}, {*out_t}, {});
+    select_runner.Run(dev_ctx.stream());
+  }
+};
+
+// NPU backward kernel for `where`: dX takes dOut where Condition is true,
+// dY takes dOut elsewhere, and the remaining entries of each are zero.
+template <typename DeviceContext, typename T>
+class WhereGradNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* cond = ctx.Input<framework::Tensor>("Condition");
+    auto* dout = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
+    auto* dx = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
+    auto* dy = ctx.Output<framework::Tensor>(framework::GradVarName("Y"));
+
+    // Either gradient output may be null; allocate only the ones present.
+    if (dx) dx->mutable_data<T>(ctx.GetPlace());
+    if (dy) dy->mutable_data<T>(ctx.GetPlace());
+
+    const auto& dev_ctx =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>();
+    auto stream = dev_ctx.stream();
+
+    // Zero tensor with dOut's dtype and dims: the "not selected" operand.
+    framework::Tensor zeros(dout->type());
+    zeros.mutable_data<T>(dout->dims(), ctx.GetPlace());
+    const auto& zeros_like_runner =
+        NpuOpRunner("ZerosLike", {*dout}, {zeros}, {});
+    zeros_like_runner.Run(stream);
+
+    if (dx) {  // dX = Condition ? dOut : 0
+      const auto& dx_runner =
+          NpuOpRunner("Select", {*cond, *dout, zeros}, {*dx}, {});
+      dx_runner.Run(stream);
+    }
+    if (dy) {  // dY = Condition ? 0 : dOut
+      const auto& dy_runner =
+          NpuOpRunner("Select", {*cond, zeros, *dout}, {*dy}, {});
+      dy_runner.Run(stream);
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+// Register the forward `where` NPU kernel for float, double, int, int64_t.
+REGISTER_OP_NPU_KERNEL(
+    where, ops::WhereNPUKernel<paddle::platform::NPUDeviceContext, float>,
+    ops::WhereNPUKernel<paddle::platform::NPUDeviceContext, double>,
+    ops::WhereNPUKernel<paddle::platform::NPUDeviceContext, int>,
+    ops::WhereNPUKernel<paddle::platform::NPUDeviceContext, int64_t>);
+// Register the backward `where_grad` NPU kernel for the same element types.
+REGISTER_OP_NPU_KERNEL(
+    where_grad,
+    ops::WhereGradNPUKernel<paddle::platform::NPUDeviceContext, float>,
+    ops::WhereGradNPUKernel<paddle::platform::NPUDeviceContext, double>,
+    ops::WhereGradNPUKernel<paddle::platform::NPUDeviceContext, int>,
+    ops::WhereGradNPUKernel<paddle::platform::NPUDeviceContext, int64_t>);
diff --git a/python/paddle/fluid/tests/unittests/npu/test_where_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_where_op_npu.py
new file mode 100755
index 0000000000..cf877ff287
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_where_op_npu.py
@@ -0,0 +1,165 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function, division
+
+import numpy as np
+import unittest
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import Program
+from paddle.fluid.backward import append_backward
+
+paddle.enable_static()  # switch Paddle to static-graph mode at import time
+
+
+class TestNPUWhereOp(OpTest):  # NPU `where` op checked against np.where
+    def setUp(self):
+        self.op_type = "where"
+        self.set_npu()
+        self.init_config()
+        self.inputs = dict(Condition=self.cond, X=self.x, Y=self.y)
+        self.outputs = dict(Out=np.where(self.cond, self.x, self.y))
+
+    def set_npu(self):  # flag the class as an NPU test and target device 0
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+
+    def init_config(self):  # base case: 1-D float64 data, all-False condition
+        self.x = np.random.uniform(-3, 5, size=100).astype(np.float64)
+        self.y = np.random.uniform(-3, 5, size=100).astype(np.float64)
+        self.cond = np.zeros(100, dtype=bool)
+
+    def test_check_grad_normal(self):  # gradients of Out w.r.t. X and Y
+        self.check_grad_with_place(self.place, ['X', 'Y'], 'Out')
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+
+class TestNPUWhereOp2(TestNPUWhereOp):  # 2-D inputs, all-True condition
+    def init_config(self):
+        self.x = np.random.uniform(-5, 5, size=(60, 2)).astype(np.float64)
+        self.y = np.random.uniform(-5, 5, size=(60, 2)).astype(np.float64)
+        self.cond = np.ones((60, 2), dtype=bool)
+
+
+class TestNPUWhereOp3(TestNPUWhereOp):  # 3-D inputs, random condition
+    def init_config(self):
+        self.x = np.random.uniform(-3, 5, size=(20, 2, 4)).astype(np.float64)
+        self.y = np.random.uniform(-3, 5, size=(20, 2, 4)).astype(np.float64)
+        self.cond = np.random.randint(2, size=(20, 2, 4)).astype(bool)
+
+
+class TestNPUWhereAPI(unittest.TestCase):
+    """Static-graph checks of `paddle.where` and its gradients on NPU."""
+
+    def setUp(self):
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+        self.init_data()
+
+    def init_data(self):
+        self.shape = [10, 15]
+        self.cond = np.array(np.random.randint(2, size=self.shape), dtype=bool)
+        self.x = np.random.uniform(-2, 3, self.shape).astype(np.float32)
+        self.y = np.random.uniform(-2, 3, self.shape).astype(np.float32)
+        self.out = np.where(self.cond, self.x, self.y)
+
+    def ref_x_backward(self, dout):
+        """Reference dX: `dout` where the condition holds, zero elsewhere."""
+        return np.where(self.cond, dout, 0)
+
+    def ref_y_backward(self, dout):
+        """Reference dY: `dout` where the condition fails, zero elsewhere."""
+        return np.where(np.logical_not(self.cond), dout, 0)
+
+    def test_api(self):
+        """Run forward/backward for every X/Y stop_gradient combination."""
+        for x_stop_gradient in [False, True]:
+            for y_stop_gradient in [False, True]:
+                train_prog = fluid.Program()
+                startup = fluid.Program()
+                with fluid.program_guard(train_prog, startup):
+                    cond = fluid.data(
+                        name='cond', shape=self.shape, dtype='bool')
+                    x = fluid.data(name='x', shape=self.shape, dtype='float32')
+                    y = fluid.data(name='y', shape=self.shape, dtype='float32')
+
+                    x.stop_gradient = x_stop_gradient
+                    y.stop_gradient = y_stop_gradient
+
+                    result = paddle.where(cond, x, y)
+                    append_backward(fluid.layers.mean(result))
+
+                    exe = fluid.Executor(self.place)
+                    exe.run(startup)
+
+                    # Fetch order: Out, dOut, then dX / dY when enabled.
+                    fetch_list = [result, result.grad_name]
+                    if not x_stop_gradient:
+                        fetch_list.append(x.grad_name)
+                    if not y_stop_gradient:
+                        fetch_list.append(y.grad_name)
+                    out = exe.run(
+                        train_prog,
+                        feed={'cond': self.cond, 'x': self.x, 'y': self.y},
+                        fetch_list=fetch_list)
+                    np.testing.assert_array_equal(out[0], self.out)
+
+                    grads = iter(out[2:])
+                    if not x_stop_gradient:
+                        np.testing.assert_array_equal(
+                            next(grads), self.ref_x_backward(out[1]))
+                    if not y_stop_gradient:
+                        np.testing.assert_array_equal(
+                            next(grads), self.ref_y_backward(out[1]))
+
+    def test_api_broadcast(self, use_cuda=False):
+        """`paddle.where` with a (1, 4) X fed against a (2, 4) Y."""
+        train_prog = fluid.Program()
+        startup = fluid.Program()
+        with fluid.program_guard(train_prog, startup):
+            x = fluid.layers.data(name='x', shape=[4, 1], dtype='float32')
+            y = fluid.layers.data(name='y', shape=[4, 2], dtype='float32')
+            x_i = np.array([[0.9383, 0.1983, 3.2, 1.2]]).astype("float32")
+            y_i = np.ones((2, 4), dtype="float32")
+            result = paddle.where(x > 1, x=x, y=y)
+
+            exe = fluid.Executor(self.place)
+            exe.run(startup)
+
+            out = exe.run(
+                train_prog, feed={'x': x_i, 'y': y_i}, fetch_list=[result])
+            np.testing.assert_array_equal(out[0], np.where(x_i > 1, x_i, y_i))
+
+
+class TestWhereDygraphAPI(unittest.TestCase):
+    """Check `paddle.where` in dygraph mode on NPU device 0 against NumPy."""
+    def test_api(self):
+        with fluid.dygraph.guard(paddle.NPUPlace(0)):
+            x_i = np.array([0.9383, 0.1983, 3.2, 1.2]).astype("float64")
+            y_i = np.array([1.0, 1.0, 1.0, 1.0]).astype("float64")
+            cond_i = np.array([False, False, True, True]).astype("bool")
+            x, y, cond = map(fluid.dygraph.to_variable, (x_i, y_i, cond_i))
+            out = paddle.where(cond, x, y)
+            np.testing.assert_array_equal(out.numpy(),
+                                          np.where(cond_i, x_i, y_i))
+
+
+if __name__ == '__main__':  # run the tests when executed as a script
+    unittest.main()
-- 
GitLab