From 3f71e8d21c5ebc118d474ea489bf97f9a531422f Mon Sep 17 00:00:00 2001
From: Qi Li
Date: Thu, 12 Aug 2021 20:22:56 +0800
Subject: [PATCH] [NPU] add meshgrid, test=develop (#34576)

---
 paddle/fluid/operators/meshgrid_op_npu.cc          |  84 +++++++
 .../tests/unittests/npu/test_meshgrid_op_npu.py    | 216 ++++++++++++++++++
 2 files changed, 300 insertions(+)
 create mode 100644 paddle/fluid/operators/meshgrid_op_npu.cc
 create mode 100644 python/paddle/fluid/tests/unittests/npu/test_meshgrid_op_npu.py

diff --git a/paddle/fluid/operators/meshgrid_op_npu.cc b/paddle/fluid/operators/meshgrid_op_npu.cc
new file mode 100644
index 00000000000..a72c611a658
--- /dev/null
+++ b/paddle/fluid/operators/meshgrid_op_npu.cc
@@ -0,0 +1,84 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/meshgrid_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>
+class MeshgridNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto ins = context.MultiInput<framework::Tensor>("X");
+    auto outs = context.MultiOutput<framework::Tensor>("Out");
+    PADDLE_ENFORCE_EQ(
+        (ins.size() > 1) && (ins.size() < 7), true,
+        platform::errors::InvalidArgument(
+            "Expected Tensor numbers between 2 and 6, but only received %d.",
+            ins.size()));
+
+    int64_t size = ins.size();
+    std::vector<int64_t> shape(size);
+
+    for (int64_t i = 0; i < size; i++) {
+      switch (ins[i]->dims().size()) {
+        case 0:
+          shape[i] = 1;
+          break;
+        case 1:
+          shape[i] = ins[i]->dims()[0];
+          break;
+        default:
+          PADDLE_THROW(platform::errors::InvalidArgument(
+              "Expected scalar or 1D tensor in the tensor list but got tensor "
+              "%d.",
+              i));
+      }
+    }
+
+    for (int64_t i = 0; i < size; i++) {
+      std::vector<int64_t> view_shape(size, 1);
+      view_shape[i] = shape[i];
+
+      framework::DDim out_dims_reshape = framework::make_ddim(view_shape);
+      framework::Tensor reshape_ins_tensor(ins[i]->type());
+      reshape_ins_tensor.ShareDataWith(*ins[i]);
+      reshape_ins_tensor.Resize(out_dims_reshape);
+
+      framework::DDim out_dims = framework::make_ddim(shape);
+      outs[i]->Resize(out_dims);
+      outs[i]->mutable_data<T>(context.GetPlace());
+
+      auto stream =
+          context.template device_context<paddle::platform::NPUDeviceContext>()
+              .stream();
+      const auto& runner = NpuOpRunner("BroadcastToD", {reshape_ins_tensor},
+                                       {*(outs[i])}, {{"shape", shape}});
+      runner.Run(stream);
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+REGISTER_OP_NPU_KERNEL(
+    meshgrid, ops::MeshgridNPUKernel<plat::float16>,
+    ops::MeshgridNPUKernel<float>,
+    ops::MeshgridNPUKernel<int32_t>);
diff --git a/python/paddle/fluid/tests/unittests/npu/test_meshgrid_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_meshgrid_op_npu.py
new file mode 100644
index 00000000000..216a6418ac6
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_meshgrid_op_npu.py
@@ -0,0 +1,216 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+sys.path.append("..")
+from op_test import OpTest, skip_check_grad_ci
+import paddle.fluid as fluid
+import paddle
+from paddle.fluid import compiler, Program, program_guard, core
+
+paddle.enable_static()
+
+
+class TestMeshgridOp(OpTest):
+    def setUp(self):
+        self.set_npu()
+        self.op_type = "meshgrid"
+        self.dtype = self.get_dtype()
+        ins, outs = self.init_test_data()
+        self.inputs = {'X': [('x%d' % i, ins[i]) for i in range(len(ins))]}
+        self.outputs = {
+            'Out': [('out%d' % i, outs[i]) for i in range(len(outs))]
+        }
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+
+    def get_dtype(self):
+        return "float32"
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def test_check_grad(self):
+        pass
+
+    def init_test_data(self):
+        self.shape = self.get_x_shape()
+        ins = []
+        outs = []
+        for i in range(len(self.shape)):
+            ins.append(np.random.random((self.shape[i], )).astype(self.dtype))
+
+        for i in range(len(self.shape)):
+            out_reshape = [1] * len(self.shape)
+            out_reshape[i] = self.shape[i]
+            out_temp = np.reshape(ins[i], out_reshape)
+            outs.append(np.broadcast_to(out_temp, self.shape))
+        return ins, outs
+
+    def get_x_shape(self):
+        return [100, 200]
+
+
+@skip_check_grad_ci(
+    reason="The backward test is not supported for float16 type on NPU.")
+class TestMeshgridOpFP16(TestMeshgridOp):
+    def get_dtype(self):
+        return "float16"
+
+
+class TestMeshgridOp2(TestMeshgridOp):
+    def get_x_shape(self):
+        return [100, 300]
+
+
+class TestMeshgridOp3(unittest.TestCase):
+    def test_api(self):
+        x = fluid.data(shape=[100], dtype='int32', name='x')
+        y = fluid.data(shape=[200], dtype='int32', name='y')
+
+        input_1 = np.random.randint(0, 100, [100, ]).astype('int32')
+        input_2 = np.random.randint(0, 100, [200, ]).astype('int32')
+
+        out_1 = np.reshape(input_1, [100, 1])
+        out_1 = np.broadcast_to(out_1, [100, 200])
+        out_2 = np.reshape(input_2, [1, 200])
+        out_2 = np.broadcast_to(out_2, [100, 200])
+
+        exe = fluid.Executor(place=fluid.NPUPlace(0))
+        grid_x, grid_y = paddle.tensor.meshgrid(x, y)
+        res_1, res_2 = exe.run(fluid.default_main_program(),
+                               feed={'x': input_1,
+                                     'y': input_2},
+                               fetch_list=[grid_x, grid_y])
+
+        self.assertTrue(np.allclose(res_1, out_1))
+        self.assertTrue(np.allclose(res_2, out_2))
+
+
+class TestMeshgridOp4(unittest.TestCase):
+    def test_list_input(self):
+        x = fluid.data(shape=[100], dtype='int32', name='x')
+        y = fluid.data(shape=[200], dtype='int32', name='y')
+
+        input_1 = np.random.randint(0, 100, [100, ]).astype('int32')
+        input_2 = np.random.randint(0, 100, [200, ]).astype('int32')
+
+        out_1 = np.reshape(input_1, [100, 1])
+        out_1 = np.broadcast_to(out_1, [100, 200])
+        out_2 = np.reshape(input_2, [1, 200])
+        out_2 = np.broadcast_to(out_2, [100, 200])
+
+        exe = fluid.Executor(place=fluid.NPUPlace(0))
+        grid_x, grid_y = paddle.tensor.meshgrid([x, y])
+        res_1, res_2 = exe.run(fluid.default_main_program(),
+                               feed={'x': input_1,
+                                     'y': input_2},
+                               fetch_list=[grid_x, grid_y])
+
+        self.assertTrue(np.allclose(res_1, out_1))
+        self.assertTrue(np.allclose(res_2, out_2))
+
+
+class TestMeshgridOp5(unittest.TestCase):
+    def test_tuple_input(self):
+        x = fluid.data(shape=[100], dtype='int32', name='x')
+        y = fluid.data(shape=[200], dtype='int32', name='y')
+
+        input_1 = np.random.randint(0, 100, [100, ]).astype('int32')
+        input_2 = np.random.randint(0, 100, [200, ]).astype('int32')
+
+        out_1 = np.reshape(input_1, [100, 1])
+        out_1 = np.broadcast_to(out_1, [100, 200])
+        out_2 = np.reshape(input_2, [1, 200])
+        out_2 = np.broadcast_to(out_2, [100, 200])
+
+        exe = fluid.Executor(place=fluid.NPUPlace(0))
+        grid_x, grid_y = paddle.tensor.meshgrid((x, y))
+        res_1, res_2 = exe.run(fluid.default_main_program(),
+                               feed={'x': input_1,
+                                     'y': input_2},
+                               fetch_list=[grid_x, grid_y])
+
+        self.assertTrue(np.allclose(res_1, out_1))
+        self.assertTrue(np.allclose(res_2, out_2))
+
+
+class TestMeshgridOp6(unittest.TestCase):
+    def test_api_with_dygraph(self):
+        paddle.disable_static(paddle.NPUPlace(0))
+        input_3 = np.random.randint(0, 100, [100, ]).astype('int32')
+        input_4 = np.random.randint(0, 100, [200, ]).astype('int32')
+
+        out_3 = np.reshape(input_3, [100, 1])
+        out_3 = np.broadcast_to(out_3, [100, 200])
+        out_4 = np.reshape(input_4, [1, 200])
+        out_4 = np.broadcast_to(out_4, [100, 200])
+
+        tensor_3 = paddle.to_tensor(input_3)
+        tensor_4 = paddle.to_tensor(input_4)
+        res_3, res_4 = paddle.tensor.meshgrid(tensor_3, tensor_4)
+
+        self.assertTrue(np.allclose(res_3.numpy(), out_3))
+        self.assertTrue(np.allclose(res_4.numpy(), out_4))
+        paddle.enable_static()
+
+
+class TestMeshgridOp7(unittest.TestCase):
+    def test_api_with_dygraph_list_input(self):
+        paddle.disable_static(paddle.NPUPlace(0))
+        input_3 = np.random.randint(0, 100, [100, ]).astype('int32')
+        input_4 = np.random.randint(0, 100, [200, ]).astype('int32')
+
+        out_3 = np.reshape(input_3, [100, 1])
+        out_3 = np.broadcast_to(out_3, [100, 200])
+        out_4 = np.reshape(input_4, [1, 200])
+        out_4 = np.broadcast_to(out_4, [100, 200])
+
+        tensor_3 = paddle.to_tensor(input_3)
+        tensor_4 = paddle.to_tensor(input_4)
+        res_3, res_4 = paddle.meshgrid([tensor_3, tensor_4])
+
+        self.assertTrue(np.allclose(res_3.numpy(), out_3))
+        self.assertTrue(np.allclose(res_4.numpy(), out_4))
+        paddle.enable_static()
+
+
+class TestMeshgridOp8(unittest.TestCase):
+    def test_api_with_dygraph_tuple_input(self):
+        paddle.disable_static(paddle.NPUPlace(0))
+        input_3 = np.random.randint(0, 100, [100, ]).astype('int32')
+        input_4 = np.random.randint(0, 100, [200, ]).astype('int32')
+
+        out_3 = np.reshape(input_3, [100, 1])
+        out_3 = np.broadcast_to(out_3, [100, 200])
+        out_4 = np.reshape(input_4, [1, 200])
+        out_4 = np.broadcast_to(out_4, [100, 200])
+
+        tensor_3 = paddle.to_tensor(input_3)
+        tensor_4 = paddle.to_tensor(input_4)
+        res_3, res_4 = paddle.tensor.meshgrid((tensor_3, tensor_4))
+
+        self.assertTrue(np.allclose(res_3.numpy(), out_3))
+        self.assertTrue(np.allclose(res_4.numpy(), out_4))
+        paddle.enable_static()
+
+
+if __name__ == '__main__':
+    unittest.main()
--
GitLab
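
Note on the approach: the kernel computes meshgrid by viewing each 1-D input of
length N_i as shape [1, ..., N_i, ..., 1] and broadcasting it to the full grid
shape [N_1, ..., N_k] with the BroadcastToD NPU op, which is the same
reshape-then-broadcast computation the tests above use to build their expected
outputs in NumPy. A minimal self-contained sketch of that reference computation
(plain NumPy, independent of Paddle; the helper name meshgrid_reference is
illustrative):

    import numpy as np

    def meshgrid_reference(*arrays):
        # The grid shape is the list of the 1-D input lengths, in order.
        shape = [a.shape[0] for a in arrays]
        outs = []
        for i, a in enumerate(arrays):
            # View input i as [1, ..., N_i, ..., 1], then broadcast it to the
            # full grid shape, mirroring the per-output BroadcastToD call.
            view_shape = [1] * len(shape)
            view_shape[i] = shape[i]
            outs.append(np.broadcast_to(a.reshape(view_shape), shape))
        return outs

    grid_x, grid_y = meshgrid_reference(np.arange(3), np.arange(4))
    assert grid_x.shape == grid_y.shape == (3, 4)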