From 63f6ce7bd9494699373ac22267a3234fc9818e5b Mon Sep 17 00:00:00 2001
From: ronnywang <524019753@qq.com>
Date: Fri, 23 Jul 2021 18:52:21 +0800
Subject: [PATCH] [NPU] add index_sample_op_npu and tests (#34239)

* add index_sample_op_npu and tests

* update
---
 paddle/fluid/operators/index_sample_op_npu.cc | 130 ++++++++++++
 .../unittests/npu/test_index_sample_op_npu.py | 193 ++++++++++++++++++
 2 files changed, 323 insertions(+)
 create mode 100644 paddle/fluid/operators/index_sample_op_npu.cc
 create mode 100644 python/paddle/fluid/tests/unittests/npu/test_index_sample_op_npu.py

diff --git a/paddle/fluid/operators/index_sample_op_npu.cc b/paddle/fluid/operators/index_sample_op_npu.cc
new file mode 100644
index 0000000000..f5a4100c63
--- /dev/null
+++ b/paddle/fluid/operators/index_sample_op_npu.cc
@@ -0,0 +1,130 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/
+
+#include "paddle/fluid/operators/index_sample_op.h"
+
+#include "paddle/fluid/operators/npu_op_runner.h"
+
+namespace paddle {
+namespace operators {
+using Tensor = framework::Tensor;
+
+template <typename T>
+class IndexSampleNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto& dev_ctx =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>();
+    auto* input = ctx.Input<framework::LoDTensor>("X");
+    auto* index = ctx.Input<framework::LoDTensor>("Index");
+    auto* out = ctx.Output<framework::LoDTensor>("Out");
+    out->mutable_data<T>(ctx.GetPlace());
+
+    Tensor transformed_index;
+    const auto& index_type = index->type();
+    bool index_type_match = index_type == framework::proto::VarType::INT32 ||
+                            index_type == framework::proto::VarType::INT64;
+    PADDLE_ENFORCE_EQ(index_type_match, true,
+                      platform::errors::InvalidArgument(
+                          "Input(Index) holds the wrong type, it holds %s, but "
+                          "desires to be %s or %s",
+                          paddle::framework::DataTypeToString(index_type),
+                          paddle::framework::DataTypeToString(
+                              framework::proto::VarType::INT32),
+                          paddle::framework::DataTypeToString(
+                              framework::proto::VarType::INT64)));
+    if (index_type == framework::proto::VarType::INT32) {
+      transformed_index.mutable_data<int64_t>(index->dims(),
+                                              dev_ctx.GetPlace());
+      const auto& cast_runner = NpuOpRunner(
+          "Cast", {*index}, {transformed_index}, {{"dst_type", ACL_INT64}});
+      cast_runner.Run(dev_ctx.stream());
+    } else {
+      transformed_index.ShareDataWith(*index);
+    }
+
+    const auto& runner = NpuOpRunner(
+        "GatherElements", {*input, transformed_index}, {*out}, {{"dim", 1}});
+    runner.Run(dev_ctx.stream());
+  }
+};
+
+template <typename IndexT>
+void IndexSampleGradScatter(const paddle::platform::NPUDeviceContext& dev_ctx,
+                            const Tensor* index, const Tensor* out_grad,
+                            Tensor* x_grad) {
+  auto index_dims = index->dims();
+  auto input_dims = x_grad->dims();
+  auto batch_size = input_dims[0];
+  auto index_length = index_dims[1];
+
+  std::vector<IndexT> scatter_index_vec;
+  std::vector<IndexT> index_vec;
+  framework::TensorToVector(*index, dev_ctx, &index_vec);
+  for (auto i = 0; i < batch_size; ++i) {
+    for (auto j = 0; j < index_length; j++) {
+      scatter_index_vec.push_back(i);
+      scatter_index_vec.push_back(index_vec[i * index_length + j]);
+    }
+  }
+  Tensor scatter_index;
+  framework::TensorFromVector(scatter_index_vec, dev_ctx, &scatter_index);
+  scatter_index.Resize({batch_size, index_length, 2});
+
+  NpuOpRunner runner;
+  runner.SetType("ScatterNd")
+      .AddInput(scatter_index)
+      .AddInput(*out_grad)
+      .AddInput(framework::vectorize<IndexT>(x_grad->dims()))
+      .AddOutput(*x_grad);
+  runner.Run(dev_ctx.stream());
+}
+
+template <typename T>
+class IndexSampleGradNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto& dev_ctx =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>();
+    auto* index = ctx.Input<framework::LoDTensor>("Index");
+    auto* out_grad =
+        ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
+    auto* x_grad =
+        ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
+    x_grad->mutable_data<T>(ctx.GetPlace());
+
+    const auto& index_type = index->type();
+    if (index_type == framework::proto::VarType::INT32) {
+      IndexSampleGradScatter<int32_t>(dev_ctx, index, out_grad, x_grad);
+    } else {
+      IndexSampleGradScatter<int64_t>(dev_ctx, index, out_grad, x_grad);
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+REGISTER_OP_NPU_KERNEL(index_sample, ops::IndexSampleNPUKernel<plat::float16>,
+                       ops::IndexSampleNPUKernel<float>,
+                       ops::IndexSampleNPUKernel<int32_t>,
+                       ops::IndexSampleNPUKernel<int64_t>);
+REGISTER_OP_NPU_KERNEL(index_sample_grad,
+                       ops::IndexSampleGradNPUKernel<plat::float16>,
+                       ops::IndexSampleGradNPUKernel<float>,
+                       ops::IndexSampleGradNPUKernel<int32_t>,
+                       ops::IndexSampleGradNPUKernel<int64_t>);
diff --git a/python/paddle/fluid/tests/unittests/npu/test_index_sample_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_index_sample_op_npu.py
new file mode 100644
index 0000000000..9b890d22ad
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_index_sample_op_npu.py
@@ -0,0 +1,193 @@
+# Copyright
(c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import sys
+import unittest
+import numpy as np
+sys.path.append("..")
+
+from op_test import OpTest
+import paddle
+import paddle.fluid as fluid
+
+paddle.enable_static()
+
+
+class TestIndexSampleOp(OpTest):
+    def set_npu(self):
+        self.__class__.use_npu = True
+
+    def setUp(self):
+        self.set_npu()
+        self.op_type = "index_sample"
+        self.config()
+        xnp = np.random.random(self.x_shape).astype(self.dtype)
+        indexnp = np.random.randint(
+            low=0, high=self.x_shape[1],
+            size=self.index_shape).astype(self.index_type)
+        self.inputs = {'X': xnp, 'Index': indexnp}
+        index_array = []
+        for i in range(self.index_shape[0]):
+            for j in indexnp[i]:
+                index_array.append(xnp[i, j])
+        index_array = np.array(index_array).astype(self.dtype)
+        out = np.reshape(index_array, self.index_shape)
+        self.outputs = {'Out': out}
+
+    def test_check_output(self):
+        self.check_output_with_place(paddle.NPUPlace(0))
+
+    def test_check_grad(self):
+        self.check_grad_with_place(paddle.NPUPlace(0), ['X'], 'Out')
+
+    def config(self):
+        """
+        For multi-dimension input
+        """
+        self.x_shape = (10, 20)
+        self.dtype = "float32"
+        self.index_shape = (10, 10)
+        self.index_type = "int32"
+
+
+class TestCase1(TestIndexSampleOp):
+    def config(self):
+        """
+        For one dimension input
+        """
+        self.x_shape = (100, 1)
+        self.dtype = "float32"
+        self.index_shape = (100, 1)
+        self.index_type = "int32"
+
+
+class TestCase2(TestIndexSampleOp):
+    def config(self):
+        """
+        For int64_t index type
+        """
+        self.x_shape = (10, 100)
+        self.dtype = "float32"
+        self.index_shape = (10, 10)
+        self.index_type = "int64"
+
+
+class TestCase3(TestIndexSampleOp):
+    def config(self):
+        """
+        For int index type
+        """
+        self.x_shape = (10, 100)
+        self.dtype = "float32"
+        self.index_shape = (10, 10)
+        self.index_type = "int32"
+
+
+class TestCase4(TestIndexSampleOp):
+    def config(self):
+        """
+        For int64 index type
+        """
+        self.x_shape = (10, 128)
+        self.dtype = "float32"
+        self.index_shape = (10, 64)
+        self.index_type = "int64"
+
+
+class TestCase5(TestIndexSampleOp):
+    def config(self):
+        """
+        For float16 x type
+        """
+        self.__class__.no_need_check_grad = True
+        self.x_shape = (10, 128)
+        self.dtype = "float16"
+        self.index_shape = (10, 64)
+        self.index_type = "int64"
+
+    def test_check_grad(self):
+        pass
+
+
+class TestCase6(TestCase5):
+    def config(self):
+        """
+        For int32 x type
+        """
+        self.__class__.no_need_check_grad = True
+        self.x_shape = (10, 128)
+        self.dtype = "int32"
+        self.index_shape = (10, 64)
+        self.index_type = "int64"
+
+
+class TestCase7(TestCase5):
+    def config(self):
+        """
+        For int64 x type
+        """
+        self.__class__.no_need_check_grad = True
+        self.x_shape = (10, 128)
+        self.dtype = "int64"
+        self.index_shape = (10, 64)
+        self.index_type = "int64"
+
+
+class TestIndexSampleShape(unittest.TestCase):
+    def test_shape(self):
+        paddle.enable_static()
+        # create x value
+        x_shape = (2, 5)
+        x_type = "float32"
+        x_np = np.random.random(x_shape).astype(x_type)
+
+        # create index value
+        index_shape = (2, 3)
+        index_type = "int32"
+        index_np = np.random.randint(
+            low=0, high=x_shape[1], size=index_shape).astype(index_type)
+
+        x = fluid.data(name='x', shape=[-1, 5], dtype='float32')
+        index = fluid.data(name='index', shape=[-1, 3], dtype='int32')
+        output = paddle.index_sample(x=x, index=index)
+
+        place = fluid.NPUPlace(0)
+        exe = fluid.Executor(place=place)
+        exe.run(fluid.default_startup_program())
+
+        feed = {'x': x_np, 'index': index_np}
+        res = exe.run(feed=feed, fetch_list=[output])
+
+
+class TestIndexSampleDynamic(unittest.TestCase):
+    def test_result(self):
+        with fluid.dygraph.guard(paddle.NPUPlace(0)):
+            x = paddle.to_tensor(
+                [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0],
+                 [9.0, 10.0, 11.0, 12.0]],
+                dtype='float32')
+            index = paddle.to_tensor(
+                [[0, 1, 2], [1, 2, 3], [0, 0, 0]], dtype='int32')
+            out_z1 = paddle.index_sample(x, index)
+
+            expected_output = np.array(
+                [[1.0, 2.0, 3.0], [6.0, 7.0, 8.0], [9.0, 9.0, 9.0]])
+            np.testing.assert_allclose(out_z1.numpy(), expected_output)
+
+
+if __name__ == "__main__":
+    paddle.enable_static()
+    unittest.main()
-- 
GitLab