/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef PADDLE_WITH_ASCEND_CL
// NOTE(review): the three standard-header names were missing from the source
// text; <memory>, <string> and <vector> are reconstructed. Only <vector> is
// demonstrably required by the code below - confirm the other two.
#include <memory>
#include <string>
#include <vector>

#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/operators/stack_op.h"
#include "paddle/fluid/operators/unsqueeze_op.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

// NPU kernel for the `stack` op: joins every tensor of input "X" into output
// "Y" along a new dimension given by attribute "axis".
//
// A negative axis is normalized by adding (rank + 1), where rank is the number
// of dimensions of the first input. When the normalized axis is 0 the inputs
// are packed straight into "Y". Otherwise they are packed along dimension 0
// into a temporary tensor, which is then transposed so that the new dimension
// ends up at position `axis`.
//
// NOTE(review): every template argument list in this file was missing from the
// source text and has been reconstructed (DeviceContext/T parameters, Tensor
// element types, NPUDeviceContext, int64_t vectors). Confirm against the
// intended original before merging.
template <typename DeviceContext, typename T>
class StackNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto x = ctx.MultiInput<Tensor>("X");
    int32_t N = x.size();

    PADDLE_ENFORCE_GT(
        N, 0, platform::errors::InvalidArgument("number of input Tensor <= 0"));

    // The op runner takes tensors by value, so copy them out of the pointer
    // list; the final size is known, so allocate once.
    std::vector<Tensor> x_list;
    x_list.reserve(N);
    for (int i = 0; i < N; i++) {
      x_list.push_back(*x[i]);
    }

    int axis = ctx.Attr<int>("axis");
    if (axis < 0) {
      // The output has one more dimension than each input, hence the "+ 1".
      axis = axis + x_list[0].dims().size() + 1;
    }
    // NOTE(review): axis is not range-checked here; presumably the op's shape
    // inference has already rejected values outside [0, rank] - confirm.

    auto* out = ctx.Output<Tensor>("Y");
    auto place = ctx.GetPlace();
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();

    out->mutable_data<T>(place);

    if (axis != 0) {
      // Shape of the intermediate result: [N, d0, d1, ..., d(rank-1)].
      auto x_dim = x_list[0].dims();
      std::vector<int64_t> vec_dim_tmp;
      vec_dim_tmp.push_back(N);
      for (int i = 0; i < x_dim.size(); ++i) {
        vec_dim_tmp.push_back(x_dim[i]);
      }

      Tensor tmp_stack(out->type());
      tmp_stack.Resize(framework::make_ddim(vec_dim_tmp));
      tmp_stack.mutable_data<T>(place);

      // NOTE(review): packing on axis 0 and transposing, rather than packing
      // on `axis` directly, looks like a workaround for the Pack operator -
      // the reason is not visible in this file.
      auto runner =
          NpuOpRunner("Pack", {x_list}, {tmp_stack}, {{"axis", 0}, {"N", N}});
      runner.Run(stream);

      // Build the permutation [1, ..., axis, 0, axis + 1, ..., rank]: the
      // leading (stacked) dimension is re-inserted right after input
      // dimension `axis`.
      std::vector<int64_t> vec_trans;
      for (int i = 1; i <= x_dim.size(); ++i) {
        vec_trans.push_back(i);
        if (i == axis) {
          vec_trans.push_back(0);
        }
      }

      auto runner_trans_final =
          NpuOpRunner("TransposeD", {tmp_stack}, {*out}, {{"perm", vec_trans}});
      runner_trans_final.Run(stream);

    } else {
      auto runner =
          NpuOpRunner("Pack", {x_list}, {*out}, {{"axis", axis}, {"N", N}});
      runner.Run(stream);
    }
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

// NOTE(review): the source registers two instantiations whose template
// arguments were missing. float is exercised by the accompanying unit test;
// float16 is a reconstruction - confirm.
REGISTER_OP_NPU_KERNEL(
    stack, ops::StackNPUKernel<paddle::platform::NPUDeviceContext, float>,
    ops::StackNPUKernel<paddle::platform::NPUDeviceContext,
                        paddle::platform::float16>);

#endif
+ +from __future__ import print_function + +import numpy as np +import unittest +import sys +sys.path.append("..") +from op_test import OpTest +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core + +paddle.enable_static() +SEED = 2021 + + +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") +class TestStack1(OpTest): + def initDefaultParameters(self): + self.num_inputs = 4 + self.input_dim = (5, 6, 7) + self.axis = 0 + self.dtype = 'float32' + + def get_x_names(self): + x_names = [] + for i in range(self.num_inputs): + x_names.append('x{}'.format(i)) + return x_names + + def setUp(self): + self.initDefaultParameters() + self.set_npu() + self.op_type = "stack" + self.place = paddle.NPUPlace(0) + + self.x = [] + for i in range(self.num_inputs): + self.x.append( + np.random.random(size=self.input_dim).astype(self.dtype)) + + tmp = [] + x_names = self.get_x_names() + for i in range(self.num_inputs): + tmp.append((x_names[i], self.x[i])) + + self.inputs = {'X': tmp} + self.outputs = {'Y': np.stack(self.x, axis=self.axis)} + self.attrs = {'axis': self.axis} + + def set_npu(self): + self.__class__.use_npu = True + + def test_check_output(self): + self.check_output_with_place(self.place, check_dygraph=False) + + +class TestStack2(OpTest): + def initDefaultParameters(self): + self.num_inputs = 4 + self.input_dim = (2, 3, 4) + self.axis = -1 + self.dtype = 'float32' + + def get_x_names(self): + x_names = [] + for i in range(self.num_inputs): + x_names.append('x{}'.format(i)) + return x_names + + def setUp(self): + self.initDefaultParameters() + self.set_npu() + self.op_type = "stack" + self.place = paddle.NPUPlace(0) + + self.x = [] + for i in range(self.num_inputs): + self.x.append( + np.random.random(size=self.input_dim).astype(self.dtype)) + + tmp = [] + x_names = self.get_x_names() + for i in range(self.num_inputs): + tmp.append((x_names[i], self.x[i])) + + self.inputs = {'X': tmp} + self.outputs = {'Y': 
np.stack(self.x, axis=self.axis)} + self.attrs = {'axis': self.axis} + + def set_npu(self): + self.__class__.use_npu = True + + def test_check_output(self): + self.check_output_with_place(self.place, check_dygraph=False) + + +class TestStack3(OpTest): + def initDefaultParameters(self): + self.num_inputs = 4 + self.input_dim = (2, 3, 4) + self.axis = 1 + self.dtype = 'float32' + + def get_x_names(self): + x_names = [] + for i in range(self.num_inputs): + x_names.append('x{}'.format(i)) + return x_names + + def setUp(self): + self.initDefaultParameters() + self.set_npu() + self.op_type = "stack" + self.place = paddle.NPUPlace(0) + + self.x = [] + for i in range(self.num_inputs): + self.x.append( + np.random.random(size=self.input_dim).astype(self.dtype)) + + tmp = [] + x_names = self.get_x_names() + for i in range(self.num_inputs): + tmp.append((x_names[i], self.x[i])) + + self.inputs = {'X': tmp} + self.outputs = {'Y': np.stack(self.x, axis=self.axis)} + self.attrs = {'axis': self.axis} + + def set_npu(self): + self.__class__.use_npu = True + + def test_check_output(self): + self.check_output_with_place(self.place, check_dygraph=False) + + +if __name__ == '__main__': + unittest.main()