diff --git a/paddle/fluid/operators/slice_op_npu.cc b/paddle/fluid/operators/slice_op_npu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7dc06700e00da408f5f2bde91d76fbb9d3e9511a
--- /dev/null
+++ b/paddle/fluid/operators/slice_op_npu.cc
@@ -0,0 +1,131 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <memory>
+#include <string>
+
+#include "paddle/fluid/framework/ddim.h"
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
+#include "paddle/fluid/operators/slice_op.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+// Expand the sparse axes/starts/ends attributes into the dense
+// per-dimension "offsets" and "size" arguments expected by Ascend's
+// SliceD operator. Ends are clamped to the dimension extent.
+void UpdateAttr(const framework::DDim in_dims, const std::vector<int> axes,
+                const std::vector<int> starts, const std::vector<int> ends,
+                std::vector<int>* offsets, std::vector<int>* size) {
+  size_t cnt = 0;
+  for (int i = 0; i < in_dims.size(); ++i) {
+    int start = 0;
+    int end = in_dims[i];
+    // Bound cnt so we never read past the end of axes once every sliced
+    // dimension has been consumed.
+    if (cnt < axes.size() && axes[cnt] == i) {
+      start = starts[cnt];
+      end = ends[cnt] <= in_dims[i] ? ends[cnt] : end;
+      cnt++;
+    }
+
+    (*offsets)[i] = start;
+    (*size)[i] = end - start;
+  }
+}
+
+template <typename DeviceContext, typename T>
+class SliceNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* input = ctx.Input<Tensor>("Input");
+    auto* out = ctx.Output<Tensor>("Out");
+
+    auto axes = ctx.Attr<std::vector<int>>("axes");
+    auto starts = ctx.Attr<std::vector<int>>("starts");
+    auto ends = ctx.Attr<std::vector<int>>("ends");
+
+    out->mutable_data<T>(ctx.GetPlace());
+
+    auto in_dims = input->dims();
+    std::vector<int> offsets(in_dims.size());
+    std::vector<int> size(in_dims.size());
+
+    UpdateAttr(in_dims, axes, starts, ends, &offsets, &size);
+
+    auto runner = NpuOpRunner("SliceD", {*input}, {*out},
+                              {{"offsets", offsets}, {"size", size}});
+
+    auto stream =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
+    runner.Run(stream);
+  }
+};
+
+template <typename DeviceContext, typename T>
+class SliceGradNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* input = ctx.Input<Tensor>("Input");
+    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
+    auto* dinput = ctx.Output<Tensor>(framework::GradVarName("Input"));
+
+    auto axes = ctx.Attr<std::vector<int>>("axes");
+    auto starts = ctx.Attr<std::vector<int>>("starts");
+    auto ends = ctx.Attr<std::vector<int>>("ends");
+
+    auto in_dims = input->dims();
+    int rank = in_dims.size();
+
+    std::vector<int> offsets(rank);
+    std::vector<int> size(rank);
+    UpdateAttr(in_dims, axes, starts, ends, &offsets, &size);
+
+    // The gradient of slice is the upstream gradient zero-padded back to
+    // the input shape, so it maps onto Ascend's PadD operator.
+    std::vector<std::vector<int64_t>> paddings(rank, std::vector<int64_t>(2));
+    for (int i = 0; i < rank; ++i) {
+      paddings[i][0] = static_cast<int64_t>(offsets[i]);
+      paddings[i][1] = static_cast<int64_t>(in_dims[i] - size[i] - offsets[i]);
+    }
+
+    dinput->mutable_data<T>(ctx.GetPlace());
+    auto stream =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
+    auto runner =
+        NpuOpRunner("PadD", {*dout}, {*dinput}, {{"paddings", paddings}});
+    runner.Run(stream);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_NPU_KERNEL(
+    slice, ops::SliceNPUKernel<paddle::platform::NPUDeviceContext, float>,
+    ops::SliceNPUKernel<paddle::platform::NPUDeviceContext,
+                        paddle::platform::float16>);
+
+REGISTER_OP_NPU_KERNEL(
+    slice_grad,
+    ops::SliceGradNPUKernel<paddle::platform::NPUDeviceContext, float>,
+    ops::SliceGradNPUKernel<paddle::platform::NPUDeviceContext,
+                            paddle::platform::float16>);
diff --git a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e8f424129a0b9d25d2b4303b59cac75cf7e37e7
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
@@ -0,0 +1,142 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import unittest
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle
+
+paddle.enable_static()
+
+SEED = 2021
+EPOCH = 100
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestSliceOp(OpTest):
+    def setUp(self):
+        self.op_type = "slice"
+        self.set_npu()
+        self.init_dtype()
+        self.config()
+        self.inputs = {'Input': self.input}
+        self.outputs = {'Out': self.out}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': self.starts,
+            'ends': self.ends,
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype)
+        self.starts = [1, 0, 2]
+        self.ends = [3, 3, 4]
+        self.axes = [0, 1, 2]
+        self.infer_flags = [1, 1, 1]
+        self.out = self.input[1:3, 0:3, 2:4, :]
+
+    def init_dtype(self):
+        self.dtype = np.float32
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place, check_dygraph=False)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestSliceOpFp16(TestSliceOp):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.__class__.no_need_check_grad = True
+        self.place = paddle.NPUPlace(0)
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestSliceNet(unittest.TestCase):
+    def _test(self, run_npu=True):
+        main_prog = paddle.static.Program()
+        startup_prog = paddle.static.Program()
+        main_prog.random_seed = SEED
+        startup_prog.random_seed = SEED
+        np.random.seed(SEED)
+
+        batch_size = 32
+        data_shape = (32, 32)
+        a_np = np.random.random(size=data_shape).astype('float32')
+        b_np = np.random.random(size=data_shape).astype('float32')
+        label_np = np.random.randint(2, size=(batch_size, 1)).astype('int64')
+
+        with paddle.static.program_guard(main_prog, startup_prog):
+            a = paddle.static.data(name="a", shape=data_shape, dtype='float32')
+            b = paddle.static.data(name="b", shape=data_shape, dtype='float32')
+            label = paddle.static.data(
+                name="label", shape=[batch_size, 1], dtype='int64')
+
+            sum = paddle.add(a, b)
+            z = paddle.slice(sum, axes=[0, 1], starts=[0, 0], ends=[33, 2])
+
+            prediction = paddle.static.nn.fc(z, size=2, activation='softmax')
+
+            cost = paddle.nn.functional.cross_entropy(
+                input=prediction, label=label)
+            loss = paddle.mean(cost)
+            sgd = paddle.optimizer.SGD(learning_rate=0.01)
+            sgd.minimize(loss)
+
+        if run_npu:
+            place = paddle.NPUPlace(0)
+        else:
+            place = paddle.CPUPlace()
+
+        exe = paddle.static.Executor(place)
+        exe.run(startup_prog)
+        print("Start run on {}".format(place))
+        for epoch in range(EPOCH):
+
+            pred_res, loss_res = exe.run(
+                main_prog,
+                feed={"a": a_np,
+                      "b": b_np,
+                      "label": label_np},
+                fetch_list=[prediction, loss])
+            if epoch % 10 == 0:
+                print("Epoch {} | Prediction[0]: {}, Loss: {}".format(
+                    epoch, pred_res[0], loss_res))
+
+        return pred_res, loss_res
+
+    def test_npu(self):
+        cpu_pred, cpu_loss = self._test(False)
+        npu_pred, npu_loss = self._test(True)
+
+        self.assertTrue(np.allclose(npu_pred, cpu_pred))
+        self.assertTrue(np.allclose(npu_loss, cpu_loss))
+
+
+if __name__ == '__main__':
+    unittest.main()
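
Note (illustration only, not part of the patch): the forward kernel lowers
slice onto Ascend's "SliceD" operator, which takes dense per-dimension
"offsets" and "size" attributes rather than the sparse axes/starts/ends the
Paddle op carries. Below is a minimal NumPy sketch of the mapping that
UpdateAttr performs, reusing the shapes from TestSliceOp above; the helper
name update_attr is made up for this sketch:

    import numpy as np

    def update_attr(in_shape, axes, starts, ends):
        # Expand sparse axes/starts/ends into dense offsets/size, clamping
        # each end to the dimension extent, as the C++ UpdateAttr does.
        offsets, size = [], []
        cnt = 0
        for i, dim in enumerate(in_shape):
            start, end = 0, dim
            if cnt < len(axes) and axes[cnt] == i:
                start = starts[cnt]
                end = min(ends[cnt], dim)
                cnt += 1
            offsets.append(start)
            size.append(end - start)
        return offsets, size

    x = np.random.random([3, 4, 5, 6]).astype('float32')
    offsets, size = update_attr(x.shape, axes=[0, 1, 2],
                                starts=[1, 0, 2], ends=[3, 3, 4])
    # offsets == [1, 0, 2, 0], size == [2, 3, 2, 6]
    window = tuple(slice(o, o + s) for o, s in zip(offsets, size))
    assert np.array_equal(x[window], x[1:3, 0:3, 2:4, :])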
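
Likewise, the grad kernel relies on the identity that the gradient of a
slice is the upstream gradient zero-padded back to the input shape, which is
why it lowers onto Ascend's "PadD". A NumPy sketch of that equivalence (same
illustrative caveat; the numbers come from the example above):

    import numpy as np

    in_shape = [3, 4, 5, 6]
    offsets, size = [1, 0, 2, 0], [2, 3, 2, 6]
    dout = np.ones(size, dtype='float32')  # gradient w.r.t. the slice output

    # paddings[i] = (offsets[i], in_dims[i] - size[i] - offsets[i]),
    # exactly as SliceGradNPUKernel builds them for PadD.
    paddings = [(o, d - s - o) for o, s, d in zip(offsets, size, in_shape)]
    dx = np.pad(dout, paddings, mode='constant')

    assert dx.shape == tuple(in_shape)
    # Gradient flows only into the sliced window; the rest stays zero.
    assert np.array_equal(dx[1:3, 0:3, 2:4, :], dout)
    assert dx.sum() == dout.sum()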