diff --git a/paddle/fluid/operators/slice_op_npu.cc b/paddle/fluid/operators/slice_op_npu.cc
index 9e6e6f04edbbfd6784ca5d79e126047de0ef9430..1084eadc55c5bcaeb86a1aac5016b996beb5873b 100644
--- a/paddle/fluid/operators/slice_op_npu.cc
+++ b/paddle/fluid/operators/slice_op_npu.cc
@@ -61,11 +61,66 @@ class SliceNPUKernel : public framework::OpKernel<T> {
     auto* input = ctx.Input<Tensor>("Input");
     auto* out = ctx.Output<Tensor>("Out");
 
-    auto axes = ctx.Attr<std::vector<int>>("axes");
-    auto starts = ctx.Attr<std::vector<int>>("starts");
-    auto ends = ctx.Attr<std::vector<int>>("ends");
+    auto axes_int = ctx.Attr<std::vector<int>>("axes");
+    auto starts_int = ctx.Attr<std::vector<int>>("starts");
+    auto ends_int = ctx.Attr<std::vector<int>>("ends");
+    std::vector<int> axes(axes_int.begin(), axes_int.end());
+    std::vector<int> starts(starts_int.begin(), starts_int.end());
+    std::vector<int> ends(ends_int.begin(), ends_int.end());
+
+    auto decrease_axis = ctx.Attr<std::vector<int>>("decrease_axis");
+    auto infer_flags = ctx.Attr<std::vector<int>>("infer_flags");
+    const auto& in_dims = input->dims();
+    // Get the accurate attribute value of starts and ends
+    auto starts_tensor_list = ctx.MultiInput<Tensor>("StartsTensorList");
+    if (ctx.HasInput("StartsTensor")) {
+      starts = GetDataFromTensor<int>(ctx.Input<Tensor>("StartsTensor"));
+    } else if (starts_tensor_list.size() > 0) {
+      starts = GetDataFromTensorList<int>(starts_tensor_list);
+    }
+
+    auto ends_tensor_list = ctx.MultiInput<Tensor>("EndsTensorList");
+    if (ctx.HasInput("EndsTensor")) {
+      ends = GetDataFromTensor<int>(ctx.Input<Tensor>("EndsTensor"));
+    } else if (ends_tensor_list.size() > 0) {
+      ends = GetDataFromTensorList<int>(ends_tensor_list);
+    }
+
+    PADDLE_ENFORCE_EQ(
+        starts.size(), axes.size(),
+        platform::errors::InvalidArgument(
+            "The size of starts must be equal to the size of axes."));
+    PADDLE_ENFORCE_EQ(
+        ends.size(), axes.size(),
+        platform::errors::InvalidArgument(
+            "The size of ends must be equal to the size of axes."));
+
+    if (ctx.HasInput("StartsTensor") || ctx.HasInput("EndsTensor") ||
+        starts_tensor_list.size() > 0 || ends_tensor_list.size() > 0) {
+      // Infer output dims
+      auto out_dims = out->dims();
+      auto slice_dims = out_dims;
+      for (size_t i = 0; i < axes.size(); ++i) {
+        // when start == -1 && end == start+1
+        if (starts[i] == -1 && ends[i] == 0 && infer_flags[i] == -1) {
+          auto ret =
+              std::find(decrease_axis.begin(), decrease_axis.end(), axes[i]);
+          if (ret != decrease_axis.end()) {
+            ends[i] = in_dims[axes[i]];
+          }
+        }
+      }
+
+      CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends);
+      slice_dims =
+          GetSliceDims<int>(in_dims, axes, starts, ends, nullptr, nullptr);
+      out_dims = GetDecreasedDims(slice_dims, decrease_axis);
+
+      out->Resize(out_dims);
+    }
+
     out->mutable_data<T>(ctx.GetPlace());
 
     std::vector<int> offsets(in_dims.size());
@@ -91,9 +146,28 @@ class SliceGradNPUKernel : public framework::OpKernel<T> {
     auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
     auto* dinput = ctx.Output<Tensor>(framework::GradVarName("Input"));
 
-    auto axes = ctx.Attr<std::vector<int>>("axes");
-    auto starts = ctx.Attr<std::vector<int>>("starts");
-    auto ends = ctx.Attr<std::vector<int>>("ends");
+    auto axes_int = ctx.Attr<std::vector<int>>("axes");
+    auto starts_int = ctx.Attr<std::vector<int>>("starts");
+    auto ends_int = ctx.Attr<std::vector<int>>("ends");
+    std::vector<int> axes(axes_int.begin(), axes_int.end());
+    std::vector<int> starts(starts_int.begin(), starts_int.end());
+    std::vector<int> ends(ends_int.begin(), ends_int.end());
+
+    // Get the accurate attribute value of starts and ends
+    auto starts_tensor_list = ctx.MultiInput<Tensor>("StartsTensorList");
+    if (ctx.HasInput("StartsTensor")) {
+      starts = GetDataFromTensor<int>(ctx.Input<Tensor>("StartsTensor"));
+    } else if (starts_tensor_list.size() > 0) {
+      starts = GetDataFromTensorList<int>(starts_tensor_list);
+    }
+
+    auto ends_tensor_list = ctx.MultiInput<Tensor>("EndsTensorList");
+    if (ctx.HasInput("EndsTensor")) {
+      ends = GetDataFromTensor<int>(ctx.Input<Tensor>("EndsTensor"));
+    } else if (ends_tensor_list.size() > 0) {
+      ends = GetDataFromTensorList<int>(ends_tensor_list);
+    }
+
     const auto& in_dims = input->dims();
 
     int rank = in_dims.size();
diff --git a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
index c57758dca8e5caa8ce5293af0fe4412b6d0a40c3..b56ee8c8c0748b1e8afacd5cab3ab1b721d35cfa 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
@@ -91,6 +91,164 @@ class TestSliceOpFp16(TestSliceOp):
         self.place = paddle.NPUPlace(0)
 
 
+class TestSliceOpTensor(TestSliceOp):
+    def setUp(self):
+        self.op_type = "slice"
+        self.set_npu()
+        self.init_dtype()
+        self.config()
+        self.inputs = {
+            'Input': self.input,
+            'StartsTensor': self.starts,
+            'EndsTensor': self.ends
+        }
+        self.outputs = {'Out': self.out}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': [-1, -1, -1],
+            'ends': [-1, -1, -1],
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype)
+        self.starts = np.array([1, 0, 2]).astype('int32')
+        self.ends = np.array([3, 3, 4]).astype('int32')
+        self.axes = [0, 1, 2]
+        self.infer_flags = [-1, -1, -1]
+        self.out = self.input[1:3, 0:3, 2:4, :]
+
+
+class TestSliceOpTensor2(TestSliceOpTensor):
+    def setUp(self):
+        self.op_type = "slice"
+        self.set_npu()
+        self.init_dtype()
+        self.config()
+        self.inputs = {
+            'Input': self.input,
+            'StartsTensor': self.starts,
+            'EndsTensor': self.ends
+        }
+        self.outputs = {'Out': self.out}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': [-1],
+            'ends': [-1],
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.input = np.random.random([10, 5, 6]).astype(self.dtype)
+        self.starts = np.array([0]).astype('int32')
+        self.ends = np.array([1]).astype('int32')
+        self.axes = [1]
+        self.infer_flags = [-1]
+        self.out = self.input[:, 0:1, :]
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestSliceOpFp16Tensor(TestSliceOpTensor):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.__class__.no_need_check_grad = True
+        self.place = paddle.NPUPlace(0)
+
+
+class TestSliceOpTensorList(TestSliceOp):
+    def setUp(self):
+        self.op_type = "slice"
+        self.set_npu()
+        self.init_dtype()
+        self.config()
+
+        self.starts_tensor_list = []
+        for index, ele in enumerate(self.starts):
+            self.starts_tensor_list.append(("start" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.ends_tensor_list = []
+        for index, ele in enumerate(self.ends):
+            self.ends_tensor_list.append(("end" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.inputs = {
+            'Input': self.input,
+            'StartsTensorList': self.starts_tensor_list,
+            'EndsTensorList': self.ends_tensor_list
+        }
+        self.outputs = {'Out': self.out}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': [-1, -1, -1],
+            'ends': [-1, -1, -1],
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype)
+        self.starts = [1, 0, 2]
+        self.ends = [3, 3, 4]
+        self.axes = [0, 1, 2]
+        self.infer_flags = [-1, -1, -1]
+        self.out = self.input[1:3, 0:3, 2:4, :]
+
+
+class TestSliceOpTensorList2(TestSliceOpTensorList):
+    def setUp(self):
+        self.op_type = "slice"
+        self.set_npu()
+        self.init_dtype()
+        self.config()
+
+        self.starts_tensor_list = []
+        for index, ele in enumerate(self.starts):
+            self.starts_tensor_list.append(("start" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.ends_tensor_list = []
+        for index, ele in enumerate(self.ends):
+            self.ends_tensor_list.append(("end" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.inputs = {
+            'Input': self.input,
+            'StartsTensorList': self.starts_tensor_list,
+            'EndsTensorList': self.ends_tensor_list
+        }
+        self.outputs = {'Out': self.out}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': [-1],
+            'ends': [-1],
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.input = np.random.random([10, 5, 6]).astype(self.dtype)
+        self.starts = np.array([0]).astype('int32')
+        self.ends = np.array([1]).astype('int32')
+        self.axes = [1]
+        self.infer_flags = [-1]
+        self.out = self.input[:, 0:1, :]
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestSliceOpFp16TensorList(TestSliceOpTensorList):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.__class__.no_need_check_grad = True
+        self.place = paddle.NPUPlace(0)
+
+
 @unittest.skipIf(not paddle.is_compiled_with_npu(),
                  "core is not compiled with NPU")
 class TestSliceNet(unittest.TestCase):