[NPU] slice support Tensor Input (#34067)

871edade · pangyoki · GitHub · 113539eb · 871edade · 871edade
Showing 2 changed files with 238 additions and 6 deletions

paddle/fluid/operators/slice_op_npu.cc paddle/fluid/operators/slice_op_npu.cc +80 -6

python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py +158 -0

No files found.
--- a/paddle/fluid/operators/slice_op_npu.cc
+++ b/paddle/fluid/operators/slice_op_npu.cc
@@ -61,11 +61,66 @@ class SliceNPUKernel : public framework::OpKernel<T> {
    auto* input = ctx.Input<Tensor>("Input");
    auto* out = ctx.Output<Tensor>("Out");

-    auto axes = ctx.Attr<std::vector<int>>("axes");
-    auto starts = ctx.Attr<std::vector<int>>("starts");
-    auto ends = ctx.Attr<std::vector<int>>("ends");
+    auto axes_int = ctx.Attr<std::vector<int>>("axes");
+    auto starts_int = ctx.Attr<std::vector<int>>("starts");
+    auto ends_int = ctx.Attr<std::vector<int>>("ends");
+    std::vector<int> axes(axes_int.begin(), axes_int.end());
+    std::vector<int> starts(starts_int.begin(), starts_int.end());
+    std::vector<int> ends(ends_int.begin(), ends_int.end());
+
+    auto decrease_axis = ctx.Attr<std::vector<int>>("decrease_axis");
+    auto infer_flags = ctx.Attr<std::vector<int>>("infer_flags");
+
    const auto& in_dims = input->dims();

+    // Get the accurate attribute value of starts and ends
+    auto starts_tensor_list = ctx.MultiInput<Tensor>("StartsTensorList");
+    if (ctx.HasInput("StartsTensor")) {
+      starts = GetDataFromTensor<int>(ctx.Input<Tensor>("StartsTensor"));
+    } else if (starts_tensor_list.size() > 0) {
+      starts = GetDataFromTensorList<int>(starts_tensor_list);
+    }
+
+    auto ends_tensor_list = ctx.MultiInput<Tensor>("EndsTensorList");
+    if (ctx.HasInput("EndsTensor")) {
+      ends = GetDataFromTensor<int>(ctx.Input<Tensor>("EndsTensor"));
+    } else if (ends_tensor_list.size() > 0) {
+      ends = GetDataFromTensorList<int>(ends_tensor_list);
+    }
+
+    PADDLE_ENFORCE_EQ(
+        starts.size(), axes.size(),
+        platform::errors::InvalidArgument(
+            "The size of starts must be equal to the size of axes."));
+    PADDLE_ENFORCE_EQ(
+        ends.size(), axes.size(),
+        platform::errors::InvalidArgument(
+            "The size of ends must be equal to the size of axes."));
+
+    if (ctx.HasInput("StartsTensor") || ctx.HasInput("EndsTensor") ||
+        starts_tensor_list.size() > 0 || ends_tensor_list.size() > 0) {
+      // Infer output dims
+      auto out_dims = out->dims();
+      auto slice_dims = out_dims;
+      for (size_t i = 0; i < axes.size(); ++i) {
+        // when start == -1 && end == start+1
+        if (starts[i] == -1 && ends[i] == 0 && infer_flags[i] == -1) {
+          auto ret =
+              std::find(decrease_axis.begin(), decrease_axis.end(), axes[i]);
+          if (ret != decrease_axis.end()) {
+            ends[i] = in_dims[axes[i]];
+          }
+        }
+      }
+
+      CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends);
+      slice_dims =
+          GetSliceDims<int>(in_dims, axes, starts, ends, nullptr, nullptr);
+      out_dims = GetDecreasedDims(slice_dims, decrease_axis);
+
+      out->Resize(out_dims);
+    }
+
    out->mutable_data<T>(ctx.GetPlace());

    std::vector<int> offsets(in_dims.size());
@@ -91,9 +146,28 @@ class SliceGradNPUKernel : public framework::OpKernel<T> {
    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* dinput = ctx.Output<Tensor>(framework::GradVarName("Input"));

-    auto axes = ctx.Attr<std::vector<int>>("axes");
-    auto starts = ctx.Attr<std::vector<int>>("starts");
-    auto ends = ctx.Attr<std::vector<int>>("ends");
+    auto axes_int = ctx.Attr<std::vector<int>>("axes");
+    auto starts_int = ctx.Attr<std::vector<int>>("starts");
+    auto ends_int = ctx.Attr<std::vector<int>>("ends");
+    std::vector<int> axes(axes_int.begin(), axes_int.end());
+    std::vector<int> starts(starts_int.begin(), starts_int.end());
+    std::vector<int> ends(ends_int.begin(), ends_int.end());
+
+    // Get the accurate attribute value of starts and ends
+    auto starts_tensor_list = ctx.MultiInput<Tensor>("StartsTensorList");
+    if (ctx.HasInput("StartsTensor")) {
+      starts = GetDataFromTensor<int>(ctx.Input<Tensor>("StartsTensor"));
+    } else if (starts_tensor_list.size() > 0) {
+      starts = GetDataFromTensorList<int>(starts_tensor_list);
+    }
+
+    auto ends_tensor_list = ctx.MultiInput<Tensor>("EndsTensorList");
+    if (ctx.HasInput("EndsTensor")) {
+      ends = GetDataFromTensor<int>(ctx.Input<Tensor>("EndsTensor"));
+    } else if (ends_tensor_list.size() > 0) {
+      ends = GetDataFromTensorList<int>(ends_tensor_list);
+    }
+
    const auto& in_dims = input->dims();
    int rank = in_dims.size();


--- a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
@@ -91,6 +91,164 @@ class TestSliceOpFp16(TestSliceOp):
        self.place = paddle.NPUPlace(0)


+class TestSliceOpTensor(TestSliceOp):
+    def setUp(self):
+        self.op_type = "slice"
+        self.set_npu()
+        self.init_dtype()
+        self.config()
+        self.inputs = {
+            'Input': self.input,
+            'StartsTensor': self.starts,
+            'EndsTensor': self.ends
+        }
+        self.outputs = {'Out': self.out}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': [-1, -1, -1],
+            'ends': [-1, -1, -1],
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype)
+        self.starts = np.array([1, 0, 2]).astype('int32')
+        self.ends = np.array([3, 3, 4]).astype('int32')
+        self.axes = [0, 1, 2]
+        self.infer_flags = [-1, -1, -1]
+        self.out = self.input[1:3, 0:3, 2:4, :]
+
+
+class TestSliceOpTensor2(TestSliceOpTensor):
+    def setUp(self):
+        self.op_type = "slice"
+        self.set_npu()
+        self.init_dtype()
+        self.config()
+        self.inputs = {
+            'Input': self.input,
+            'StartsTensor': self.starts,
+            'EndsTensor': self.ends
+        }
+        self.outputs = {'Out': self.out}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': [-1],
+            'ends': [-1],
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.input = np.random.random([10, 5, 6]).astype(self.dtype)
+        self.starts = np.array([0]).astype('int32')
+        self.ends = np.array([1]).astype('int32')
+        self.axes = [1]
+        self.infer_flags = [-1]
+        self.out = self.input[:, 0:1, :]
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestSliceOpFp16Tensor(TestSliceOpTensor):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.__class__.no_need_check_grad = True
+        self.place = paddle.NPUPlace(0)
+
+
+class TestSliceOpTensorList(TestSliceOp):
+    def setUp(self):
+        self.op_type = "slice"
+        self.set_npu()
+        self.init_dtype()
+        self.config()
+
+        self.starts_tensor_list = []
+        for index, ele in enumerate(self.starts):
+            self.starts_tensor_list.append(("start" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.ends_tensor_list = []
+        for index, ele in enumerate(self.ends):
+            self.ends_tensor_list.append(("end" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.inputs = {
+            'Input': self.input,
+            'StartsTensorList': self.starts_tensor_list,
+            'EndsTensorList': self.ends_tensor_list
+        }
+        self.outputs = {'Out': self.out}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': [-1, -1, -1],
+            'ends': [-1, -1, -1],
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype)
+        self.starts = [1, 0, 2]
+        self.ends = [3, 3, 4]
+        self.axes = [0, 1, 2]
+        self.infer_flags = [-1, -1, -1]
+        self.out = self.input[1:3, 0:3, 2:4, :]
+
+
+class TestSliceOpTensorList2(TestSliceOpTensorList):
+    def setUp(self):
+        self.op_type = "slice"
+        self.set_npu()
+        self.init_dtype()
+        self.config()
+
+        self.starts_tensor_list = []
+        for index, ele in enumerate(self.starts):
+            self.starts_tensor_list.append(("start" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.ends_tensor_list = []
+        for index, ele in enumerate(self.ends):
+            self.ends_tensor_list.append(("end" + str(index), np.ones(
+                (1)).astype('int32') * ele))
+
+        self.inputs = {
+            'Input': self.input,
+            'StartsTensorList': self.starts_tensor_list,
+            'EndsTensorList': self.ends_tensor_list
+        }
+        self.outputs = {'Out': self.out}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': [-1],
+            'ends': [-1],
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.input = np.random.random([10, 5, 6]).astype(self.dtype)
+        self.starts = np.array([0]).astype('int32')
+        self.ends = np.array([1]).astype('int32')
+        self.axes = [1]
+        self.infer_flags = [-1]
+        self.out = self.input[:, 0:1, :]
+
+
+@unittest.skipIf(not paddle.is_compiled_with_npu(),
+                 "core is not compiled with NPU")
+class TestSliceOpFp16TensorList(TestSliceOpTensorList):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.__class__.no_need_check_grad = True
+        self.place = paddle.NPUPlace(0)
+
+
 @unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
 class TestSliceNet(unittest.TestCase):