未验证 提交 871edade 编写于 作者: P pangyoki 提交者: GitHub

[NPU] slice support Tensor Input (#34067)

上级 113539eb
...@@ -61,11 +61,66 @@ class SliceNPUKernel : public framework::OpKernel<T> { ...@@ -61,11 +61,66 @@ class SliceNPUKernel : public framework::OpKernel<T> {
auto* input = ctx.Input<Tensor>("Input"); auto* input = ctx.Input<Tensor>("Input");
auto* out = ctx.Output<Tensor>("Out"); auto* out = ctx.Output<Tensor>("Out");
auto axes = ctx.Attr<std::vector<int>>("axes"); auto axes_int = ctx.Attr<std::vector<int>>("axes");
auto starts = ctx.Attr<std::vector<int>>("starts"); auto starts_int = ctx.Attr<std::vector<int>>("starts");
auto ends = ctx.Attr<std::vector<int>>("ends"); auto ends_int = ctx.Attr<std::vector<int>>("ends");
std::vector<int> axes(axes_int.begin(), axes_int.end());
std::vector<int> starts(starts_int.begin(), starts_int.end());
std::vector<int> ends(ends_int.begin(), ends_int.end());
auto decrease_axis = ctx.Attr<std::vector<int>>("decrease_axis");
auto infer_flags = ctx.Attr<std::vector<int>>("infer_flags");
const auto& in_dims = input->dims(); const auto& in_dims = input->dims();
// Get the accurate attribute value of starts and ends
auto starts_tensor_list = ctx.MultiInput<Tensor>("StartsTensorList");
if (ctx.HasInput("StartsTensor")) {
starts = GetDataFromTensor<int>(ctx.Input<Tensor>("StartsTensor"));
} else if (starts_tensor_list.size() > 0) {
starts = GetDataFromTensorList<int>(starts_tensor_list);
}
auto ends_tensor_list = ctx.MultiInput<Tensor>("EndsTensorList");
if (ctx.HasInput("EndsTensor")) {
ends = GetDataFromTensor<int>(ctx.Input<Tensor>("EndsTensor"));
} else if (ends_tensor_list.size() > 0) {
ends = GetDataFromTensorList<int>(ends_tensor_list);
}
PADDLE_ENFORCE_EQ(
starts.size(), axes.size(),
platform::errors::InvalidArgument(
"The size of starts must be equal to the size of axes."));
PADDLE_ENFORCE_EQ(
ends.size(), axes.size(),
platform::errors::InvalidArgument(
"The size of ends must be equal to the size of axes."));
if (ctx.HasInput("StartsTensor") || ctx.HasInput("EndsTensor") ||
starts_tensor_list.size() > 0 || ends_tensor_list.size() > 0) {
// Infer output dims
auto out_dims = out->dims();
auto slice_dims = out_dims;
for (size_t i = 0; i < axes.size(); ++i) {
// when start == -1 && end == start+1
if (starts[i] == -1 && ends[i] == 0 && infer_flags[i] == -1) {
auto ret =
std::find(decrease_axis.begin(), decrease_axis.end(), axes[i]);
if (ret != decrease_axis.end()) {
ends[i] = in_dims[axes[i]];
}
}
}
CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends);
slice_dims =
GetSliceDims<int>(in_dims, axes, starts, ends, nullptr, nullptr);
out_dims = GetDecreasedDims(slice_dims, decrease_axis);
out->Resize(out_dims);
}
out->mutable_data<T>(ctx.GetPlace()); out->mutable_data<T>(ctx.GetPlace());
std::vector<int> offsets(in_dims.size()); std::vector<int> offsets(in_dims.size());
...@@ -91,9 +146,28 @@ class SliceGradNPUKernel : public framework::OpKernel<T> { ...@@ -91,9 +146,28 @@ class SliceGradNPUKernel : public framework::OpKernel<T> {
auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out")); auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto* dinput = ctx.Output<Tensor>(framework::GradVarName("Input")); auto* dinput = ctx.Output<Tensor>(framework::GradVarName("Input"));
auto axes = ctx.Attr<std::vector<int>>("axes"); auto axes_int = ctx.Attr<std::vector<int>>("axes");
auto starts = ctx.Attr<std::vector<int>>("starts"); auto starts_int = ctx.Attr<std::vector<int>>("starts");
auto ends = ctx.Attr<std::vector<int>>("ends"); auto ends_int = ctx.Attr<std::vector<int>>("ends");
std::vector<int> axes(axes_int.begin(), axes_int.end());
std::vector<int> starts(starts_int.begin(), starts_int.end());
std::vector<int> ends(ends_int.begin(), ends_int.end());
// Get the accurate attribute value of starts and ends
auto starts_tensor_list = ctx.MultiInput<Tensor>("StartsTensorList");
if (ctx.HasInput("StartsTensor")) {
starts = GetDataFromTensor<int>(ctx.Input<Tensor>("StartsTensor"));
} else if (starts_tensor_list.size() > 0) {
starts = GetDataFromTensorList<int>(starts_tensor_list);
}
auto ends_tensor_list = ctx.MultiInput<Tensor>("EndsTensorList");
if (ctx.HasInput("EndsTensor")) {
ends = GetDataFromTensor<int>(ctx.Input<Tensor>("EndsTensor"));
} else if (ends_tensor_list.size() > 0) {
ends = GetDataFromTensorList<int>(ends_tensor_list);
}
const auto& in_dims = input->dims(); const auto& in_dims = input->dims();
int rank = in_dims.size(); int rank = in_dims.size();
......
...@@ -91,6 +91,164 @@ class TestSliceOpFp16(TestSliceOp): ...@@ -91,6 +91,164 @@ class TestSliceOpFp16(TestSliceOp):
self.place = paddle.NPUPlace(0) self.place = paddle.NPUPlace(0)
class TestSliceOpTensor(TestSliceOp):
    """Slice test whose bounds are fed through StartsTensor / EndsTensor.

    The `starts` / `ends` attributes are filled with -1 so that the values
    used for slicing have to come from the tensor inputs.
    """

    def setUp(self):
        self.op_type = "slice"
        self.set_npu()
        self.init_dtype()
        # config() reads self.dtype, so it must run after init_dtype().
        self.config()

        # The real slice bounds travel as tensor inputs.
        op_inputs = {'Input': self.input}
        op_inputs['StartsTensor'] = self.starts
        op_inputs['EndsTensor'] = self.ends
        self.inputs = op_inputs

        self.outputs = {'Out': self.out}

        # The attribute copies of the bounds are -1 placeholders, one per axis.
        op_attrs = {'axes': self.axes}
        op_attrs['starts'] = [-1] * 3
        op_attrs['ends'] = [-1] * 3
        op_attrs['infer_flags'] = self.infer_flags
        self.attrs = op_attrs

    def config(self):
        # Random 4-D input; the first three axes are sliced, the last is kept.
        self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype)
        self.starts = np.array([1, 0, 2], dtype='int32')
        self.ends = np.array([3, 3, 4], dtype='int32')
        self.axes = list(range(3))
        self.infer_flags = [-1] * 3
        # Reference result computed with plain NumPy slicing.
        self.out = self.input[1:3, :3, 2:4]
class TestSliceOpTensor2(TestSliceOpTensor):
    """Single-axis variant of the StartsTensor / EndsTensor slice test."""

    def setUp(self):
        self.op_type = "slice"
        self.set_npu()
        self.init_dtype()
        # config() reads self.dtype, so it must run after init_dtype().
        self.config()

        # The real slice bounds travel as tensor inputs.
        op_inputs = {'Input': self.input}
        op_inputs['StartsTensor'] = self.starts
        op_inputs['EndsTensor'] = self.ends
        self.inputs = op_inputs

        self.outputs = {'Out': self.out}

        # Only one axis is sliced here, so a single -1 placeholder is used.
        op_attrs = {'axes': self.axes}
        op_attrs['starts'] = [-1] * 1
        op_attrs['ends'] = [-1] * 1
        op_attrs['infer_flags'] = self.infer_flags
        self.attrs = op_attrs

    def config(self):
        # Random 3-D input; only axis 1 is sliced.
        self.input = np.random.random([10, 5, 6]).astype(self.dtype)
        self.starts = np.array([0], dtype='int32')
        self.ends = np.array([1], dtype='int32')
        self.axes = [1]
        self.infer_flags = [-1]
        # Reference result computed with plain NumPy slicing.
        self.out = self.input[:, :1]
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestSliceOpFp16Tensor(TestSliceOpTensor):
    """float16 run of the StartsTensor / EndsTensor slice test."""

    def init_dtype(self):
        # Switch the test data to half precision.
        self.dtype = np.float16

    def set_npu(self):
        # Flags are stored on the class itself, not on the instance.
        test_cls = self.__class__
        test_cls.use_npu = True
        test_cls.no_need_check_grad = True
        self.place = paddle.NPUPlace(0)
class TestSliceOpTensorList(TestSliceOp):
    """Slice test whose bounds are fed through StartsTensorList / EndsTensorList.

    Every bound becomes its own named one-element int32 tensor; the `starts` /
    `ends` attributes are filled with -1.
    """

    def setUp(self):
        self.op_type = "slice"
        self.set_npu()
        self.init_dtype()
        # config() reads self.dtype, so it must run after init_dtype().
        self.config()

        # One ("start<i>", tensor) pair per start bound.
        self.starts_tensor_list = [
            ("start" + str(pos), np.ones(1, dtype='int32') * bound)
            for pos, bound in enumerate(self.starts)
        ]
        # One ("end<i>", tensor) pair per end bound.
        self.ends_tensor_list = [
            ("end" + str(pos), np.ones(1, dtype='int32') * bound)
            for pos, bound in enumerate(self.ends)
        ]

        op_inputs = {'Input': self.input}
        op_inputs['StartsTensorList'] = self.starts_tensor_list
        op_inputs['EndsTensorList'] = self.ends_tensor_list
        self.inputs = op_inputs

        self.outputs = {'Out': self.out}

        # The attribute copies of the bounds are -1 placeholders, one per axis.
        op_attrs = {'axes': self.axes}
        op_attrs['starts'] = [-1] * 3
        op_attrs['ends'] = [-1] * 3
        op_attrs['infer_flags'] = self.infer_flags
        self.attrs = op_attrs

    def config(self):
        # Random 4-D input; the first three axes are sliced, the last is kept.
        self.input = np.random.random([3, 4, 5, 6]).astype(self.dtype)
        self.starts = [1, 0, 2]
        self.ends = [3, 3, 4]
        self.axes = list(range(3))
        self.infer_flags = [-1] * 3
        # Reference result computed with plain NumPy slicing.
        self.out = self.input[1:3, :3, 2:4]
class TestSliceOpTensorList2(TestSliceOpTensorList):
    """Single-axis variant of the StartsTensorList / EndsTensorList slice test."""

    def setUp(self):
        self.op_type = "slice"
        self.set_npu()
        self.init_dtype()
        # config() reads self.dtype, so it must run after init_dtype().
        self.config()

        # One ("start<i>", tensor) pair per start bound.
        self.starts_tensor_list = [
            ("start" + str(pos), np.ones(1, dtype='int32') * bound)
            for pos, bound in enumerate(self.starts)
        ]
        # One ("end<i>", tensor) pair per end bound.
        self.ends_tensor_list = [
            ("end" + str(pos), np.ones(1, dtype='int32') * bound)
            for pos, bound in enumerate(self.ends)
        ]

        op_inputs = {'Input': self.input}
        op_inputs['StartsTensorList'] = self.starts_tensor_list
        op_inputs['EndsTensorList'] = self.ends_tensor_list
        self.inputs = op_inputs

        self.outputs = {'Out': self.out}

        # Only one axis is sliced here, so a single -1 placeholder is used.
        op_attrs = {'axes': self.axes}
        op_attrs['starts'] = [-1] * 1
        op_attrs['ends'] = [-1] * 1
        op_attrs['infer_flags'] = self.infer_flags
        self.attrs = op_attrs

    def config(self):
        # Random 3-D input; only axis 1 is sliced.
        self.input = np.random.random([10, 5, 6]).astype(self.dtype)
        # The bounds are int32 arrays here; setUp() iterates over their elements.
        self.starts = np.array([0], dtype='int32')
        self.ends = np.array([1], dtype='int32')
        self.axes = [1]
        self.infer_flags = [-1]
        # Reference result computed with plain NumPy slicing.
        self.out = self.input[:, :1]
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestSliceOpFp16TensorList(TestSliceOpTensorList):
    """float16 run of the StartsTensorList / EndsTensorList slice test."""

    def init_dtype(self):
        # Switch the test data to half precision.
        self.dtype = np.float16

    def set_npu(self):
        # Flags are stored on the class itself, not on the instance.
        test_cls = self.__class__
        test_cls.use_npu = True
        test_cls.no_need_check_grad = True
        self.place = paddle.NPUPlace(0)
@unittest.skipIf(not paddle.is_compiled_with_npu(), @unittest.skipIf(not paddle.is_compiled_with_npu(),
"core is not compiled with NPU") "core is not compiled with NPU")
class TestSliceNet(unittest.TestCase): class TestSliceNet(unittest.TestCase):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册