提交 84d221b6 编写于 作者: L liym27 提交者: Aurelius84

improve unsqueeze op to support int, Tensor for argument axes (#20824)

* improve unsqueeze op to support int, Tensor and Tensor list for argument axes.
test=develop

* call Resize only when necessary. test=develop
上级 3da5a772
...@@ -37,6 +37,7 @@ class UnsqueezeOp : public framework::OperatorWithKernel { ...@@ -37,6 +37,7 @@ class UnsqueezeOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_LE(x_dims.size(), 6, PADDLE_ENFORCE_LE(x_dims.size(), 6,
"Invalid dimensions, the rank of Input(X) " "Invalid dimensions, the rank of Input(X) "
"should be in the range of [1, 6] (Eigen limit)"); "should be in the range of [1, 6] (Eigen limit)");
if (!axes.empty()) {
auto out_dims = GetOutputShape(axes, x_dims); auto out_dims = GetOutputShape(axes, x_dims);
ctx->SetOutputDim("Out", out_dims); ctx->SetOutputDim("Out", out_dims);
if (x_dims[0] == out_dims[0]) { if (x_dims[0] == out_dims[0]) {
...@@ -44,6 +45,31 @@ class UnsqueezeOp : public framework::OperatorWithKernel { ...@@ -44,6 +45,31 @@ class UnsqueezeOp : public framework::OperatorWithKernel {
// are the same. // are the same.
ctx->ShareLoD("X", "Out"); ctx->ShareLoD("X", "Out");
} }
} else if (ctx->HasInputs("AxesTensorList")) {
auto AxesTensorList = ctx->Inputs("AxesTensorList");
int output_size = x_dims.size() + static_cast<int>(AxesTensorList.size());
PADDLE_ENFORCE_LE(output_size, 6,
"The output tensor's rank should be less than 6.");
std::vector<int> vec_out_dims(output_size, -1);
ctx->SetOutputDim("Out", framework::make_ddim(vec_out_dims));
} else if (ctx->HasInput("AxesTensor")) {
auto axes_dims = ctx->GetInputDim("AxesTensor");
PADDLE_ENFORCE_EQ(
axes_dims.size(), 1,
"Input(AxesTensor)'s dimension of Op(unsqueeze) must be 1. "
"But received AxesTensor's shape = [%s], "
"AxesTensor's dimension = %d.",
axes_dims, axes_dims.size());
PADDLE_ENFORCE_GE(axes_dims[0], 0,
"Input(AxesTensor)'s shape must be known. But received "
"AxesTensor's shape = [%s]",
axes_dims);
int output_size = x_dims.size() + static_cast<int>(axes_dims[0]);
PADDLE_ENFORCE_LE(output_size, 6,
"The output tensor's rank should be less than 6.");
std::vector<int> vec_out_dims(output_size, -1);
ctx->SetOutputDim("Out", framework::make_ddim(vec_out_dims));
}
} }
static framework::DDim GetOutputShape(const std::vector<int> unsqz_dims, static framework::DDim GetOutputShape(const std::vector<int> unsqz_dims,
...@@ -83,19 +109,46 @@ class UnsqueezeOp : public framework::OperatorWithKernel { ...@@ -83,19 +109,46 @@ class UnsqueezeOp : public framework::OperatorWithKernel {
return framework::make_ddim(output_shape); return framework::make_ddim(output_shape);
} }
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(ctx.Input<framework::LoDTensor>("X")->type(),
ctx.device_context());
}
framework::OpKernelType GetKernelTypeForVar(
const std::string &var_name, const framework::Tensor &tensor,
const framework::OpKernelType &expected_kernel_type) const override {
if (var_name == "AxesTensor" || var_name == "AxesTensorList") {
return expected_kernel_type;
}
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(), tensor.layout());
}
}; };
class UnsqueezeOpMaker : public framework::OpProtoAndCheckerMaker { class UnsqueezeOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddInput("X", "(Tensor). The input tensor of unsqueeze operator."); AddInput("X", "(Tensor). The input tensor of unsqueeze operator.");
AddInput("AxesTensor",
"(Tensor<int32>, optional). The dimensions to be inserted. "
"If it exists, it will replace Attr(axes).")
.AsDispensable();
AddInput(
"AxesTensorList",
"(vector<Tensor<int32>>, optional). The dimensions to be inserted. "
"If it exists, it will replace Attr(axes)."
"The shape of the element in vector must be [1].")
.AsDuplicable()
.AsDispensable();
AddOutput("Out", "(Tensor). The output tensor of unsqueeze operator."); AddOutput("Out", "(Tensor). The output tensor of unsqueeze operator.");
AddAttr<std::vector<int>>("axes", AddAttr<std::vector<int>>("axes",
"(std::vector<int>). List of integers," "(std::vector<int>). List of integers,"
" indicating the dimensions to be inserted") " indicating the dimensions to be inserted")
.SetDefault({})
.AddCustomChecker([](const std::vector<int> &axes) { .AddCustomChecker([](const std::vector<int> &axes) {
PADDLE_ENFORCE_EQ(!axes.empty(), true,
"Invalid axes, The unsqueeze axes is empty.");
// Validity Check: axes dims (<6). // Validity Check: axes dims (<6).
PADDLE_ENFORCE_LT(static_cast<int>(axes.size()), 6, PADDLE_ENFORCE_LT(static_cast<int>(axes.size()), 6,
"Invalid dimensions, dynamic dimensions should be " "Invalid dimensions, dynamic dimensions should be "
...@@ -136,28 +189,12 @@ class UnsqueezeGradOp : public framework::OperatorWithKernel { ...@@ -136,28 +189,12 @@ class UnsqueezeGradOp : public framework::OperatorWithKernel {
// will be used in unsqueeze_grad, in this way, the framework can reuse // will be used in unsqueeze_grad, in this way, the framework can reuse
// the memory of X immediately the unsqueeze2_op is finished. // the memory of X immediately the unsqueeze2_op is finished.
// Considering compatibility issues, we could not fix unsqueeze2_op // Considering compatibility issues, we could not fix unsqueeze2_op
class Unsqueeze2Op : public framework::OperatorWithKernel { class Unsqueeze2Op : public UnsqueezeOp {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using UnsqueezeOp::UnsqueezeOp;
void InferShape(framework::InferShapeContext *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, UnsqueezeOp::InferShape(ctx);
"Input(X) of Unsqueeze operator should not be null.");
PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
"Output(Out) of Unsqueeze operator should not be null.");
const auto &axes = ctx->Attrs().Get<std::vector<int>>("axes");
const auto &x_dims = ctx->GetInputDim("X"); const auto &x_dims = ctx->GetInputDim("X");
// Validity Check: input tensor dims (<6).
PADDLE_ENFORCE_LE(x_dims.size(), 6,
"Invalid dimensions, the rank of Input(X) "
"should be in the range of [1, 6] (Eigen limit)");
auto out_dims = UnsqueezeOp::GetOutputShape(axes, x_dims);
ctx->SetOutputDim("Out", out_dims);
if (x_dims[0] == out_dims[0]) {
// Only pass LoD when the first dimension of output and Input(X)
// are the same.
ctx->ShareLoD("X", "Out");
}
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
ctx->HasOutput("XShape"), true, ctx->HasOutput("XShape"), true,
...@@ -252,12 +289,11 @@ REGISTER_OP_CPU_KERNEL( ...@@ -252,12 +289,11 @@ REGISTER_OP_CPU_KERNEL(
ops::UnsqueezeGradKernel<paddle::platform::CPUDeviceContext, int8_t>, ops::UnsqueezeGradKernel<paddle::platform::CPUDeviceContext, int8_t>,
ops::UnsqueezeGradKernel<paddle::platform::CPUDeviceContext, int64_t>); ops::UnsqueezeGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
unsqueeze2, unsqueeze2, ops::UnsqueezeKernel<paddle::platform::CPUDeviceContext, float>,
ops::Unsqueeze2Kernel<paddle::platform::CPUDeviceContext, float>, ops::UnsqueezeKernel<paddle::platform::CPUDeviceContext, double>,
ops::Unsqueeze2Kernel<paddle::platform::CPUDeviceContext, double>, ops::UnsqueezeKernel<paddle::platform::CPUDeviceContext, int>,
ops::Unsqueeze2Kernel<paddle::platform::CPUDeviceContext, int>, ops::UnsqueezeKernel<paddle::platform::CPUDeviceContext, int8_t>,
ops::Unsqueeze2Kernel<paddle::platform::CPUDeviceContext, int8_t>, ops::UnsqueezeKernel<paddle::platform::CPUDeviceContext, int64_t>);
ops::Unsqueeze2Kernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
unsqueeze2_grad, unsqueeze2_grad,
ops::Unsqueeze2GradKernel<paddle::platform::CPUDeviceContext, float>, ops::Unsqueeze2GradKernel<paddle::platform::CPUDeviceContext, float>,
......
...@@ -31,11 +31,11 @@ REGISTER_OP_CUDA_KERNEL( ...@@ -31,11 +31,11 @@ REGISTER_OP_CUDA_KERNEL(
ops::UnsqueezeGradKernel<paddle::platform::CUDADeviceContext, int64_t>); ops::UnsqueezeGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
unsqueeze2, unsqueeze2,
ops::Unsqueeze2Kernel<paddle::platform::CUDADeviceContext, float>, ops::UnsqueezeKernel<paddle::platform::CUDADeviceContext, float>,
ops::Unsqueeze2Kernel<paddle::platform::CUDADeviceContext, double>, ops::UnsqueezeKernel<paddle::platform::CUDADeviceContext, double>,
ops::Unsqueeze2Kernel<paddle::platform::CUDADeviceContext, int>, ops::UnsqueezeKernel<paddle::platform::CUDADeviceContext, int>,
ops::Unsqueeze2Kernel<paddle::platform::CUDADeviceContext, int8_t>, ops::UnsqueezeKernel<paddle::platform::CUDADeviceContext, int8_t>,
ops::Unsqueeze2Kernel<paddle::platform::CUDADeviceContext, int64_t>); ops::UnsqueezeKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
unsqueeze2_grad, unsqueeze2_grad,
ops::Unsqueeze2GradKernel<paddle::platform::CUDADeviceContext, float>, ops::Unsqueeze2GradKernel<paddle::platform::CUDADeviceContext, float>,
......
...@@ -23,17 +23,66 @@ limitations under the License. */ ...@@ -23,17 +23,66 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T>
inline std::vector<T> GetDataFromTensorList(
const std::vector<const framework::Tensor *> &list_tensor) {
std::vector<T> vec_new_data;
for (size_t i = 0; i < list_tensor.size(); ++i) {
auto tensor = list_tensor[i];
PADDLE_ENFORCE_EQ(
tensor->dims(), framework::make_ddim({1}),
"ShapeError: If the element type is Tensor, "
"the element's shape must be [1]. But received the element's shape "
"is [%s]",
tensor->dims());
if (platform::is_gpu_place(tensor->place())) {
framework::Tensor temp;
TensorCopySync(*tensor, platform::CPUPlace(), &temp);
vec_new_data.push_back((*temp.data<T>()));
} else {
vec_new_data.push_back((*tensor->data<T>()));
}
}
return vec_new_data;
}
template <typename T>
inline std::vector<T> GetDataFromTensor(const framework::Tensor *x) {
auto *data = x->data<T>();
framework::Tensor cpu_attr_tensor;
if (platform::is_gpu_place(x->place())) {
TensorCopySync(*x, platform::CPUPlace(), &cpu_attr_tensor);
data = cpu_attr_tensor.data<T>();
}
auto vec_data = std::vector<T>(data, data + x->numel());
return vec_data;
}
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class UnsqueezeKernel : public framework::OpKernel<T> { class UnsqueezeKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &context) const override { void Compute(const framework::ExecutionContext &context) const override {
auto &axes = context.Attr<std::vector<int>>("axes"); auto axes = context.Attr<std::vector<int>>("axes");
auto *in = context.Input<framework::LoDTensor>("X"); auto *in = context.Input<framework::LoDTensor>("X");
auto *out = context.Output<framework::LoDTensor>("Out"); auto *out = context.Output<framework::LoDTensor>("Out");
auto x_dims = in->dims(); auto x_dims = in->dims();
auto out_dims = GetOutputShape(axes, x_dims);
bool need_resize_out_dims = false;
if (axes.empty()) {
auto axes_tensor_list =
context.MultiInput<framework::Tensor>("AxesTensorList");
if (axes_tensor_list.size() > 0) {
axes = GetDataFromTensorList<int>(axes_tensor_list);
} else if (context.HasInput("AxesTensor")) {
auto *axes_tensor = context.Input<framework::Tensor>("AxesTensor");
axes = GetDataFromTensor<int>(axes_tensor);
}
need_resize_out_dims = true;
}
framework::DDim out_dims = out->dims();
if (need_resize_out_dims) {
out_dims = GetOutputShape(axes, x_dims);
out->Resize(out_dims);
}
out->mutable_data(context.GetPlace(), in->type()); out->mutable_data(context.GetPlace(), in->type());
framework::TensorCopy( framework::TensorCopy(
*in, context.GetPlace(), *in, context.GetPlace(),
...@@ -95,27 +144,6 @@ class UnsqueezeGradKernel : public framework::OpKernel<T> { ...@@ -95,27 +144,6 @@ class UnsqueezeGradKernel : public framework::OpKernel<T> {
} }
}; };
template <typename DeviceContext, typename T>
class Unsqueeze2Kernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
auto *out = context.Output<framework::LoDTensor>("Out");
auto *in = context.Input<framework::LoDTensor>("X");
auto &axes = context.Attr<std::vector<int>>("axes");
auto x_dims = in->dims();
auto out_dims =
UnsqueezeKernel<DeviceContext, T>::GetOutputShape(axes, x_dims);
out->mutable_data(context.GetPlace(), in->type());
framework::TensorCopy(
*in, context.GetPlace(),
context.template device_context<platform::DeviceContext>(), out);
out->Resize(out_dims);
}
};
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
class Unsqueeze2GradKernel : public framework::OpKernel<T> { class Unsqueeze2GradKernel : public framework::OpKernel<T> {
public: public:
......
...@@ -8997,7 +8997,7 @@ def unsqueeze(input, axes, name=None): ...@@ -8997,7 +8997,7 @@ def unsqueeze(input, axes, name=None):
Args: Args:
input (Variable): The input Tensor to be unsqueezed. It is a N-D Tensor of data types float32, float64, int32. input (Variable): The input Tensor to be unsqueezed. It is a N-D Tensor of data types float32, float64, int32.
axes (list): List of integers, indicating the dimensions to be inserted. axes (int|list|tuple|Variable): Indicates the dimensions to be inserted. The data type is ``int32`` . If ``axes`` is a list or tuple, the elements of it should be integers or Tensors with shape [1]. If ``axes`` is an Variable, it should be an 1-D Tensor .
name (str|None): Name for this layer. name (str|None): Name for this layer.
Returns: Returns:
...@@ -9011,13 +9011,45 @@ def unsqueeze(input, axes, name=None): ...@@ -9011,13 +9011,45 @@ def unsqueeze(input, axes, name=None):
y = fluid.layers.unsqueeze(input=x, axes=[1]) y = fluid.layers.unsqueeze(input=x, axes=[1])
""" """
helper = LayerHelper("unsqueeze", **locals()) if not isinstance(axes, (int, list, tuple, Variable)):
raise TypeError(
"The type of 'axes' in unsqueeze must be int, list, tuple or Variable, but "
"received %s." % (type(axes)))
helper = LayerHelper("unsqueeze2", **locals())
inputs = {"X": input}
attrs = {}
def _to_Variable_list(one_list):
Variable_list = []
for ele in one_list:
if isinstance(ele, Variable):
ele.stop_gradient = True
Variable_list.append(ele)
else:
assert (isinstance(ele, int))
temp_out = helper.create_variable_for_type_inference('int32')
fill_constant([1], 'int32', ele, force_cpu=True, out=temp_out)
Variable_list.append(temp_out)
return Variable_list
if isinstance(axes, int):
axes = [axes]
if isinstance(axes, Variable):
axes.stop_gradient = True
inputs["AxesTensor"] = axes
elif isinstance(axes, (list, tuple)):
contain_var = not all(not isinstance(ele, Variable) for ele in axes)
if contain_var:
inputs["AxesTensorList"] = _to_Variable_list(axes)
else:
attrs["axes"] = axes
out = helper.create_variable_for_type_inference(dtype=input.dtype) out = helper.create_variable_for_type_inference(dtype=input.dtype)
x_shape = helper.create_variable_for_type_inference(dtype=input.dtype) x_shape = helper.create_variable_for_type_inference(dtype=input.dtype)
helper.append_op( helper.append_op(
type="unsqueeze2", type="unsqueeze2",
inputs={"X": input}, inputs=inputs,
attrs={"axes": axes}, attrs=attrs,
outputs={"Out": out, outputs={"Out": out,
"XShape": x_shape}) "XShape": x_shape})
......
...@@ -16,7 +16,7 @@ from __future__ import print_function ...@@ -16,7 +16,7 @@ from __future__ import print_function
import unittest import unittest
import numpy as np import numpy as np
import paddle.fluid as fluid
from op_test import OpTest from op_test import OpTest
...@@ -79,5 +79,163 @@ class TestUnsqueezeOp4(TestUnsqueezeOp): ...@@ -79,5 +79,163 @@ class TestUnsqueezeOp4(TestUnsqueezeOp):
self.new_shape = (3, 1, 1, 2, 5, 1) self.new_shape = (3, 1, 1, 2, 5, 1)
# axes is a list(with tensor)
class TestUnsqueezeOp_AxesTensorList(OpTest):
def setUp(self):
self.init_test_case()
self.op_type = "unsqueeze2"
axes_tensor_list = []
for index, ele in enumerate(self.axes):
axes_tensor_list.append(("axes" + str(index), np.ones(
(1)).astype('int32') * ele))
self.inputs = {
"X": np.random.random(self.ori_shape).astype("float32"),
"AxesTensorList": axes_tensor_list
}
self.init_attrs()
self.outputs = {
"Out": self.inputs["X"].reshape(self.new_shape),
"XShape": np.random.random(self.ori_shape).astype("float32")
}
def test_check_output(self):
self.check_output(no_check_set=["XShape"])
def test_check_grad(self):
self.check_grad(["X"], "Out")
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (1, 2)
self.new_shape = (3, 1, 1, 5)
def init_attrs(self):
self.attrs = {}
class TestUnsqueezeOp1_AxesTensorList(TestUnsqueezeOp_AxesTensorList):
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (-1, )
self.new_shape = (3, 5, 1)
class TestUnsqueezeOp2_AxesTensorList(TestUnsqueezeOp_AxesTensorList):
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (0, -1)
self.new_shape = (1, 3, 5, 1)
class TestUnsqueezeOp3_AxesTensorList(TestUnsqueezeOp_AxesTensorList):
def init_test_case(self):
self.ori_shape = (3, 2, 5)
self.axes = (0, 3, 3)
self.new_shape = (1, 3, 2, 1, 1, 5)
class TestUnsqueezeOp4_AxesTensorList(TestUnsqueezeOp_AxesTensorList):
def init_test_case(self):
self.ori_shape = (3, 2, 5)
self.axes = (3, 1, 1)
self.new_shape = (3, 1, 1, 2, 5, 1)
# axes is a Tensor
class TestUnsqueezeOp_AxesTensor(OpTest):
def setUp(self):
self.init_test_case()
self.op_type = "unsqueeze2"
self.inputs = {
"X": np.random.random(self.ori_shape).astype("float32"),
"AxesTensor": np.array(self.axes).astype("int32")
}
self.init_attrs()
self.outputs = {
"Out": self.inputs["X"].reshape(self.new_shape),
"XShape": np.random.random(self.ori_shape).astype("float32")
}
def test_check_output(self):
self.check_output(no_check_set=["XShape"])
def test_check_grad(self):
self.check_grad(["X"], "Out")
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (1, 2)
self.new_shape = (3, 1, 1, 5)
def init_attrs(self):
self.attrs = {}
class TestUnsqueezeOp1_AxesTensor(TestUnsqueezeOp_AxesTensor):
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (-1, )
self.new_shape = (3, 5, 1)
class TestUnsqueezeOp2_AxesTensor(TestUnsqueezeOp_AxesTensor):
def init_test_case(self):
self.ori_shape = (3, 5)
self.axes = (0, -1)
self.new_shape = (1, 3, 5, 1)
class TestUnsqueezeOp3_AxesTensor(TestUnsqueezeOp_AxesTensor):
def init_test_case(self):
self.ori_shape = (3, 2, 5)
self.axes = (0, 3, 3)
self.new_shape = (1, 3, 2, 1, 1, 5)
class TestUnsqueezeOp4_AxesTensor(TestUnsqueezeOp_AxesTensor):
def init_test_case(self):
self.ori_shape = (3, 2, 5)
self.axes = (3, 1, 1)
self.new_shape = (3, 1, 1, 2, 5, 1)
# test api
class TestUnsqueezeAPI(OpTest):
def test_api(self):
input = np.random.random([3, 2, 5]).astype("float32")
x = fluid.data(name='x', shape=[3, 2, 5], dtype="float32")
positive_3 = fluid.layers.fill_constant([1], "int32", 3)
axes_tensor = fluid.data(name='axes_tensor', shape=[3], dtype="int32")
out_1 = fluid.layers.unsqueeze(x, axes=[3, 1, 1])
out_2 = fluid.layers.unsqueeze(x, axes=[positive_3, 1, 1])
out_3 = fluid.layers.unsqueeze(x, axes=axes_tensor)
out_4 = fluid.layers.unsqueeze(x, axes=3)
exe = fluid.Executor(place=fluid.CPUPlace())
res_1, res_2, res_3, res_4 = exe.run(
fluid.default_main_program(),
feed={
"x": input,
"axes_tensor": np.array([3, 1, 1]).astype("int32")
},
fetch_list=[out_1, out_2, out_3, out_4])
assert np.array_equal(res_1, input.reshape([3, 1, 1, 2, 5, 1]))
assert np.array_equal(res_2, input.reshape([3, 1, 1, 2, 5, 1]))
assert np.array_equal(res_3, input.reshape([3, 1, 1, 2, 5, 1]))
assert np.array_equal(res_4, input.reshape([3, 2, 5, 1]))
def test_error(self):
def test_axes_type():
x2 = fluid.data(name="x2", shape=[2, 25], dtype="int32")
fluid.layers.unsqueeze(x2, axes=2.1)
self.assertRaises(TypeError, test_axes_type)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册