Unverified commit 9cc1937d, authored by Yibing Liu, committed by GitHub

Merge pull request #13400 from kuke/fix_seq_pad

Get sequence length in sequence_pad op & fix sequence_mask op
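Reviewer note: with this change `fluid.layers.sequence_pad` returns a pair (padded batch, original lengths) instead of a single variable. A minimal sketch of the intended call pattern, assuming the contemporaneous Fluid API (`fluid.layers.data`, `fluid.layers.assign`); the variable names are illustrative:

    import numpy as np
    import paddle.fluid as fluid

    # A 1-level LoDTensor input; each time step carries 10 features.
    x = fluid.layers.data(name='x', shape=[10], dtype='float32', lod_level=1)
    # Scalar pad value broadcast over every padded time step.
    pad_value = fluid.layers.assign(input=np.array([0.0], dtype=np.float32))

    # After this PR the layer also returns the pre-padding length of
    # each sequence as a tensor of shape [batch_size, 1].
    out, length = fluid.layers.sequence_pad(x=x, pad_value=pad_value)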
@@ -23,4 +23,8 @@ REGISTER_OP_CPU_KERNEL(
     paddle::operators::SequenceMaskKernel<paddle::platform::CPUDeviceContext,
                                           int>,
     paddle::operators::SequenceMaskKernel<paddle::platform::CPUDeviceContext,
-                                          int64_t>);
+                                          int64_t>,
+    paddle::operators::SequenceMaskKernel<paddle::platform::CPUDeviceContext,
+                                          float>,
+    paddle::operators::SequenceMaskKernel<paddle::platform::CPUDeviceContext,
+                                          double>);
@@ -19,4 +19,8 @@ REGISTER_OP_CUDA_KERNEL(
     paddle::operators::SequenceMaskKernel<paddle::platform::CUDADeviceContext,
                                           int>,
     paddle::operators::SequenceMaskKernel<paddle::platform::CUDADeviceContext,
-                                          int64_t>);
+                                          int64_t>,
+    paddle::operators::SequenceMaskKernel<paddle::platform::CUDADeviceContext,
+                                          float>,
+    paddle::operators::SequenceMaskKernel<paddle::platform::CUDADeviceContext,
+                                          double>);
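Reviewer note: registering float and double kernels lets sequence_mask produce a floating-point mask directly, so callers no longer need a cast before multiplying the mask into float activations. A hedged usage sketch (API names as in Fluid of this period):

    import paddle.fluid as fluid

    lengths = fluid.layers.data(name='lengths', shape=[1], dtype='int64')
    # dtype='float32' now dispatches to the newly registered CPU/CUDA kernels.
    mask = fluid.layers.sequence_mask(lengths, maxlen=10, dtype='float32')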
@@ -29,10 +29,12 @@ class SequencePadOp : public framework::OperatorWithKernel {
                    "Input(PadValue) of SequencePadOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
                    "Output(Out) of SequencePadOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Length"),
+                   "Output(Length) of SequencePadOp should not be null.");
 
     auto x_dims = ctx->GetInputDim("X");
     PADDLE_ENFORCE_GE(x_dims.size(), 2,
-                      "The rank of Input(x) can't be less than 2.");
+                      "The rank of Input(X) can't be less than 2.");
     auto time_step_dims = framework::slice_ddim(x_dims, 1, x_dims.size());
     auto pad_value_dims = ctx->GetInputDim("PadValue");
     PADDLE_ENFORCE(pad_value_dims == framework::make_ddim({1}) ||
@@ -41,8 +43,8 @@ class SequencePadOp : public framework::OperatorWithKernel {
                    "shape equals to time steps in sequences");
 
     int out_dim_0 = -1;
-    int out_dim_1 = -1;
 
+    int padded_length = ctx->Attrs().Get<int>("padded_length");
     if (ctx->IsRuntime()) {
       // run time
       framework::Variable* x_var =
@@ -58,7 +60,6 @@ class SequencePadOp : public framework::OperatorWithKernel {
       int seq_num = x_lod_0.size() - 1;
       int max_seq_len = math::MaximumSequenceLength(x_lod_0);
-      int padded_length = ctx->Attrs().Get<int>("padded_length");
       if (padded_length == -1) {
         padded_length = max_seq_len;
       }
@@ -66,19 +67,30 @@ class SequencePadOp : public framework::OperatorWithKernel {
                         "The Attr(padded_length) must be -1 or an int greater "
                         "than the length of the longest original sequence.");
       out_dim_0 = seq_num;
-      out_dim_1 = padded_length;
+    } else {
+      // compile time
+      if (padded_length == -1) {
+        padded_length = 1;
+      }
+      framework::VarDesc* x_desc =
+          boost::get<framework::VarDesc*>(ctx->GetInputVarPtrs("X")[0]);
+      PADDLE_ENFORCE_GE(x_desc->GetLoDLevel(), 1);
     }
 
-    std::vector<int> out_dims_vec{out_dim_0, out_dim_1};
+    std::vector<int> out_dims_vec{out_dim_0, padded_length};
+    std::vector<int> len_dims_vec{out_dim_0, 1};
     auto time_step_dims_vec = framework::vectorize2int(time_step_dims);
     out_dims_vec.insert(out_dims_vec.end(), time_step_dims_vec.begin(),
                         time_step_dims_vec.end());
     ctx->SetOutputDim("Out", framework::make_ddim(out_dims_vec));
+    ctx->SetOutputDim("Length", framework::make_ddim(len_dims_vec));
   }
 
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("X"));
+    return framework::OpKernelType(data_type, ctx.device_context());
+  }
 };
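Reviewer note: the rewritten InferShape threads padded_length through both branches: at run time it is resolved against the longest sequence, at compile time it falls back to 1 so a valid shape can still be produced. The resulting shapes, mirrored in a short Python sketch (infer_sequence_pad_shapes is a hypothetical helper, not part of the codebase):

    def infer_sequence_pad_shapes(seq_num, padded_length, time_step_dims):
        # Mirrors SequencePadOp::InferShape: Out is
        # [seq_num, padded_length, *time_step_dims], Length is [seq_num, 1].
        # seq_num is -1 at compile time, when the batch size is unknown.
        out_dims = [seq_num, padded_length] + list(time_step_dims)
        len_dims = [seq_num, 1]
        return out_dims, len_dims

    # Two sequences padded to length 4, one feature per time step:
    assert infer_sequence_pad_shapes(2, 4, [1]) == ([2, 4, 1], [2, 1])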
@@ -96,6 +108,10 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput(
         "Out",
         "(LoDTensor) The output variable, which contains padded sequences.");
+    AddOutput(
+        "Length",
+        "(LoDTensor) The output variable, which contains the actual length of "
+        "sequences before padding.");
     AddAttr<int>(
         "padded_length",
         "The length of padded sequences. It can be set to -1 or "
......@@ -125,6 +141,7 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
then we get LoDTensor:
Out.data = [[a, b, 0, 0],
[c, d, e, 0]]
Length.data = [[2], [3]]
Case 2:
@@ -138,7 +155,8 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
       then we get LoDTensor:
           Out.data = [[[a1, a2], [b1, b2], [0, 0]],
                       [[c1, c2], [d1, d2], [e1, e2]]]
+          Length.data = [[2], [3]]
 
       Case 3:
 
       Given a 1-level LoDTensor input(X):
@@ -151,6 +169,7 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
       then we get LoDTensor:
           Out.data = [[[a1, a2], [b1, b2], [p1, p2]],
                       [[c1, c2], [d1, d2], [e1, e2]]]
+          Length.data = [[2], [3]]
 
     )DOC");
   }
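Reviewer note: Case 1 above can be checked by hand; the sketch below reproduces it with plain Python lists, independent of the operator (lod holds the level-0 offsets of X):

    x = ['a', 'b', 'c', 'd', 'e']   # flattened data of the 1-level LoDTensor
    lod = [0, 2, 5]                 # offsets: sequences [a, b] and [c, d, e]
    padded_length, pad_value = 4, 0

    out, length = [], []
    for i in range(len(lod) - 1):
        seq = x[lod[i]:lod[i + 1]]
        length.append([len(seq)])
        out.append(seq + [pad_value] * (padded_length - len(seq)))

    assert out == [['a', 'b', 0, 0], ['c', 'd', 'e', 0]]
    assert length == [[2], [3]]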
@@ -171,6 +190,13 @@ class SequencePadGradOp : public framework::OperatorWithKernel {
       ctx->ShareLoD("X", /*->*/ framework::GradVarName("X"));
     }
   }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("X"));
+    return framework::OpKernelType(data_type, ctx.device_context());
+  }
 };
 
 }  // namespace operators
@@ -32,6 +32,7 @@ class SequencePadOpKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     const auto* x = ctx.Input<LoDTensor>("X");
     auto* out = ctx.Output<LoDTensor>("Out");
+    auto* len_t = ctx.Output<LoDTensor>("Length");
     out->mutable_data<T>(ctx.GetPlace());
 
     const auto* pad_value = ctx.Input<LoDTensor>("PadValue");
@@ -41,6 +42,15 @@ class SequencePadOpKernel : public framework::OpKernel<T> {
     math::PaddingLoDTensorFunctor<DeviceContext, T>()(
         ctx.template device_context<DeviceContext>(), *x, out, *pad_value,
         padded_length, 0, false, math::kBatchLengthWidth);
+
+    LoDTensor seq_len;
+    seq_len.Resize(len_t->dims());
+    int64_t* len_data = seq_len.mutable_data<int64_t>(platform::CPUPlace());
+    for (size_t i = 1; i < x->lod()[0].size(); ++i) {
+      len_data[i - 1] = x->lod()[0][i] - x->lod()[0][i - 1];
+    }
+    framework::TensorCopy(seq_len, ctx.GetPlace(),
+                          ctx.template device_context<DeviceContext>(), len_t);
   }
 };
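Reviewer note: the new block computes each length as the difference of adjacent level-0 LoD offsets on the CPU, then copies the result to the kernel's place. The same arithmetic in a few lines of Python:

    lod0 = [0, 2, 5]  # x->lod()[0]: offsets of each sequence into the data
    # len_data[i - 1] = lod0[i] - lod0[i - 1], exactly as in the C++ loop.
    lengths = [lod0[i] - lod0[i - 1] for i in range(1, len(lod0))]
    assert lengths == [2, 3]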
@@ -2776,7 +2776,8 @@ def sequence_pad(x, pad_value, maxlen=None):
             longest original sequence."
 
     Returns:
-        Variable: The padded sequence batch. All sequences has the same length.
+        Variable: The padded sequence batch and the original lengths before
+            padding. All sequences have the same length.
 
     Examples:
         .. code-block:: python
@@ -2792,15 +2793,21 @@ def sequence_pad(x, pad_value, maxlen=None):
 
     helper = LayerHelper('sequence_pad', input=x, **locals())
     dtype = helper.input_dtype()
     out = helper.create_tmp_variable(dtype)
+    length = helper.create_tmp_variable(dtype)
+
     pad_value.stop_gradient = True
+    length.stop_gradient = True
+
     if maxlen is None:
         maxlen = -1
     helper.append_op(
         type='sequence_pad',
         inputs={'X': x,
                 'PadValue': pad_value},
-        outputs={'Out': out},
+        outputs={'Out': out,
+                 'Length': length},
         attrs={'padded_length': maxlen})
-    return out
+    return out, length
 
 
 def beam_search(pre_ids,
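Reviewer note: this changes the layer's return type, so existing call sites must be updated. A minimal migration sketch for callers that only need the padded batch:

    # Before this PR:
    #     out = fluid.layers.sequence_pad(x=x, pad_value=pad_value)
    # After it, unpack (or discard) the new Length output:
    out, _ = fluid.layers.sequence_pad(x=x, pad_value=pad_value)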
@@ -6017,7 +6024,7 @@ def sequence_mask(x, maxlen=None, dtype='int64', name=None):
         inputs={'X': [x]},
         outputs={'Y': out},
         attrs={
-            'max_len': maxlen if maxlen is not None else -1,
+            'maxlen': maxlen if maxlen is not None else -1,
             'out_dtype': out.dtype
         })
     return out
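Reviewer note: the wrapper previously passed the attribute as 'max_len', which, judging by this fix, does not match the 'maxlen' attribute the op declares, so a user-supplied maxlen was not reaching the kernel. For reference, the semantics the mask should satisfy, as a small numpy model (sequence_mask_ref is a hypothetical helper, not part of the codebase):

    import numpy as np

    def sequence_mask_ref(lengths, maxlen):
        # mask[i, j] = 1 if j < lengths[i] else 0
        return np.arange(maxlen)[None, :] < np.asarray(lengths)[:, None]

    print(sequence_mask_ref([2, 3], maxlen=5).astype('int64'))
    # [[1 1 0 0 0]
    #  [1 1 1 0 0]]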
@@ -62,7 +62,8 @@ class TestSequencePadOp(OpTest):
             start_idx = end_idx
 
         out_data = np.array(padded_sequences)
-        self.outputs = {'Out': out_data}
+        length = np.array(self.x_len_lod[0]).reshape((-1, 1))
+        self.outputs = {'Out': out_data, 'Length': length}
 
     def setUp(self):
         self.op_type = 'sequence_pad'
@@ -129,3 +130,7 @@ class TestSequencePadOp7(TestSequencePadOp):
         self.pad_value = [1.0]
         self.padded_length = 7
         self.dtype = 'float32'
+
+
+if __name__ == '__main__':
+    unittest.main()