diff --git a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h
index 70f26055b7cc0516b051fab2c8094752d5a9f9f1..60ba4797db1e2af267a37715c715fb7107ac8500 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h
@@ -34,28 +34,26 @@ class SequenceUnpadOpKernel : public framework::OpKernel<T> {
     auto* len_t = ctx.Input<LoDTensor>("Length");
     auto* out_t = ctx.Output<LoDTensor>("Out");
 
-    const int64_t* seq_len_ptr = nullptr;
+    auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    framework::Tensor seq_len_cpu =
+        ctx.AllocateTmpTensor<int64_t, DeviceContext>(len_t->dims(), dev_ctx);
     if (platform::is_gpu_place(ctx.GetPlace())) {
-      LoDTensor seq_len_cpu;
-      seq_len_cpu.Resize(len_t->dims());
-      seq_len_ptr = seq_len_cpu.mutable_data<int64_t>(platform::CPUPlace());
-      framework::TensorCopy(*len_t, platform::CPUPlace(),
-                            ctx.template device_context<DeviceContext>(),
-                            &seq_len_cpu);
+      seq_len_cpu.mutable_data<int64_t>(platform::CPUPlace());
+      framework::TensorCopySync(*len_t, platform::CPUPlace(), &seq_len_cpu);
     } else {
-      seq_len_ptr = len_t->data<int64_t>();
+      seq_len_cpu = *len_t;
     }
 
-    size_t batch_size = x_t->dims()[0];
+    const int64_t* seq_len_ptr = seq_len_cpu.data<int64_t>();
+    int64_t batch_size = len_t->dims()[0];
     std::vector<size_t> out_lod0(batch_size + 1, 0);
-    for (size_t i = 0; i < batch_size; ++i) {
-      out_lod0[i + 1] = out_lod0[i] + seq_len_ptr[i];
+    for (int64_t i = 0; i < batch_size; ++i) {
+      out_lod0[i + 1] = out_lod0[i] + static_cast<size_t>(seq_len_ptr[i]);
     }
 
     framework::LoD out_lod;
     out_lod.push_back(out_lod0);
     out_t->set_lod(out_lod);
-
     std::vector<int64_t> out_dims_vec{static_cast<int64_t>(out_lod0.back())};
     if (x_t->dims().size() == 2) {
       out_dims_vec.push_back(1);
@@ -71,8 +69,7 @@ class SequenceUnpadOpKernel : public framework::OpKernel<T> {
 
     int64_t padded_length = x_t->dims()[1];
     math::UnpaddingLoDTensorFunctor<DeviceContext, T>()(
-        ctx.template device_context<DeviceContext>(), *x_t, out_t,
-        padded_length, 0, false, math::kBatchLengthWidth);
+        dev_ctx, *x_t, out_t, padded_length, 0, false, math::kBatchLengthWidth);
   }
 };
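
The behavioral fix above is the switch from the asynchronous framework::TensorCopy to framework::TensorCopySync on the GPU path, so the sequence lengths are fully resident on the host before seq_len_ptr is dereferenced. The sketch below illustrates that pattern in isolation; it assumes Paddle's framework::Tensor, framework::TensorCopySync, and platform::is_gpu_place as used in the diff, and the helper name LengthsOnHost is purely illustrative, not part of this patch.

// Illustrative only (not part of the patch): make a length tensor readable on
// the host by doing a blocking device-to-host copy when needed.
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/place.h"

namespace {

// Returns a pointer to int64_t lengths that is safe to read on the CPU.
// If the source lives on the GPU, it is first copied synchronously into
// *cpu_buf; otherwise the source buffer is read directly.
const int64_t* LengthsOnHost(const paddle::framework::Tensor& len_t,
                             paddle::framework::Tensor* cpu_buf) {
  if (paddle::platform::is_gpu_place(len_t.place())) {
    // TensorCopySync blocks until the copy has finished, unlike TensorCopy,
    // which may still be in flight when the caller starts reading.
    paddle::framework::TensorCopySync(len_t, paddle::platform::CPUPlace(),
                                      cpu_buf);
    return cpu_buf->data<int64_t>();
  }
  return len_t.data<int64_t>();
}

}  // namespace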