From 3c9a1a1d66a88a6f539e41ada3d6271fda83506f Mon Sep 17 00:00:00 2001
From: hong19860320 <9973393+hong19860320@users.noreply.github.com>
Date: Fri, 18 Sep 2020 09:54:34 +0800
Subject: [PATCH] [cherry-pick] Fix build error caused by flatbuffer if the
 target is tiny_publish, and fix the missing of the attr name of the output
 scale (#4356)

* [LITE][XPU] 1. Add sequence_unpad kernel for XPU; 2. Bugfix in
  sequence_unpad kernel for x86, as InferShapeImpl() is now empty in
  lite/operators/sequence_unpad_op.cc; 3. Refine TargetWrapperXPU; (#4237)

* [Core] Fix the missing of the attr name of the output scale (#4334)

* [NPU] Fix build error caused by flatbuffer if the target is tiny_publish
  (#4340)
  test=develop

Co-authored-by: Cwndmiao
---
 lite/backends/xpu/target_wrapper.cc           | 23 ++++-
 lite/backends/xpu/target_wrapper.h            | 14 +--
 lite/core/op_lite.cc                          |  1 +
 lite/kernels/apu/bridges/conv_op.cc           |  4 +-
 lite/kernels/apu/bridges/pool_op.cc           |  2 +-
 lite/kernels/npu/bridges/conv_op.cc           |  4 +-
 lite/kernels/npu/bridges/conv_transpose_op.cc |  4 +-
 lite/kernels/npu/bridges/pad2d_op.cc          |  2 +-
 lite/kernels/npu/bridges/pool_op.cc           |  2 +-
 lite/kernels/npu/bridges/reduce_mean_op.cc    |  2 +-
 lite/kernels/rknpu/bridges/conv_op.cc         |  4 +-
 lite/kernels/rknpu/bridges/pool_op.cc         |  2 +-
 lite/kernels/x86/sequence_unpad_compute.h     | 25 +++++
 lite/kernels/xpu/CMakeLists.txt               |  1 +
 lite/kernels/xpu/bridges/conv_op.cc           |  4 +-
 lite/kernels/xpu/bridges/pool_op.cc           |  2 +-
 lite/kernels/xpu/sequence_pool_compute.cc     |  2 +
 lite/kernels/xpu/sequence_unpad_compute.cc    | 98 +++++++++++++++++++
 lite/kernels/xpu/sequence_unpad_compute.h     | 44 +++++++++
 19 files changed, 216 insertions(+), 24 deletions(-)
 create mode 100644 lite/kernels/xpu/sequence_unpad_compute.cc
 create mode 100644 lite/kernels/xpu/sequence_unpad_compute.h

diff --git a/lite/backends/xpu/target_wrapper.cc b/lite/backends/xpu/target_wrapper.cc
index a3d8729410..5f5eae4703 100644
--- a/lite/backends/xpu/target_wrapper.cc
+++ b/lite/backends/xpu/target_wrapper.cc
@@ -18,6 +18,27 @@
 namespace paddle {
 namespace lite {
 
+void XPUScratchPad::Reserve(size_t new_size) {
+  if (new_size <= size_) {
+    return;
+  }
+
+  if (!is_l3_) {
+    TargetWrapperXPU::Free(addr_);
+    addr_ = TargetWrapperXPU::Malloc(new_size);
+    size_ = new_size;
+  } else {
+    CHECK(false) << "Not supported if is_l3_ == true";
+  }
+}
+
+void XPUScratchPadDeleter::operator()(XPUScratchPad* sp) const {
+  if (!sp->is_l3_) {
+    TargetWrapperXPU::Free(sp->addr_);
+  }
+  delete sp;
+}
+
 void* TargetWrapperXPU::Malloc(size_t size) {
   void* ptr{nullptr};
   XPU_CALL(xpu_malloc(&ptr, size));
@@ -51,7 +72,7 @@ XPUScratchPadGuard TargetWrapperXPU::MallocScratchPad(size_t size,
     ptr = TargetWrapperXPU::Malloc(size);
   }
   CHECK(ptr != nullptr) << "size = " << size << ", use_l3 = " << use_l3;
-  return XPUScratchPadGuard(new XPUScratchPad(ptr, use_l3));
+  return XPUScratchPadGuard(new XPUScratchPad(ptr, size, use_l3));
 }
 
 std::string TargetWrapperXPU::multi_encoder_precision;  // NOLINT
diff --git a/lite/backends/xpu/target_wrapper.h b/lite/backends/xpu/target_wrapper.h
index 1a888b126a..8151d733ba 100644
--- a/lite/backends/xpu/target_wrapper.h
+++ b/lite/backends/xpu/target_wrapper.h
@@ -37,19 +37,19 @@ const int XPU_MAX_LOD_SEQ_LEN = 512;
 using TargetWrapperXPU = TargetWrapper<TARGET(kXPU)>;
 
 struct XPUScratchPad {
-  XPUScratchPad(void* addr, bool is_l3) : addr_(addr), is_l3_(is_l3) {}
+  XPUScratchPad(void* addr, size_t size, bool is_l3)
+      : addr_(addr), size_(size), is_l3_(is_l3) {}
+
+  // XXX(miaotianxiang): |size_| increases monotonically
+  void Reserve(size_t new_size);
 
   void* addr_{nullptr};
+  size_t size_{0};
   bool is_l3_{false};
 };
 
 struct XPUScratchPadDeleter {
-  void operator()(XPUScratchPad* sp) const {
-    if (!sp->is_l3_) {
-      XPU_CALL(xpu_free(sp->addr_));
-    }
-    delete sp;
-  }
+  void operator()(XPUScratchPad* sp) const;
 };
 
 using XPUScratchPadGuard = std::unique_ptr<XPUScratchPad, XPUScratchPadDeleter>;
diff --git a/lite/core/op_lite.cc b/lite/core/op_lite.cc
index c3c00d0fa0..dcab292be8 100644
--- a/lite/core/op_lite.cc
+++ b/lite/core/op_lite.cc
@@ -322,6 +322,7 @@ std::vector<float> OpInfo::GetOutputScale(const std::string &name,
     int index;
     CHECK(GetOutputArgname(name, &argname));
     CHECK(GetOutputIndex(name, &index));
+    scale_name = argname + to_string(index) + "_scale";
   }
   return GetAttr<std::vector<float>>(scale_name);
 }
diff --git a/lite/kernels/apu/bridges/conv_op.cc b/lite/kernels/apu/bridges/conv_op.cc
index bdac473b1b..1c3020065e 100644
--- a/lite/kernels/apu/bridges/conv_op.cc
+++ b/lite/kernels/apu/bridges/conv_op.cc
@@ -60,9 +60,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK_EQ(output_dims[0], bs);
   CHECK_EQ(output_dims[1], oc);
   auto strides = op_info->GetAttr<std::vector<int>>("strides");
-  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
+  std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
   auto groups = op_info->GetAttr<int>("groups");
-  auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
+  std::vector<int> dilations = op_info->GetAttr<std::vector<int>>("dilations");
   bool with_act =
       op_info->HasAttr("with_act") && op_info->GetAttr<bool>("with_act");
   std::string act_type =
diff --git a/lite/kernels/apu/bridges/pool_op.cc b/lite/kernels/apu/bridges/pool_op.cc
index 594c7fabda..e255518044 100644
--- a/lite/kernels/apu/bridges/pool_op.cc
+++ b/lite/kernels/apu/bridges/pool_op.cc
@@ -45,7 +45,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
   auto global_pooling = op_info->GetAttr<bool>("global_pooling");
   auto ksize = op_info->GetAttr<std::vector<int>>("ksize");
-  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
+  std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
 
   // pool mode
   if ((pooling_type == "max") || (pooling_type == "avg")) {
diff --git a/lite/kernels/npu/bridges/conv_op.cc b/lite/kernels/npu/bridges/conv_op.cc
index 5cc79137b9..95632c7a05 100644
--- a/lite/kernels/npu/bridges/conv_op.cc
+++ b/lite/kernels/npu/bridges/conv_op.cc
@@ -53,9 +53,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK_EQ(output_dims[0], bs);
   CHECK_EQ(output_dims[1], oc);
   auto strides = op_info->GetAttr<std::vector<int>>("strides");
-  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
+  std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
   auto groups = op_info->GetAttr<int>("groups");
-  auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
+  std::vector<int> dilations = op_info->GetAttr<std::vector<int>>("dilations");
   bool with_act =
       op_info->HasAttr("with_act") && op_info->GetAttr<bool>("with_act");
   std::string act_type =
diff --git a/lite/kernels/npu/bridges/conv_transpose_op.cc b/lite/kernels/npu/bridges/conv_transpose_op.cc
index 7e149ed243..52ae137d52 100644
--- a/lite/kernels/npu/bridges/conv_transpose_op.cc
+++ b/lite/kernels/npu/bridges/conv_transpose_op.cc
@@ -59,8 +59,8 @@ int ConvTransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     output_size = op_info->GetAttr<std::vector<int>>("output_size");
   }
 
-  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
-  auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
+  std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
+  std::vector<int> dilations = op_info->GetAttr<std::vector<int>>("dilations");
   CHECK_EQ(dilations.size(), 2L);
   std::string padding_algorithm =
       op_info->HasAttr("padding_algorithm")
diff --git a/lite/kernels/npu/bridges/pad2d_op.cc b/lite/kernels/npu/bridges/pad2d_op.cc
index 70fa87e778..cb35b24752 100644
--- a/lite/kernels/npu/bridges/pad2d_op.cc
+++ b/lite/kernels/npu/bridges/pad2d_op.cc
@@ -35,7 +35,7 @@ int Pad2dConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto x = scope->FindMutableTensor(x_name);
   auto x_dims = x->dims();
   auto out_name = op_info->Output("Out").front();
-  auto padding = op_info->GetAttr<std::vector<int>>("paddings");
+  std::vector<int> padding = op_info->GetAttr<std::vector<int>>("paddings");
   CHECK_EQ(padding.size(), 4);
 
   // X node
diff --git a/lite/kernels/npu/bridges/pool_op.cc b/lite/kernels/npu/bridges/pool_op.cc
index fc2647f67e..921e1a2571 100644
--- a/lite/kernels/npu/bridges/pool_op.cc
+++ b/lite/kernels/npu/bridges/pool_op.cc
@@ -39,7 +39,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
   auto global_pooling = op_info->GetAttr<bool>("global_pooling");
   auto ksize = op_info->GetAttr<std::vector<int>>("ksize");
-  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
+  std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
 
   // X node
   std::shared_ptr<Node> x_node = nullptr;
diff --git a/lite/kernels/npu/bridges/reduce_mean_op.cc b/lite/kernels/npu/bridges/reduce_mean_op.cc
index 5987342672..a608082be0 100644
--- a/lite/kernels/npu/bridges/reduce_mean_op.cc
+++ b/lite/kernels/npu/bridges/reduce_mean_op.cc
@@ -36,7 +36,7 @@ int ReduceMeanConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto x_dims = x->dims();
   auto out_name = op_info->Input("Out").front();
   auto keep_dim = op_info->GetAttr<bool>("keep_dim");
-  auto dim = op_info->GetAttr<std::vector<int>>("dim");
+  std::vector<int> dim = op_info->GetAttr<std::vector<int>>("dim");
   CHECK(!dim.empty()) << "[NPU] \"dim\" of reduce_mean should not be empty.";
   for (size_t i = 0; i < dim.size(); i++) {
     if (dim[i] < 0) {
diff --git a/lite/kernels/rknpu/bridges/conv_op.cc b/lite/kernels/rknpu/bridges/conv_op.cc
index 134d9e0cde..a789f0bacc 100644
--- a/lite/kernels/rknpu/bridges/conv_op.cc
+++ b/lite/kernels/rknpu/bridges/conv_op.cc
@@ -51,9 +51,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK_EQ(output_dims[0], bs);
   CHECK_EQ(output_dims[1], oc);
   auto strides = op_info->GetAttr<std::vector<int>>("strides");
-  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
+  std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
   auto groups = op_info->GetAttr<int>("groups");
-  auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
+  std::vector<int> dilations = op_info->GetAttr<std::vector<int>>("dilations");
   auto fuse_relu = op_info->GetAttr<bool>("fuse_relu");
   CHECK_EQ(strides.size(), 2L);
   CHECK_EQ(dilations.size(), 2L);
diff --git a/lite/kernels/rknpu/bridges/pool_op.cc b/lite/kernels/rknpu/bridges/pool_op.cc
index 36832fc578..1a5a69b134 100644
--- a/lite/kernels/rknpu/bridges/pool_op.cc
+++ b/lite/kernels/rknpu/bridges/pool_op.cc
@@ -42,7 +42,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
   auto global_pooling = op_info->GetAttr<bool>("global_pooling");
   auto ksize = op_info->GetAttr<std::vector<int>>("ksize");
-  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
+  std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
 
   // for quantization
   bool enable_int8 = false;
diff --git a/lite/kernels/x86/sequence_unpad_compute.h b/lite/kernels/x86/sequence_unpad_compute.h
index 5b4e3f6c16..b8bdfe08e8 100644
--- a/lite/kernels/x86/sequence_unpad_compute.h
+++ b/lite/kernels/x86/sequence_unpad_compute.h
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #pragma once
+#include <vector>
 #include "lite/backends/x86/math/sequence_padding.h"
 #include "lite/core/kernel.h"
 #include "lite/core/op_registry.h"
@@ -34,6 +35,30 @@ class SequenceUnpadCompute
     auto& param = this->template Param<param_t>();
     auto& ctx = this->ctx_->template As<X86Context>();
 
+    auto x_dims = param.X->dims();
+    auto len_dims = param.Length->dims();
+
+    auto* seq_len_ptr = param.Length->template data<int64_t>();
+    int64_t batch_size = len_dims[0];
+    std::vector<uint64_t> out_lod0(batch_size + 1, 0);
+    for (int64_t i = 0; i < batch_size; ++i) {
+      out_lod0[i + 1] = out_lod0[i] + seq_len_ptr[i];
+    }
+    paddle::lite::LoD out_lod;
+    out_lod.push_back(out_lod0);
+
+    int64_t out_dim0 = out_lod0.back();
+    std::vector<int64_t> out_dims{out_dim0};
+    if (x_dims.size() == 2) {
+      out_dims.push_back(1);
+    } else {
+      for (size_t i = 2; i < x_dims.size(); ++i) {
+        out_dims.push_back(x_dims[i]);
+      }
+    }
+    param.Out->Resize(out_dims);
+    param.Out->set_lod(out_lod);
+    param.Out->template mutable_data<T>();
     int64_t padded_length = param.X->dims()[1];
     math::UnpaddingLoDTensorFunctor<lite::TargetType::kX86, T>()(
diff --git a/lite/kernels/xpu/CMakeLists.txt b/lite/kernels/xpu/CMakeLists.txt
index 798d707dd7..cc69120557 100644
--- a/lite/kernels/xpu/CMakeLists.txt
+++ b/lite/kernels/xpu/CMakeLists.txt
@@ -38,6 +38,7 @@ else()
     add_kernel(match_matrix_tensor_compute_xpu XPU extra SRCS match_matrix_tensor_compute.cc DEPS ${lite_kernel_deps})
     add_kernel(var_conv_2d_compute_xpu XPU extra SRCS var_conv_2d_compute.cc DEPS ${lite_kernel_deps})
     add_kernel(search_grnn_compute_xpu XPU extra SRCS search_grnn_compute.cc DEPS ${lite_kernel_deps})
+    add_kernel(sequence_unpad_compute_xpu XPU extra SRCS sequence_unpad_compute.cc DEPS ${lite_kernel_deps})
 
     # extra(fused kernel)
     add_kernel(__xpu__resnet50_compute_xpu XPU extra SRCS __xpu__resnet50_compute.cc DEPS ${lite_kernel_deps})
diff --git a/lite/kernels/xpu/bridges/conv_op.cc b/lite/kernels/xpu/bridges/conv_op.cc
index a4c0bc05cb..590d830ce4 100644
--- a/lite/kernels/xpu/bridges/conv_op.cc
+++ b/lite/kernels/xpu/bridges/conv_op.cc
@@ -44,9 +44,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK_EQ(input_dims.size(), 4);
   CHECK_EQ(filter_dims.size(), 4);
   auto strides = op_info->GetAttr<std::vector<int>>("strides");
-  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
+  std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
   auto groups = op_info->GetAttr<int>("groups");
-  auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
+  std::vector<int> dilations = op_info->GetAttr<std::vector<int>>("dilations");
   auto fuse_relu = op_info->GetAttr<bool>("fuse_relu");
   CHECK_EQ(strides.size(), 2L);
   CHECK_EQ(dilations.size(), 2L);
diff --git a/lite/kernels/xpu/bridges/pool_op.cc b/lite/kernels/xpu/bridges/pool_op.cc
index 862e1841e8..5c38cacddd 100644
--- a/lite/kernels/xpu/bridges/pool_op.cc
+++ b/lite/kernels/xpu/bridges/pool_op.cc
@@ -37,7 +37,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto out_name = op_info->Output("Out").front();
   auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
   auto ceil_mode = op_info->GetAttr<bool>("ceil_mode");
-  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
+  std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
   auto global_pooling = op_info->GetAttr<bool>("global_pooling");
   auto ksize = op_info->GetAttr<std::vector<int>>("ksize");
   auto strides = op_info->GetAttr<std::vector<int>>("strides");
diff --git a/lite/kernels/xpu/sequence_pool_compute.cc b/lite/kernels/xpu/sequence_pool_compute.cc
index f8e71639b7..35412cf49c 100644
--- a/lite/kernels/xpu/sequence_pool_compute.cc
+++ b/lite/kernels/xpu/sequence_pool_compute.cc
@@ -42,6 +42,8 @@ void XPUSequencePoolCompute::Run() {
 
   xdnn::Pooling_t pool_type = xdnn::Pooling_t::MAX_WITHOUT_INDEX;
   if (pool_type_str == "MAX") {
+  } else if (pool_type_str == "SUM") {
+    pool_type = xdnn::Pooling_t::SUM;
   } else if (pool_type_str == "LAST") {
     pool_type = xdnn::Pooling_t::LAST;
   } else {
diff --git a/lite/kernels/xpu/sequence_unpad_compute.cc b/lite/kernels/xpu/sequence_unpad_compute.cc
new file mode 100644
index 0000000000..2ce296ca21
--- /dev/null
+++ b/lite/kernels/xpu/sequence_unpad_compute.cc
@@ -0,0 +1,98 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/xpu/sequence_unpad_compute.h"
+#include "lite/backends/xpu/xpu_header_sitter.h"
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+
+void SequenceUnpadCompute::PrepareForRun() {
+  lod_xpu_guard_ = TargetWrapperXPU::MallocScratchPad(
+      XPU_MAX_LOD_SIZE * sizeof(int), false /* use_l3 */);
+  lod_cpu_.reserve(XPU_MAX_LOD_SIZE);
+}
+
+void SequenceUnpadCompute::Run() {
+  auto& param = this->template Param<param_t>();
+  auto& ctx = this->ctx_->template As<XPUContext>();
+
+  auto x_dims = param.X->dims();
+  auto len_dims = param.Length->dims();
+
+  // XXX(miaotianxiang): Target of tensor |Length| is |kHost|.
+  auto* seq_len_ptr = param.Length->template data<int64_t>();
+  int64_t batch_size = len_dims[0];
+  std::vector<uint64_t> out_lod0(batch_size + 1, 0);
+  for (int64_t i = 0; i < batch_size; ++i) {
+    out_lod0[i + 1] = out_lod0[i] + seq_len_ptr[i];
+  }
+  paddle::lite::LoD out_lod;
+  out_lod.push_back(out_lod0);
+
+  int64_t out_dim0 = out_lod0.back();
+  std::vector<int64_t> out_dims{out_dim0};
+  if (x_dims.size() == 2) {
+    out_dims.push_back(1);
+  } else {
+    for (size_t i = 2; i < x_dims.size(); ++i) {
+      out_dims.push_back(x_dims[i]);
+    }
+  }
+  param.Out->Resize(out_dims);
+  param.Out->set_lod(out_lod);
+
+  lod_cpu_ = {0};
+  for (int64_t i = 0; i < batch_size; ++i) {
+    int offset =
+        lod_cpu_.back() + static_cast<int>(param.Length->data<int64_t>()[i]);
+    lod_cpu_.push_back(offset);
+  }
+  lod_xpu_guard_->Reserve((batch_size + 1) * sizeof(int));
+  TargetWrapperXPU::MemcpySync(lod_xpu_guard_->addr_,
+                               lod_cpu_.data(),
+                               (batch_size + 1) * sizeof(int),
+                               IoDirection::HtoD);
+
+  int dim = param.Out->numel() / out_dim0;
+  int r = xdnn::sequence_unpad(
+      ctx.GetRawContext(),                           /* ctx */
+      param.X->data<float>(),                        /* pad_data */
+      param.Out->mutable_data<float>(TARGET(kXPU)),  /* seq_data */
+      reinterpret_cast<int*>(lod_xpu_guard_->addr_), /* sequence */
+      param.X->dims()[1],                            /* pad_seq_len */
+      batch_size,                                    /* batch_size */
+      dim /* dim */);
+  CHECK_EQ(r, 0);
+}
+
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_LITE_KERNEL(sequence_unpad,
+                     kXPU,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::xpu::SequenceUnpadCompute,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
+    .BindInput("Length",
+               {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
+    .Finalize();
diff --git a/lite/kernels/xpu/sequence_unpad_compute.h b/lite/kernels/xpu/sequence_unpad_compute.h
new file mode 100644
index 0000000000..8e038383e6
--- /dev/null
+++ b/lite/kernels/xpu/sequence_unpad_compute.h
@@ -0,0 +1,44 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+#include "lite/backends/xpu/target_wrapper.h"  // XPUScratchPadGuard
+#include "lite/core/kernel.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+
+class SequenceUnpadCompute
+    : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::SequenceUnpadParam;
+
+  void PrepareForRun() override;
+
+  void Run() override;
+
+ private:
+  XPUScratchPadGuard lod_xpu_guard_;
+  std::vector<int> lod_cpu_;
+};
+
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
-- 
GitLab
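
Editor's note (placed after the patch trailer, where `git am` ignores it): the repeated `auto` -> `std::vector<int>` changes above are the tiny_publish/flatbuffer build fix. The likely mechanism, inferred from the commit title rather than stated in the patch, is that the flatbuffers-backed `OpInfo::GetAttr` returns a lightweight view type that is convertible to `std::vector<int>` but is not one, so `auto` deduces the view and later vector-only operations fail to compile. A minimal self-contained sketch of that pitfall, using a hypothetical `VectorView`/`GetAttr` stand-in rather than the real Paddle-Lite API:

#include <cstdio>
#include <vector>

// Hypothetical stand-in for a flatbuffers-backed attribute accessor that
// returns a lightweight view merely *convertible* to std::vector<int>.
struct VectorView {
  const int* data_;
  size_t size_;
  operator std::vector<int>() const {
    return std::vector<int>(data_, data_ + size_);
  }
};

VectorView GetAttr(const char* /*name*/) {
  static const int raw[4] = {0, 0, 1, 1};
  return VectorView{raw, 4};
}

int main() {
  // With `auto`, the deduced type is VectorView, so vector-only members
  // are missing and the build breaks:
  //   auto paddings = GetAttr("paddings");
  //   paddings.size();  // error: no member named 'size' in 'VectorView'
  //
  // Naming the type forces the user-defined conversion, which is what the
  // patch does at every GetAttr<std::vector<int>>(...) call site:
  std::vector<int> paddings = GetAttr("paddings");
  std::printf("paddings.size() = %zu\n", paddings.size());
  return 0;
}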
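Likewise, a short sketch of the naming convention restored by the lite/core/op_lite.cc hunk: when an output scale is looked up by variable name, the attribute key is rebuilt as "<argname><index>_scale" (e.g. "Out0_scale" for the first tensor of output argument "Out"). The helper below is illustrative only, not the actual OpInfo method:

#include <iostream>
#include <string>

// Illustrative helper mirroring the key rebuilt by the op_lite.cc hunk,
// "<argname><index>_scale", under which the quantization scale is stored.
std::string OutputScaleAttrName(const std::string& argname, int index) {
  return argname + std::to_string(index) + "_scale";
}

int main() {
  std::cout << OutputScaleAttrName("Out", 0) << std::endl;  // Out0_scale
  return 0;
}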
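Finally, the new XPUScratchPad::Reserve grows a device scratch buffer monotonically (free-then-malloc, so old contents are not preserved) and is rejected for L3-resident pads; the XPU sequence_unpad kernel uses it to fit the LoD array before each HtoD copy. A rough usage sketch against the patched API (XPU toolchain required; the byte counts are arbitrary examples):

#include "lite/backends/xpu/target_wrapper.h"

void ScratchPadExample() {
  using paddle::lite::TargetWrapperXPU;
  // Allocate a 1 KiB global-memory (non-L3) scratchpad.
  auto guard = TargetWrapperXPU::MallocScratchPad(1024, false /* use_l3 */);
  // Growing reallocates; data previously at guard->addr_ is NOT carried over.
  guard->Reserve(4096);
  // A smaller or equal request is a no-op: |size_| only ever increases.
  guard->Reserve(2048);
}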