add x86 math:sequence_scale,sequence_padding,sequence2batch,sequence_pooling. test=develop (#1884)

add x86 math:sequence_scale,sequence_padding,sequence2batch,sequence_pooling. test=develop (#1884)

add x86 math:sequence_scale,sequence_padding,sequence2batch,sequence_pooling. test=develop (#1884)
54101ef0 · huzhiqiang · GitHub · 1ee60474 · 54101ef0 · 54101ef0
7 changed file
--- a/lite/x86/math/CMakeLists.txt
+++ b/lite/x86/math/CMakeLists.txt
@@ -37,10 +37,10 @@ math_library(math_function DEPS blas)
 math_library(maxouting)
 math_library(pooling)
 # math_library(selected_rows_functor DEPS selected_rows math_function blas)
-# math_library(sequence2batch)
-# math_library(sequence_padding)
-# math_library(sequence_pooling DEPS math_function jit_kernel_helper)
-# math_library(sequence_scale)
+math_library(sequence2batch)
+math_library(sequence_padding)
+math_library(sequence_pooling DEPS math_function jit_kernel_helper)
+math_library(sequence_scale)
 math_library(softmax DEPS math_function jit_kernel_helper)
 math_library(beam_search DEPS math_function)
 #

--- a/lite/x86/math/sequence2batch.cc
+++ b/lite/x86/math/sequence2batch.cc
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "lite/x86/math/sequence2batch.h"
+
+namespace paddle {
+namespace lite {
+namespace x86 {
+namespace math {
+
+// CPU (x86) implementation of CopyMatrixRowsFunctor.
+//
+// Copies whole rows between two rank-2 tensors of equal width:
+//   is_src_index == true : dst[i]            = src[index_lod[i]]   (gather)
+//   is_src_index == false: dst[index_lod[i]] = src[i]              (scatter)
+// for every i in [0, dst->dims()[0]).
+template <typename T>
+class CopyMatrixRowsFunctor<lite::TargetType::kX86, T> {
+ public:
+  // context      : not used by this implementation.
+  // src          : rank-2 source tensor.
+  // index_lod    : row indices; needs one entry per row of dst.
+  // dst          : rank-2 destination tensor, same width as src.
+  // is_src_index : true to gather rows of src, false to scatter into dst.
+  void operator()(const lite::Context<lite::TargetType::kX86>& context,
+                  const lite::Tensor& src,
+                  std::vector<size_t> index_lod,
+                  lite::Tensor* dst,
+                  bool is_src_index) {
+    auto src_dims = src.dims();
+    auto dst_dims = dst->dims();
+    PADDLE_ENFORCE_EQ(
+        src_dims.size(), 2UL, "The src must be matrix with rank 2.");
+    PADDLE_ENFORCE_EQ(
+        dst_dims.size(), 2UL, "The dst must be matrix with rank 2.");
+    PADDLE_ENFORCE_EQ(
+        src_dims[1], dst_dims[1], "The width of src and dst must be same.");
+
+    // Work in size_t so that neither the row counter nor the byte count is
+    // narrowed to int for large tensors.
+    const size_t rows = static_cast<size_t>(dst_dims[0]);
+    const size_t width = static_cast<size_t>(dst_dims[1]);
+    // Every row of dst consumes one index entry; a shorter index would be
+    // read past its end below.
+    PADDLE_ENFORCE_GE(index_lod.size(),
+                      rows,
+                      "The index must provide one entry for every row of dst.");
+
+    const size_t* index = index_lod.data();
+    auto* src_data = src.data<T>();
+    auto* dst_data = dst->mutable_data<T>();
+    const size_t row_bytes = width * sizeof(T);
+    if (is_src_index) {
+      for (size_t i = 0; i < rows; ++i) {
+        memcpy(dst_data + i * width, src_data + index[i] * width, row_bytes);
+      }
+    } else {
+      for (size_t i = 0; i < rows; ++i) {
+        memcpy(dst_data + index[i] * width, src_data + i * width, row_bytes);
+      }
+    }
+  }
+};
+
+// Explicit instantiations of the x86 row-copy functor for float and double.
+template class CopyMatrixRowsFunctor<lite::TargetType::kX86, float>;
+template class CopyMatrixRowsFunctor<lite::TargetType::kX86, double>;
+
+// Explicit instantiations of the sequence <-> batch reordering functors for
+// the same element types.
+template class LoDTensor2BatchFunctor<lite::TargetType::kX86, float>;
+template class LoDTensor2BatchFunctor<lite::TargetType::kX86, double>;
+template class Batch2LoDTensorFunctor<lite::TargetType::kX86, float>;
+template class Batch2LoDTensorFunctor<lite::TargetType::kX86, double>;
+
+}  // namespace math
+}  // namespace x86
+}  // namespace lite
+}  // namespace paddle
--- a/lite/x86/math/sequence2batch.h
+++ b/lite/x86/math/sequence2batch.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <algorithm>
+#include <vector>
+
+#include "lite/core/context.h"
+#include "lite/core/tensor.h"
+#include "lite/fluid/eigen.h"
+#include "lite/fluid/lod.h"
+#include "lite/utils/paddle_enforce.h"
+
+namespace paddle {
+namespace lite {
+namespace x86 {
+namespace math {
+
+// Shorthand for lite::fluid::EigenMatrix that defaults to row-major storage
+// and Eigen::DenseIndex as the index type.
+template <typename T,
+          int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenMatrix = lite::fluid::EigenMatrix<T, MajorType, IndexType>;
+
+// Row-wise copy between two tensors, driven by an index vector.
+// Only the declaration lives here; the call operator is defined per target
+// in the corresponding source file.
+template <lite::TargetType Target, typename T>
+class CopyMatrixRowsFunctor {
+ public:
+  // If is_src_index is true,
+  // copy the indexed rows of input src to the output dst.
+  // If is_src_index is false,
+  // copy the input src to the indexed rows of output dst.
+  // The indexed rows are based on the input index.
+  // Note: index_lod is taken by value, so the caller's vector is copied.
+  void operator()(const lite::Context<Target>& context,
+                  const lite::Tensor& src,
+                  std::vector<size_t> index_lod,
+                  lite::Tensor* dst,
+                  bool is_src_index);
+};
+
+// Reorders the rows of a one-level LoD tensor from sequence-major order into
+// time-step-major ("batch") order, so that the n-th step of every sequence
+// that is at least n+1 steps long is stored contiguously.
+template <lite::TargetType Target, typename T>
+class LoDTensor2BatchFunctor {
+  // Start row, length and original position of one input sequence.
+  // example:  sequences = {s0, s1, s2}
+  //           s0: 0 0 0 0, s1: 1 1 1 1 1, s2: 2 2 2
+  //           after sorting by descending length:
+  //           seq_info[3] = {(4, 5, 1), (0, 4, 0), (9, 3, 2)}
+  //
+  struct SeqInfo {
+    SeqInfo(int start, int length, int seq_idx)
+        : start(start), length(length), seq_idx(seq_idx) {}
+    int start;
+    int length;
+    int seq_idx;
+  };
+
+ public:
+  // context          : forwarded to CopyMatrixRowsFunctor.
+  // lod_tensor       : input rows in sequence-major order.
+  // batch            : output tensor; receives the reordered rows and, when
+  //                    is_cal_batch_lod is true, the computed 3-level LoD.
+  // is_cal_batch_lod : false reuses the LoD already stored on `batch`
+  //                    (its level 1 is the row index); true recomputes it
+  //                    from the LoD of lod_tensor.
+  // is_reverse       : when true each sequence is walked from its last step
+  //                    to its first.
+  void operator()(const lite::Context<Target>& context,
+                  const lite::Tensor& lod_tensor,
+                  lite::Tensor* batch,
+                  bool is_cal_batch_lod,
+                  bool is_reverse = false) const {
+    if (!is_cal_batch_lod) {
+      auto lods = batch->lod();
+      // NOTE: the check requires more than two levels (i.e. the three levels
+      // produced below), although the message text says "at least 2-level".
+      PADDLE_ENFORCE_GT(lods.size(),
+                        2UL,
+                        "The LoD of LoDTensor should inlcude at least 2-level "
+                        "sequence information.");
+      PADDLE_ENFORCE_EQ(
+          lods[1].size(),
+          static_cast<size_t>(lod_tensor.dims()[0]),
+          "The LoD information should be consistent with the dims.");
+      CopyMatrixRowsFunctor<Target, T> to_batch;
+      to_batch(context, lod_tensor, lods[1], batch, true);
+      return;
+    }
+
+    auto lods = lod_tensor.lod();
+    PADDLE_ENFORCE_EQ(lods.size(), 1UL, "Only support one level sequence now.");
+
+    const auto& lod = lods[0];
+    // Without at least one sequence `lod.size() - 1` would wrap around (for
+    // an empty level) and seq_info[0] below would not exist.
+    PADDLE_ENFORCE_GT(
+        lod.size(), 1UL, "The LoD should describe at least one sequence.");
+
+    std::vector<SeqInfo> seq_info;
+    seq_info.reserve(lod.size() - 1);
+    for (size_t seq_id = 0; seq_id < lod.size() - 1; ++seq_id) {
+      int length = static_cast<int>(lod[seq_id + 1] - lod[seq_id]);
+      seq_info.emplace_back(
+          static_cast<int>(lod[seq_id]), length, static_cast<int>(seq_id));
+    }
+
+    // Longest sequence first, so that for every time step the still-active
+    // sequences form a prefix of seq_info.
+    std::sort(seq_info.begin(),
+              seq_info.end(),
+              [](const SeqInfo& a, const SeqInfo& b) {
+                return a.length > b.length;
+              });
+
+    // Calculate the start position of each batch.
+    // example:  sequences = {s0, s1, s2}
+    //           s0: 0 0 0 0, s1: 1 1 1 1 1, s2: 2 2 2
+    //           max_seqlen = 5,
+    //           batchIndex = {b0, b1, b2, b3, b4}
+    //           b0: 1 0 2, b1: 1 0 2, b2: 1 0 2, b3: 1 0, b4: 1
+    //           batch_start_positions[6] = {0, 3, 6, 9, 11, 12}
+    //              batch_start_positions[0] = 0
+    //              batch_start_positions[1] = len(b0)
+    //              batch_start_positions[2] = len(b0) + len(b1)
+    //              ...
+    //           seq2batch_idx[12] = {4, 0, 9,
+    //                                5, 1, 10,
+    //                                6, 2, 11,
+    //                                7, 3,
+    //                                8}
+    //           seq_order = {1, 0, 2}, the sort order.
+    //               where 1 is the second sequence,
+    //                     0 is the first sequence,
+    //                     2 is the third sequence.
+    // max_seqlen is the number of time-step batches after rearranging the
+    // input LodTensor; it equals the length of the longest input sequence.
+
+    lite::LoD batch_lods;
+    batch_lods.emplace_back(std::vector<size_t>{0});
+    batch_lods.emplace_back(std::vector<size_t>{0});
+    batch_lods.emplace_back(std::vector<size_t>{0});
+
+    // batch_lods[0] is the start positions for batch LoDTensor
+    const int max_seqlen = seq_info[0].length;
+    batch_lods[0].resize(static_cast<size_t>(max_seqlen + 1));
+    // batch_lods[1] is the raw index in the input LoDTensor
+    batch_lods[1].resize(static_cast<size_t>(lod_tensor.dims()[0]));
+    // batch_lods[2] is the sort order for the input LoDTensor.
+    batch_lods[2].resize(seq_info.size());
+
+    size_t* batch_starts = batch_lods[0].data();
+    size_t* seq2batch_idx = batch_lods[1].data();
+    batch_starts[0] = 0;
+    for (int n = 0; n < max_seqlen; n++) {
+      auto batch_id = static_cast<int>(batch_starts[n]);
+      for (size_t i = 0; i < seq_info.size(); ++i) {
+        int seq_len = seq_info[i].length;
+        int start = seq_info[i].start;
+        if (n < seq_len) {
+          seq2batch_idx[batch_id] =
+              is_reverse ? start + seq_len - 1 - n : start + n;
+          batch_id++;
+        } else {
+          // Sequences are sorted by descending length, so every later one
+          // has already ended at step n as well.
+          break;
+        }
+      }
+      batch_starts[n + 1] = static_cast<size_t>(batch_id);
+    }
+    size_t* seq_order = batch_lods[2].data();
+    for (size_t i = 0; i < seq_info.size(); ++i) {
+      seq_order[i] = seq_info[i].seq_idx;
+    }
+    batch->set_lod(batch_lods);
+
+    CopyMatrixRowsFunctor<Target, T> to_batch;
+    to_batch(context, lod_tensor, batch_lods[1], batch, true);
+  }
+};
+
+// Inverse of LoDTensor2BatchFunctor: scatters the rows of a batch-ordered
+// tensor back to the positions recorded in level 1 of the batch LoD.
+template <lite::TargetType Target, typename T>
+class Batch2LoDTensorFunctor {
+ public:
+  // context    : forwarded to CopyMatrixRowsFunctor.
+  // batch      : input tensor whose LoD level 1 holds the row index.
+  // lod_tensor : output tensor; its first dimension must equal the size of
+  //              that index.
+  void operator()(const lite::Context<Target>& context,
+                  const lite::Tensor& batch,
+                  lite::Tensor* lod_tensor) const {
+    auto in_lod = batch.lod();
+    // NOTE: the check requires more than two LoD levels, although the
+    // message text says "at least 2-level"; only level 1 is read below.
+    PADDLE_ENFORCE_GT(in_lod.size(),
+                      2UL,
+                      "The LoD of LoDTensor should inlcude at least 2-level "
+                      "sequence information.");
+    PADDLE_ENFORCE_EQ(
+        in_lod[1].size(),
+        static_cast<size_t>(lod_tensor->dims()[0]),
+        "The LoD information should be consistent with the dims.");
+    CopyMatrixRowsFunctor<Target, T> to_seq;
+    // is_src_index == false: row i of batch goes to row in_lod[1][i].
+    to_seq(context, batch, in_lod[1], lod_tensor, false);
+  }
+};
+
+}  // namespace math
+}  // namespace x86
+}  // namespace lite
+}  // namespace paddle
--- a/lite/x86/math/sequence_padding.cc
+++ b/lite/x86/math/sequence_padding.cc
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "lite/x86/math/sequence_padding.h"
+
+namespace paddle {
+namespace lite {
+namespace x86 {
+namespace math {
+
+// Copies the valid (non-padding) steps of every sequence between the packed
+// sequence layout and the padded layout.
+//
+// dst_tensor / src_tensor : destination and source of the copy.
+// seq_offsets             : absolute start row of each sequence plus the end.
+// pad_seq_len             : padded length; must not be shorter than any
+//                           sequence.
+// step_width              : number of elements in one step.
+// norm_by_len             : when true, every copied element is multiplied by
+//                           1 / (length of its sequence).
+// type                    : kSeqToPad reads packed data and writes padded
+//                           data, kPadToSeq the reverse.
+// layout                  : element order of the padded tensor.
+template <typename T>
+void CopyValidData(lite::Tensor* dst_tensor,
+                   const lite::Tensor* src_tensor,
+                   const std::vector<size_t>& seq_offsets,
+                   int pad_seq_len,
+                   int step_width,
+                   bool norm_by_len,
+                   CopyType type,
+                   PadLayout layout) {
+  const bool seq_to_pad = (type == kSeqToPad);
+  const bool batch_major = (layout == kBatchLengthWidth);
+
+  int seq_num = seq_offsets.size() - 1;
+  const T* src_data = src_tensor->data<T>();
+  T* dst_data = dst_tensor->mutable_data<T>();
+
+  // Distance, in elements, between consecutive steps of one sequence.
+  const int seq_stride = step_width;
+  const int pad_stride = batch_major ? step_width : seq_num * step_width;
+
+  int seq_idx = 0;
+  while (seq_idx < seq_num) {
+    int valid_seq_len = seq_offsets[seq_idx + 1] - seq_offsets[seq_idx];
+    PADDLE_ENFORCE_GE(
+        pad_seq_len,
+        valid_seq_len,
+        "The padded sequence length can not be less than its original length.");
+
+    // Element offset of the first step of this sequence in each layout.
+    int seq_pos = seq_offsets[seq_idx] * step_width;
+    int pad_pos = batch_major ? seq_idx * pad_seq_len * step_width
+                              : seq_idx * step_width;
+    const float scale = 1.0f / static_cast<float>(valid_seq_len);
+
+    for (int remaining = valid_seq_len; remaining > 0; --remaining) {
+      const T* from = src_data + (seq_to_pad ? seq_pos : pad_pos);
+      T* to = dst_data + (seq_to_pad ? pad_pos : seq_pos);
+      memcpy(to, from, step_width * sizeof(T));
+      if (norm_by_len) {
+        for (int k = 0; k < step_width; ++k) {
+          to[k] *= scale;
+        }
+      }
+      seq_pos += seq_stride;
+      pad_pos += pad_stride;
+    }
+    ++seq_idx;
+  }
+}
+
+// Fills `dest` with `dest_size` copies of the `num_bytes`-wide pattern at
+// `src`. The pattern is written once and the already-filled prefix is then
+// copied onto the remainder in chunks that double in size.
+// Does nothing when dest or src is null or dest_size is zero.
+template <typename T>
+static void fast_mem_init(void* dest,
+                          size_t dest_size,
+                          const T* src,
+                          size_t num_bytes) {
+  if (!dest || !src || dest_size == 0) return;
+
+  unsigned char* bytes = (unsigned char*)dest;
+  const size_t total_bytes = dest_size * num_bytes;
+
+  // Seed the buffer with a single copy of the pattern.
+  memcpy(bytes, src, num_bytes);
+
+  size_t filled = num_bytes;
+  while (filled < total_bytes) {
+    const size_t left = total_bytes - filled;
+    // Never copy more than what is already filled, nor more than is left.
+    const size_t chunk = (left > filled) ? filled : left;
+    memcpy(bytes + filled, bytes, chunk);
+    filled += chunk;
+  }
+}
+
+// CPU (x86) implementation of PaddingLoDTensorFunctor.
+//
+// Fills pad_tensor with pad_value and then copies every sequence of
+// seq_tensor into its padded slot.
+template <typename T>
+class PaddingLoDTensorFunctor<lite::TargetType::kX86, T> {
+ public:
+  // context       : not used by this implementation.
+  // seq_tensor    : packed input sequences; its LoD supplies the offsets.
+  // pad_tensor    : output tensor, already sized by the caller.
+  // pad_value     : either a single element or one full step
+  //                 (step_width elements) used as filler.
+  // pad_seq_len   : padded length; -1 selects the longest sequence.
+  // lod_level     : LoD level whose absolute offsets delimit the sequences.
+  // norm_by_times : when true, copied elements are scaled by 1 / length.
+  // layout        : element order of pad_tensor.
+  void operator()(const lite::Context<lite::TargetType::kX86>& context,
+                  const lite::Tensor& seq_tensor,
+                  lite::Tensor* pad_tensor,
+                  const lite::Tensor& pad_value,
+                  int pad_seq_len = -1,
+                  int lod_level = 0,
+                  bool norm_by_times = false,
+                  const PadLayout layout = kBatchLengthWidth) {
+    const auto seq_offsets =
+        lite::fluid::ToAbsOffset(seq_tensor.lod())[lod_level];
+    const auto& seq_tensor_dims = seq_tensor.dims();
+    const auto& pad_tensor_dims = pad_tensor->dims();
+    if (pad_seq_len == -1) {
+      pad_seq_len = MaximumSequenceLength(seq_offsets);
+    }
+    int step_width = seq_tensor.numel() / seq_tensor_dims[0];
+
+    CheckDims(seq_tensor_dims,
+              pad_tensor_dims,
+              seq_offsets,
+              pad_seq_len,
+              step_width,
+              layout);
+    PADDLE_ENFORCE(pad_value.numel() == 1 || pad_value.numel() == step_width,
+                   "The numel of 'pad_value' can only be 1 or be equal to the "
+                   "'step_width'.");
+
+    // fill padding value
+    T* pad_data = pad_tensor->mutable_data<T>();
+    const T* pad_value_data = pad_value.data<T>();
+    const int64_t pad_numel = pad_tensor->numel();
+    if (pad_value.numel() == 1) {
+      fast_mem_init<T>(pad_data, pad_numel, pad_value_data, sizeof(T));
+    } else {
+      // 64-bit counter: an int would overflow before reaching numel() on
+      // tensors with more than INT_MAX elements.
+      for (int64_t i = 0; i < pad_numel; i += step_width) {
+        memcpy(pad_data + i, pad_value_data, step_width * sizeof(T));
+      }
+    }
+
+    // Overwrite the filler with the real data wherever a sequence has it.
+    CopyValidData<T>(pad_tensor,
+                     &seq_tensor,
+                     seq_offsets,
+                     pad_seq_len,
+                     step_width,
+                     norm_by_times,
+                     kSeqToPad,
+                     layout);
+  }
+};
+
+// CPU (x86) implementation of UnpaddingLoDTensorFunctor: copies the valid
+// steps of every sequence out of the padded tensor back into the packed
+// sequence tensor, using the LoD stored on seq_tensor.
+template <typename T>
+class UnpaddingLoDTensorFunctor<lite::TargetType::kX86, T> {
+ public:
+  // pad_seq_len == -1 means "use the longest sequence found in the LoD".
+  void operator()(const lite::Context<lite::TargetType::kX86>& context,
+                  const lite::Tensor& pad_tensor,
+                  lite::Tensor* seq_tensor,
+                  int pad_seq_len = -1,
+                  int lod_level = 0,
+                  bool norm_by_times = false,
+                  const PadLayout layout = kBatchLengthWidth) {
+    auto offsets = lite::fluid::ToAbsOffset(seq_tensor->lod())[lod_level];
+    const auto& seq_dims = seq_tensor->dims();
+    const auto& pad_dims = pad_tensor.dims();
+
+    if (pad_seq_len == -1) pad_seq_len = MaximumSequenceLength(offsets);
+
+    // Elements per step = total elements / number of steps.
+    int width = seq_tensor->numel() / seq_dims[0];
+
+    CheckDims(seq_dims, pad_dims, offsets, pad_seq_len, width, layout);
+
+    CopyValidData<T>(seq_tensor,
+                     &pad_tensor,
+                     offsets,
+                     pad_seq_len,
+                     width,
+                     norm_by_times,
+                     kPadToSeq,
+                     layout);
+  }
+};
+
+// Explicit instantiations of the x86 padding functor for the supported
+// element types.
+template class PaddingLoDTensorFunctor<lite::TargetType::kX86, int>;
+template class PaddingLoDTensorFunctor<lite::TargetType::kX86, int64_t>;
+template class PaddingLoDTensorFunctor<lite::TargetType::kX86, float>;
+template class PaddingLoDTensorFunctor<lite::TargetType::kX86, double>;
+
+// Explicit instantiations of the x86 unpadding functor for the same types.
+template class UnpaddingLoDTensorFunctor<lite::TargetType::kX86, int>;
+template class UnpaddingLoDTensorFunctor<lite::TargetType::kX86, int64_t>;
+template class UnpaddingLoDTensorFunctor<lite::TargetType::kX86, float>;
+template class UnpaddingLoDTensorFunctor<lite::TargetType::kX86, double>;
+
+}  // namespace math
+}  // namespace x86
+}  // namespace lite
+}  // namespace paddle
--- a/lite/x86/math/sequence_padding.h
+++ b/lite/x86/math/sequence_padding.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <algorithm>
+#include <vector>
+#include "lite/core/context.h"
+#include "lite/core/tensor.h"
+#include "lite/fluid/lod.h"
+#include "lite/utils/paddle_enforce.h"
+
+namespace paddle {
+namespace lite {
+namespace x86 {
+namespace math {
+
+// Element order of a padded tensor. As used by CopyValidData:
+//   kBatchLengthWidth: [num_sequences, padded_length, step_width]
+//   kLengthBatchWidth: [padded_length, num_sequences, step_width]
+enum PadLayout { kBatchLengthWidth = 0, kLengthBatchWidth };
+
+// Direction of a copy between the packed sequence layout and the padded one.
+enum CopyType { kSeqToPad, kPadToSeq };
+
+// Returns the length of the longest sequence described by `seq_offset`,
+// where sequence i spans [seq_offset[i], seq_offset[i + 1]).
+// Returns 0 when fewer than two offsets are given (no sequence at all);
+// this also avoids the unsigned wrap-around of `size() - 1` on an empty
+// vector.
+inline static size_t MaximumSequenceLength(
+    const std::vector<size_t>& seq_offset) {
+  size_t max_seq_len = 0;
+  for (size_t i = 1; i < seq_offset.size(); ++i) {
+    max_seq_len = std::max(max_seq_len, seq_offset[i] - seq_offset[i - 1]);
+  }
+  return max_seq_len;
+}
+
+// Validates that a packed sequence tensor and a padded tensor are compatible:
+//   1. the first dimension of the sequence tensor equals the last offset,
+//      i.e. the total number of steps;
+//   2. the padded tensor has the same rank as the sequence tensor or exactly
+//      one more.
+// NOTE: padded_seq_len, step_width and layout are accepted but not used by
+// the checks below. seq_offset must not be empty (back() is called on it).
+inline static void CheckDims(const lite::DDim& seq_tensor_dims,
+                             const lite::DDim& pad_tensor_dims,
+                             const std::vector<size_t>& seq_offset,
+                             int64_t padded_seq_len,
+                             int64_t step_width,
+                             const PadLayout& layout) {
+  PADDLE_ENFORCE_EQ(static_cast<size_t>(seq_tensor_dims[0]),
+                    seq_offset.back(),
+                    "Value of 1st dimension of the sequence tensor should be "
+                    "equal to sum of lengths of all sequences.");
+
+  PADDLE_ENFORCE(seq_tensor_dims.size() + 1 == pad_tensor_dims.size() ||
+                     seq_tensor_dims.size() == pad_tensor_dims.size(),
+                 "pad_tensor's rank should be 1 greater than seq_tensor's "
+                 "rank, or be equal with it.");
+}
+
+/*
+ * \brief   Pads a LoDTensor into a dense Tensor in which every sequence has
+ *          the same length.
+ *
+ *  Padding sequence:
+ *        padding[i] = seq[lod[level][i]]
+ *  Unpadding sequence:
+ *        seq[lod[level][i]] = padding[i]
+ *
+ *  Example (as laid out with layout == kLengthBatchWidth, i.e.
+ *  [max_sequence_length, num_sequences, sequence_width]):
+ *    seq     (s0, s0, s0, s0; s1, s1; s2, s2, s2; s3)
+ *    padding (s0, s1, s2, s3; s0, s1, s2, 0; s0, 0, s2, 0; s0, 0, 0, 0)
+ *
+ * \param context       device context of this functor.
+ * \param seq_tensor    LoDTensor which is stored in sequence format, the shape
+ *                      is [total_sequence_length, sequence_width] where
+ *                      total_sequence_length is the sum of all sequences'
+ *                      length.
+ * \param pad_tensor    output Tensor in which all sequences are padded to the
+ *                      same length.
+ * \param pad_value     Tensor holding the filler for the padded positions.
+ * \param pad_seq_len   padded sequence length (default -1).
+ * \param lod_level     LoD level to take the sequence offsets from
+ *                      (default 0).
+ * \param norm_by_times whether dividing sequence's length.
+ * \param layout        element order of pad_tensor (default
+ *                      kBatchLengthWidth).
+ *
+ * \note  Only the declaration lives here; the call operator is defined per
+ *        target in the corresponding source file.
+ */
+template <lite::TargetType Target, typename T>
+class PaddingLoDTensorFunctor {
+ public:
+  void operator()(const lite::Context<Target>& context,
+                  const lite::Tensor& seq_tensor,
+                  lite::Tensor* pad_tensor,
+                  const lite::Tensor& pad_value,
+                  int pad_seq_len = -1,
+                  int lod_level = 0,
+                  bool norm_by_times = false,
+                  const PadLayout layout = kBatchLengthWidth);
+};
+
+// Counterpart of PaddingLoDTensorFunctor: copies data from a padded Tensor
+// (pad_tensor) into a sequence-format tensor (seq_tensor).
+// The parameters mirror those of PaddingLoDTensorFunctor, minus pad_value.
+// Only the declaration lives here; the call operator is defined per target
+// in the corresponding source file.
+template <lite::TargetType Target, typename T>
+class UnpaddingLoDTensorFunctor {
+ public:
+  void operator()(const lite::Context<Target>& context,
+                  const lite::Tensor& pad_tensor,
+                  lite::Tensor* seq_tensor,
+                  int pad_seq_len = -1,
+                  int lod_level = 0,
+                  bool norm_by_times = false,
+                  const PadLayout layout = kBatchLengthWidth);
+};
+
+}  // namespace math
+}  // namespace x86
+}  // namespace lite
+}  // namespace paddle
--- a/lite/x86/math/sequence_scale.cc
+++ b/lite/x86/math/sequence_scale.cc
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "lite/x86/math/sequence_scale.h"
+#include "lite/fluid/lod.h"
+
+namespace paddle {
+namespace lite {
+namespace x86 {
+namespace math {
+
+// CPU (x86) implementation of ScaleLoDTensorFunctor.
+//
+// Multiplies, in place, every element of the i-th level-0 sequence of `seq`
+// by scales[i].
+template <typename T>
+class ScaleLoDTensorFunctor<lite::TargetType::kX86, T> {
+ public:
+  // context : not used by this implementation.
+  // scales  : one factor per level-0 sequence.
+  // seq     : tensor to scale; dims()[1] is taken as the row width.
+  void operator()(const lite::Context<lite::TargetType::kX86>& context,
+                  const T* scales,
+                  lite::Tensor* seq) {
+    const size_t level = 0;
+    auto lod = seq->lod();
+    // Nothing to scale without sequence information; this also keeps
+    // `size() - 1` below from wrapping around.
+    if (lod.empty() || lod[level].empty()) return;
+
+    const size_t num_seq = lod[level].size() - 1;
+    size_t seq_width = seq->dims()[1];
+    // Row ranges must come from the absolute offsets: with a multi-level LoD
+    // the raw level-0 entries index the next level, not tensor rows.
+    lite::LoD abs_offset_lod = lite::fluid::ToAbsOffset(lod);
+    const auto& offsets = abs_offset_lod[level];
+
+    T* seq_data = seq->mutable_data<T>(lite::TargetType::kX86);
+    for (size_t i = 0; i < num_seq; ++i) {
+      const size_t begin = offsets[i] * seq_width;
+      const size_t end = offsets[i + 1] * seq_width;
+      for (size_t j = begin; j < end; ++j) {
+        seq_data[j] *= scales[i];
+      }
+    }
+  }
+};
+
+// Explicit instantiation: only float is provided for the x86 target here.
+template class ScaleLoDTensorFunctor<lite::TargetType::kX86, float>;
+
+}  // namespace math
+}  // namespace x86
+}  // namespace lite
+}  // namespace paddle
--- a/lite/x86/math/sequence_scale.h
+++ b/lite/x86/math/sequence_scale.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "lite/core/context.h"
+#include "lite/core/tensor.h"
+
+namespace paddle {
+namespace lite {
+namespace x86 {
+namespace math {
+
+/*
+ * \brief   Scale a sequence.
+ *
+ *  Every sequence of the LoDTensor is multiplied by its own scale factor.
+ *  Example:
+ *    Given:
+ *      seq = (s0, s0, s0, s0; s1, s1; s2, s2, s2; s3)
+ *      scales = (2, 3, 4, 5)
+ *    then:
+ *      result = (2*s0, 2*s0, 2*s0, 2*s0; 3*s1, 3*s1; 4*s2, 4*s2, 4*s2; 5*s3)
+
+ *
+ * \param context       Device context of this functor.
+ * \param scales        Array<T>. The i-th sequence will be scaled by scales[i].
+ * \param seq           LoDTensor which is stored in sequence format, the shape
+ *                      is [total_sequence_length, sequence_width] where
+ *                      total_sequence_length is the sum of all sequences'
+ *                      length.
+ *
+ * \note  Only the declaration lives here; the call operator is defined per
+ *        target in the corresponding source file.
+ */
+
+template <lite::TargetType Target, typename T>
+class ScaleLoDTensorFunctor {
+ public:
+  void operator()(const lite::Context<Target>& context,
+                  const T* scales,
+                  lite::Tensor* seq);
+};
+
+}  // namespace math
+}  // namespace x86
+}  // namespace lite
+}  // namespace paddle