/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/phi/kernels/funcs/math_function.h"

namespace paddle {
namespace operators {

using Tensor = phi::DenseTensor;
using LoDTensor = framework::LoDTensor;
using LoD = framework::LoD;

template <typename T>
28 29
inline LoD SequenceSliceLoD(const T& in,
                            const int64_t* offset_data,
D
dzhwinter 已提交
30
                            const int64_t* length_data) {
31
  auto out_lod = in.lod();
32 33
  size_t lod_offset = 0;

34
  auto n = in.lod()[0].size() - 1;
35 36
  out_lod[0][0] = 0;
  for (size_t i = 0; i < n; ++i) {
37
    lod_offset += length_data[i];
D
dzhwinter 已提交
38
    out_lod[0][i + 1] = lod_offset;
39 40 41 42
  }
  return out_lod;
}

Q
QI JUN 已提交
43
template <typename DeviceContext, typename T>
44
class SequenceSliceOpKernel : public framework::OpKernel<T> {
45 46 47
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<LoDTensor>("X");
48 49
    auto* offset = ctx.Input<phi::DenseTensor>("Offset");
    auto* length = ctx.Input<phi::DenseTensor>("Length");
50 51
    auto* out = ctx.Output<LoDTensor>("Out");

52
    auto lod = in->lod();
53 54
    PADDLE_ENFORCE_EQ(lod.empty(),
                      false,
55 56 57
                      platform::errors::InvalidArgument(
                          "Input(X) Tensor of SequenceSlice operator does not "
                          "contain LoD information."));
58

59
    PADDLE_ENFORCE_EQ(
60 61
        lod.size(),
        1UL,
62 63 64 65
        platform::errors::InvalidArgument(
            "LoD information error. SequenceSlice operator only support one "
            "level sequence now, but received LoD level is %d.",
            lod.size()));
66
    auto n = lod[0].size() - 1;
67
    PADDLE_ENFORCE_EQ(
68 69
        n,
        static_cast<size_t>(length->dims()[0]),
70 71 72 73
        platform::errors::InvalidArgument(
            "Input length shape error. The length of input LoD sequence and "
            "input length-array‘s first dimension should be equal, but the LoD "
            "sequence length is %d, the length-array‘s first dimension is %d.",
74 75
            n,
            static_cast<size_t>(length->dims()[0])));
76
    PADDLE_ENFORCE_EQ(
77 78
        n,
        static_cast<size_t>(offset->dims()[0]),
79 80 81 82
        platform::errors::InvalidArgument(
            "Input offset shape error. The length of input LoD sequence and "
            "input offset-array‘s first dimension should be equal, but the LoD "
            "sequence length is %d, the offset-array‘s first dimension is %d.",
83 84
            n,
            static_cast<size_t>(offset->dims()[0])));
85

86 87
    const int64_t* offset_data = offset->data<int64_t>();
    const int64_t* length_data = length->data<int64_t>();
88 89
    phi::DenseTensor offset_cpu;
    phi::DenseTensor length_cpu;
90 91 92

    if (platform::is_gpu_place(ctx.GetPlace())) {
      offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
F
fengjiayi 已提交
93
      framework::TensorCopySync(*offset, platform::CPUPlace(), &offset_cpu);
94 95 96
      offset_data = offset_cpu.data<int64_t>();

      length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
F
fengjiayi 已提交
97
      framework::TensorCopySync(*length, platform::CPUPlace(), &length_cpu);
98 99
      length_data = length_cpu.data<int64_t>();
    }
100 101

    for (size_t i = 0; i < n; ++i) {
102 103
      PADDLE_ENFORCE_LE(0,
                        offset_data[i],
104 105 106
                        platform::errors::InvalidArgument(
                            "The input offset[%d]'s value is negative, its "
                            "value is %d, expect it to be non-negative.",
107 108 109 110
                            i,
                            offset_data[i]));
      PADDLE_ENFORCE_LE(0,
                        length_data[i],
111 112 113
                        platform::errors::InvalidArgument(
                            "The input length[%d]'s value is negative, its "
                            "value is %d, expect it to be non-negative.",
114 115
                            i,
                            offset_data[i]));
116
      PADDLE_ENFORCE_LE(
117 118
          lod[0][i] + offset_data[i] + length_data[i],
          lod[0][i + 1],
119 120 121
          platform::errors::OutOfRange(
              "The slice end index of target tensor is out of range. expect it "
              "less than or equal to %d, but the actual slice end index is %d.",
122 123
              lod[0][i + 1],
              lod[0][i] + offset_data[i] + length_data[i]));
W
wanghaox 已提交
124
    }
125 126

    out->mutable_data<T>(ctx.GetPlace());
127
    auto out_lod = SequenceSliceLoD(*in, offset_data, length_data);
128 129 130
    auto out_dims = in->dims();
    out_dims[0] = out_lod[0][out_lod[0].size() - 1];
    out->Resize(out_dims);
131 132
    out->set_lod(out_lod);

133 134
    auto in_stride = phi::stride(in->dims());
    auto out_stride = phi::stride(out->dims());
135 136 137

    size_t out_offset = 0;
    for (size_t i = 0; i < n; ++i) {
138
      if (length_data[i] == 0) continue;
D
dzhwinter 已提交
139 140 141 142
      Tensor in_t = in->Slice(
          static_cast<int>(lod[0][i] + offset_data[i]),
          static_cast<int>(lod[0][i] + offset_data[i] + length_data[i]));

143 144 145 146 147 148
      StridedMemcpy<T>(ctx.device_context(),
                       in_t.data<T>(),
                       in_stride,
                       in_t.dims(),
                       out_stride,
                       out->data<T>() + out_offset);
149
      out_offset += length_data[i] * in_stride[0];
150 151 152 153
    }
  }
};

Q
QI JUN 已提交
154
template <typename DeviceContext, typename T>
155
class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
156 157 158
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<LoDTensor>("X");
159 160
    auto* offset = ctx.Input<phi::DenseTensor>("Offset");
    auto* length = ctx.Input<phi::DenseTensor>("Length");
161 162 163 164 165
    auto* out_grad =
        ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
    auto* x_grad =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));

166 167
    const int64_t* offset_data = offset->data<int64_t>();
    const int64_t* length_data = length->data<int64_t>();
168 169
    phi::DenseTensor offset_cpu;
    phi::DenseTensor length_cpu;
170

171 172
    if (platform::is_gpu_place(ctx.GetPlace())) {
      offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
F
fengjiayi 已提交
173
      framework::TensorCopySync(*offset, platform::CPUPlace(), &offset_cpu);
174
      offset_data = offset_cpu.data<int64_t>();
175

176
      length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
F
fengjiayi 已提交
177
      framework::TensorCopySync(*length, platform::CPUPlace(), &length_cpu);
178
      length_data = length_cpu.data<int64_t>();
179 180
    }

181
    auto lod = in->lod();
182 183
    // to avoid out_grad missing lod, compute lod again
    auto out_lod = SequenceSliceLoD(*in, offset_data, length_data);
184

W
wanghaox 已提交
185 186
    if (x_grad) {
      x_grad->mutable_data<T>(ctx.GetPlace());
W
wanghaox 已提交
187
      x_grad->set_lod(in->lod());
188
      phi::funcs::SetConstant<DeviceContext, T> set_zero;
189 190
      set_zero(ctx.template device_context<DeviceContext>(),
               x_grad,
Q
QI JUN 已提交
191
               static_cast<T>(0));
192

W
wanghaox 已提交
193
      for (size_t i = 0; i < out_lod[0].size() - 1; ++i) {
194
        if (length_data[i] == 0) continue;
W
wanghaox 已提交
195 196 197
        Tensor out_grad_t =
            out_grad->Slice(static_cast<int>(out_lod[0][i]),
                            static_cast<int>(out_lod[0][i + 1]));
198
        auto out_grad_stride = phi::stride(out_grad_t.dims());
199

200
        auto x_grad_stride = phi::stride(x_grad->dims());
201

W
wanghaox 已提交
202 203 204
        Tensor x_grad_t = x_grad->Slice(
            static_cast<int>(lod[0][i] + offset_data[i]),
            static_cast<int>(lod[0][i] + offset_data[i] + length_data[i]));
205

206 207 208 209 210
        StridedMemcpy<T>(ctx.device_context(),
                         out_grad_t.data<T>(),
                         out_grad_stride,
                         out_grad_t.dims(),
                         x_grad_stride,
D
dzhwinter 已提交
211
                         x_grad_t.data<T>());
W
wanghaox 已提交
212
      }
213 214 215 216 217 218
    }
  }
};

}  // namespace operators
}  // namespace paddle