/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/strided_memcpy.h"

namespace paddle {
namespace operators {

using Tensor = phi::DenseTensor;
using LoDTensor = phi::DenseTensor;
using LoD = framework::LoD;

template <typename T>
28 29
inline LoD SequenceSliceLoD(const T& in,
                            const int64_t* offset_data,
D
dzhwinter 已提交
30
                            const int64_t* length_data) {
31
  auto out_lod = in.lod();
32 33
  size_t lod_offset = 0;

34
  auto n = in.lod()[0].size() - 1;
35 36
  out_lod[0][0] = 0;
  for (size_t i = 0; i < n; ++i) {
37
    lod_offset += length_data[i];
D
dzhwinter 已提交
38
    out_lod[0][i + 1] = lod_offset;
39 40 41 42
  }
  return out_lod;
}
template <typename DeviceContext, typename T>
44
class SequenceSliceOpKernel : public framework::OpKernel<T> {
45 46 47
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<LoDTensor>("X");
48 49
    auto* offset = ctx.Input<phi::DenseTensor>("Offset");
    auto* length = ctx.Input<phi::DenseTensor>("Length");
50 51
    auto* out = ctx.Output<LoDTensor>("Out");

52
    auto lod = in->lod();
53 54
    PADDLE_ENFORCE_EQ(lod.empty(),
                      false,
55 56 57
                      platform::errors::InvalidArgument(
                          "Input(X) Tensor of SequenceSlice operator does not "
                          "contain LoD information."));
58

59
    PADDLE_ENFORCE_EQ(
60 61
        lod.size(),
        1UL,
62 63 64 65
        platform::errors::InvalidArgument(
            "LoD information error. SequenceSlice operator only support one "
            "level sequence now, but received LoD level is %d.",
            lod.size()));
66
    auto n = lod[0].size() - 1;
67
    PADDLE_ENFORCE_EQ(
68 69
        n,
        static_cast<size_t>(length->dims()[0]),
70 71 72 73
        platform::errors::InvalidArgument(
            "Input length shape error. The length of input LoD sequence and "
            "input length-array‘s first dimension should be equal, but the LoD "
            "sequence length is %d, the length-array‘s first dimension is %d.",
74 75
            n,
            static_cast<size_t>(length->dims()[0])));
76
    PADDLE_ENFORCE_EQ(
77 78
        n,
        static_cast<size_t>(offset->dims()[0]),
79 80 81 82
        platform::errors::InvalidArgument(
            "Input offset shape error. The length of input LoD sequence and "
            "input offset-array‘s first dimension should be equal, but the LoD "
            "sequence length is %d, the offset-array‘s first dimension is %d.",
83 84
            n,
            static_cast<size_t>(offset->dims()[0])));
85

86 87
    const int64_t* offset_data = offset->data<int64_t>();
    const int64_t* length_data = length->data<int64_t>();
88 89
    phi::DenseTensor offset_cpu;
    phi::DenseTensor length_cpu;
90 91 92

    if (platform::is_gpu_place(ctx.GetPlace())) {
      offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
F
fengjiayi 已提交
93
      framework::TensorCopySync(*offset, platform::CPUPlace(), &offset_cpu);
94 95 96
      offset_data = offset_cpu.data<int64_t>();

      length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
F
fengjiayi 已提交
97
      framework::TensorCopySync(*length, platform::CPUPlace(), &length_cpu);
98 99
      length_data = length_cpu.data<int64_t>();
    }
100 101

    for (size_t i = 0; i < n; ++i) {
102 103
      PADDLE_ENFORCE_LE(0,
                        offset_data[i],
104 105 106
                        platform::errors::InvalidArgument(
                            "The input offset[%d]'s value is negative, its "
                            "value is %d, expect it to be non-negative.",
107 108 109 110
                            i,
                            offset_data[i]));
      PADDLE_ENFORCE_LE(0,
                        length_data[i],
111 112 113
                        platform::errors::InvalidArgument(
                            "The input length[%d]'s value is negative, its "
                            "value is %d, expect it to be non-negative.",
114 115
                            i,
                            offset_data[i]));
116
      PADDLE_ENFORCE_LE(
117 118
          lod[0][i] + offset_data[i] + length_data[i],
          lod[0][i + 1],
119 120 121
          platform::errors::OutOfRange(
              "The slice end index of target tensor is out of range. expect it "
              "less than or equal to %d, but the actual slice end index is %d.",
122 123
              lod[0][i + 1],
              lod[0][i] + offset_data[i] + length_data[i]));
W
wanghaox 已提交
124
    }
125 126

    out->mutable_data<T>(ctx.GetPlace());
127
    auto out_lod = SequenceSliceLoD(*in, offset_data, length_data);
128 129 130
    auto out_dims = in->dims();
    out_dims[0] = out_lod[0][out_lod[0].size() - 1];
    out->Resize(out_dims);
131 132
    out->set_lod(out_lod);

133 134
    auto in_stride = phi::stride(in->dims());
    auto out_stride = phi::stride(out->dims());
135 136 137

    size_t out_offset = 0;
    for (size_t i = 0; i < n; ++i) {
138
      if (length_data[i] == 0) continue;
D
dzhwinter 已提交
139 140 141 142
      Tensor in_t = in->Slice(
          static_cast<int>(lod[0][i] + offset_data[i]),
          static_cast<int>(lod[0][i] + offset_data[i] + length_data[i]));

143 144 145 146 147 148
      phi::funcs::StridedMemcpy<T>(ctx.device_context(),
                                   in_t.data<T>(),
                                   in_stride,
                                   in_t.dims(),
                                   out_stride,
                                   out->data<T>() + out_offset);
149
      out_offset += length_data[i] * in_stride[0];
150 151 152 153
    }
  }
};
template <typename DeviceContext, typename T>
155
class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
156 157 158
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<LoDTensor>("X");
159 160
    auto* offset = ctx.Input<phi::DenseTensor>("Offset");
    auto* length = ctx.Input<phi::DenseTensor>("Length");
161 162
    auto* out_grad = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
    auto* x_grad = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
163

164 165
    const int64_t* offset_data = offset->data<int64_t>();
    const int64_t* length_data = length->data<int64_t>();
166 167
    phi::DenseTensor offset_cpu;
    phi::DenseTensor length_cpu;
168

169 170
    if (platform::is_gpu_place(ctx.GetPlace())) {
      offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
F
fengjiayi 已提交
171
      framework::TensorCopySync(*offset, platform::CPUPlace(), &offset_cpu);
172
      offset_data = offset_cpu.data<int64_t>();
173

174
      length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
F
fengjiayi 已提交
175
      framework::TensorCopySync(*length, platform::CPUPlace(), &length_cpu);
176
      length_data = length_cpu.data<int64_t>();
177 178
    }

179
    auto lod = in->lod();
180 181
    // to avoid out_grad missing lod, compute lod again
    auto out_lod = SequenceSliceLoD(*in, offset_data, length_data);
182

W
wanghaox 已提交
183 184
    if (x_grad) {
      x_grad->mutable_data<T>(ctx.GetPlace());
W
wanghaox 已提交
185
      x_grad->set_lod(in->lod());
186
      phi::funcs::SetConstant<DeviceContext, T> set_zero;
187 188
      set_zero(ctx.template device_context<DeviceContext>(),
               x_grad,
Q
QI JUN 已提交
189
               static_cast<T>(0));
190

W
wanghaox 已提交
191
      for (size_t i = 0; i < out_lod[0].size() - 1; ++i) {
192
        if (length_data[i] == 0) continue;
W
wanghaox 已提交
193 194 195
        Tensor out_grad_t =
            out_grad->Slice(static_cast<int>(out_lod[0][i]),
                            static_cast<int>(out_lod[0][i + 1]));
196
        auto out_grad_stride = phi::stride(out_grad_t.dims());
197

198
        auto x_grad_stride = phi::stride(x_grad->dims());
199

W
wanghaox 已提交
200 201 202
        Tensor x_grad_t = x_grad->Slice(
            static_cast<int>(lod[0][i] + offset_data[i]),
            static_cast<int>(lod[0][i] + offset_data[i] + length_data[i]));
203

204 205 206 207 208 209
        phi::funcs::StridedMemcpy<T>(ctx.device_context(),
                                     out_grad_t.data<T>(),
                                     out_grad_stride,
                                     out_grad_t.dims(),
                                     x_grad_stride,
                                     x_grad_t.data<T>());
W
wanghaox 已提交
210
      }
211 212 213 214 215 216
    }
  }
};

}  // namespace operators
}  // namespace paddle