/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <numeric>  // std::iota

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/phi/kernels/funcs/math_function.h"

namespace paddle {
namespace operators {

using LoDTensor = framework::LoDTensor;
template <typename T,
          int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;

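/*
 * SequenceExpandFunctor repeats each source sequence of x according to
 * ref_lod (the offset-based LoD of Input(Y) at ref_level): sequence i is
 * copied repeat_num = ref_lod[i] - ref_lod[i - 1] times into out.
 *
 * Illustrative example (offsets chosen here for exposition only):
 *    x.data   = [a, b, c],  x_lod = [0,    2, 3]
 *    ref_lod  = [0,    2, 3]   (1st sequence repeated twice, 2nd once)
 *    out.data = [a, b, a, b, c]
 */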
template <typename DeviceContext, typename T>
struct SequenceExpandFunctor {
  void operator()(
      const DeviceContext& ctx,
      const LoDTensor& x,
      const framework::Vector<size_t>& x_lod,   /*expand source lod*/
      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
      LoDTensor* out);
};

template <typename DeviceContext, typename T>
struct SequenceExpandGradFunctor {
  void operator()(
      const DeviceContext& ctx,
      const LoDTensor& dout,
      const framework::Vector<size_t>& x_lod,   /*expand source lod*/
      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
      LoDTensor* dx);
};

template <typename T>
struct SequenceExpandFunctor<phi::CPUContext, T> {
  void operator()(
      const phi::CPUContext& context,
      const LoDTensor& x,
      const framework::Vector<size_t>& x_lod,   /*expand source lod*/
      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
      LoDTensor* out) {
    int out_offset = 0;
    int x_item_length = x.numel() / x.dims()[0];
    auto out_data = out->data<T>();
    auto x_data = x.data<T>();
    for (size_t i = 1; i < ref_lod.size(); ++i) {
      int repeat_num = ref_lod[i] - ref_lod[i - 1];
      int x_start = x_lod[i - 1];
      int x_end = x_lod[i];
      int x_seq_len = x_end - x_start;
      if (repeat_num > 0) {
        int out_start = out_offset;
        if (out->lod().size() == 1) {
          out_start = out->lod()[0][out_offset];
        }
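        // Copy the source sequence [x_start, x_end) into the output
        // repeat_num times, element by element.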
        for (int j = 0; j < repeat_num; j++) {
          for (int k = 0; k < x_seq_len; k++) {
            for (int l = 0; l < x_item_length; l++) {
              out_data[(out_start + j * x_seq_len + k) * x_item_length + l] =
                  x_data[(x_start + k) * x_item_length + l];
            }
          }
        }
      }
      out_offset += repeat_num;
    }
  }
};

template <typename DeviceContext, typename T>
class SequenceExpandKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<LoDTensor>("X");
    auto* y = context.Input<LoDTensor>("Y");
    auto* out = context.Output<LoDTensor>("Out");

    int ref_level = context.Attr<int>("ref_level");
    auto& x_lod = x->lod();
    auto& y_lod = y->lod();

    PADDLE_ENFORCE_EQ(
        y_lod.empty(),
        false,
        platform::errors::InvalidArgument(
            "Input(Y) Tensor of SequenceExpandOp does not contain "
            "LoD information."));

    if (ref_level == -1) ref_level = y_lod.size() - 1;

    out->mutable_data<T>(context.GetPlace());

    if (y_lod[ref_level].size() <= 1) {
      framework::TensorCopy(*x, context.GetPlace(), out);
      return;
    }

    // x lod level is at most 1.
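    // If x carries a LoD, build the output LoD by appending each source
    // sequence length repeat_num times.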
    framework::Vector<size_t> out_lod;
    if (x_lod.size() == 1) {
      out_lod.push_back(0);
      int out_offset = 0;
      for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
        int repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1];
        int x_start = x_lod[0][i - 1];
        int x_end = x_lod[0][i];
        int x_seq_len = x_end - x_start;
        for (int j = 0; j < repeat_num; ++j) {
          out_lod.push_back(out_lod.back() + x_seq_len);
          out_offset++;
        }
      }
      // write lod to out if x has lod
      auto& ref_lod = *out->mutable_lod();
      ref_lod[0] = out_lod;
    }
    framework::Vector<size_t> ref_x_lod;
    if (x->lod().size() == 1) {
      ref_x_lod = x->lod()[0];
    } else {
      // x has no lod; use a fake x lod (level 0) where each row is one sequence
      ref_x_lod.resize(x->dims()[0] + 1);
      std::iota(ref_x_lod.begin(), ref_x_lod.end(), 0);
    }
    SequenceExpandFunctor<DeviceContext, T> functor;
    functor(context.template device_context<DeviceContext>(),
            *x,
            ref_x_lod,
            y_lod[ref_level],
            out);
  }
};

/*
 * Given Grad(Out)
 *
 *    Grad(Out).lod = [[0,                            2],
 *                     [0,              3,            6]]
 *    Grad(Out).data = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
 * Then
 *    Grad(X).data = [(0.1 + 0.2 + 0.3), (0.4 + 0.5 + 0.6)]
 *                 = [0.6, 1.5]
 *    Grad(X).lod = Input(X).lod
 */
template <typename T>
struct SequenceExpandGradFunctor<phi::CPUContext, T> {
  void operator()(
      const phi::CPUContext& context,
      const LoDTensor& dout,
      const framework::Vector<size_t>& x_lod,   /*expand source lod*/
      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
      LoDTensor* dx) {
    int dout_offset = 0;
    for (size_t i = 1; i < ref_lod.size(); ++i) {
      int repeat_num = ref_lod[i] - ref_lod[i - 1];
      if (repeat_num > 0) {
        int x_start = x_lod[i - 1];
        int x_end = x_lod[i];
        int x_seq_len = x_end - x_start;
        if (x_seq_len == 0) continue;
        auto dx_sub = dx->Slice(x_start, x_end);
        dx_sub.Resize(phi::flatten_to_1d(dx_sub.dims()));
        int dout_end = dout_offset + repeat_num * x_seq_len;
        auto dout_sub = dout.Slice(dout_offset, dout_end);
        dout_sub.Resize({repeat_num, dx_sub.dims()[0]});
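        // The forward pass wrote repeat_num copies of this sequence, so sum
        // the gradient of every copy back into the single source slice.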
        phi::funcs::ColwiseSum<phi::CPUContext, T> col_sum;
        col_sum(context, dout_sub, &dx_sub);
        dout_offset += repeat_num * x_seq_len;
      }
    }
  }
};

template <typename DeviceContext, typename T>
class SequenceExpandGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* g_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
    auto* x = context.Input<LoDTensor>("X");
    auto* y = context.Input<LoDTensor>("Y");
    auto* g_x = context.Output<LoDTensor>(framework::GradVarName("X"));
    int ref_level = context.Attr<int>("ref_level");

    g_x->mutable_data<T>(context.GetPlace());
    g_x->set_lod(x->lod());

    auto& dev_ctx = context.template device_context<DeviceContext>();
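    // Zero-initialize Grad(X): sequences that are expanded zero times
    // receive no gradient and must stay zero.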
    phi::funcs::SetConstant<DeviceContext, T> set_zero;
    set_zero(dev_ctx, g_x, static_cast<T>(0));

    auto& y_lod = y->lod();
    if (ref_level == -1) ref_level = y_lod.size() - 1;
    // just copy the gradient
    if (y_lod[ref_level].size() <= 1) {
      framework::TensorCopy(*g_out, context.GetPlace(), g_x);
      return;
    }

    framework::Vector<size_t> ref_x_lod;
    framework::Vector<size_t> ref_lod = y_lod[ref_level];
    if (x->lod().size() == 1) {
      ref_x_lod = x->lod()[0];
    } else {
      // x has no lod; use a fake x lod (level 0) where each row is one sequence
      ref_x_lod.resize(x->dims()[0] + 1);
      std::iota(ref_x_lod.begin(), ref_x_lod.end(), 0);
    }
    SequenceExpandGradFunctor<DeviceContext, T> functor;
    functor(context.template device_context<DeviceContext>(),
            *g_out,
            ref_x_lod,
            ref_lod,
            g_x);
  }
};

}  // namespace operators
}  // namespace paddle