// index_select_op.h
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <cstdint>
#include <type_traits>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/pten/kernels/funcs/blas/blas.h"
#include "paddle/pten/kernels/funcs/math_function.h"

namespace paddle {
namespace operators {

// Local shorthands for framework types. LoDTensor is the one the helpers and
// kernels in this header take as arguments.
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using DDim = framework::DDim;

28
template <typename DeviceContext, typename T, typename IndexT = int>
29
void IndexSelectInner(const framework::ExecutionContext& context,
30
                      LoDTensor* input, const LoDTensor& index,
31
                      LoDTensor* output, int dim) {
32
  auto input_dim = input->dims();
33 34
  auto input_dim_size = input_dim.size();
  auto output_dim = output->dims();
35 36 37 38 39 40 41 42 43 44
  auto index_size = index.dims()[0];

  LoDTensor index_cpu_copy;
  if (!platform::is_cpu_place(index.place())) {
    framework::TensorCopySync(index, platform::CPUPlace(), &index_cpu_copy);
  }
  const IndexT* index_data = platform::is_cpu_place(index.place())
                                 ? index.data<IndexT>()
                                 : index_cpu_copy.data<IndexT>();
  output->mutable_data<T>(context.GetPlace());
45 46 47 48 49 50 51 52 53 54 55

  auto slice_size = 1;
  for (auto i = dim + 1; i < input_dim_size; i++) {
    slice_size *= input_dim[i];
  }

  auto outer_nums = 1;
  for (auto i = 0; i < dim; i++) {
    outer_nums *= input_dim[i];
  }

56 57
  for (int i = 0; i < index_size; i++) {
    PADDLE_ENFORCE_GE(
58
        index_data[i], 0,
59 60 61 62
        platform::errors::InvalidArgument(
            "Variable value (index) of OP(index_select) "
            "expected >= 0 and < %ld, but got %ld. Please check input "
            "value.",
63
            input_dim[dim], index_data[i]));
64
    PADDLE_ENFORCE_LT(
65
        index_data[i], input_dim[dim],
66 67 68 69
        platform::errors::InvalidArgument(
            "Variable value (index) of OP(index_select) "
            "expected >= 0 and < %ld, but got %ld. Please check input "
            "value.",
70
            input_dim[dim], index_data[i]));
71 72
  }

73
  VLOG(3) << "Index_Select_Debug; outer_nums: " << outer_nums
74
          << "; slice_size: " << slice_size << "; index_size: " << index_size;
75

76 77
  input->Resize(pten::make_ddim({outer_nums, input_dim[dim], slice_size}));
  output->Resize(pten::make_ddim({outer_nums, index_size, slice_size}));
78 79 80 81 82 83 84 85 86 87 88

  auto input_tensor = framework::EigenTensor<T, 3>::From(*input);
  auto output_tensor = framework::EigenTensor<T, 3>::From(*output);

  auto& place =
      *context.template device_context<DeviceContext>().eigen_device();

  for (auto j = 0; j < index_size; j++) {
    IndexT index_value = index_data[j];
    auto output_t = output_tensor.chip(j, 1);
    output_t.device(place) = input_tensor.chip(index_value, 1);
89
  }
90
  input->Resize(input_dim);
91 92 93 94 95 96 97
  output->Resize(output_dim);
}

/**
 * Forward kernel of index_select.
 *
 * Reads input "X", input "Index" and the integer attribute "dim", writes
 * output "Out". The index tensor must hold INT32 or INT64 values; any other
 * type is rejected with InvalidArgument. The work itself is done by
 * IndexSelectInner, instantiated for the matching index element type.
 */
template <typename DeviceContext, typename T>
class IndexSelectKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    // A copy of X's tensor object is taken on purpose: IndexSelectInner
    // resizes the tensor it receives while it works.
    auto x_tensor = *context.Input<framework::LoDTensor>("X");
    auto* index_tensor = context.Input<framework::LoDTensor>("Index");
    auto* out_tensor = context.Output<framework::LoDTensor>("Out");

    // A negative axis counts back from the last dimension of X.
    int axis = context.Attr<int>("dim");
    if (axis < 0) {
      axis += x_tensor.dims().size();
    }

    const auto& index_type =
        framework::TransToProtoVarType(index_tensor->dtype());
    const bool is_int32 = index_type == framework::proto::VarType::INT32;
    const bool is_int64 = index_type == framework::proto::VarType::INT64;

    // The variable keeps this exact name because the enforce macro places
    // its argument expressions into the error message.
    const bool index_type_match = is_int32 || is_int64;
    PADDLE_ENFORCE_EQ(index_type_match, true,
                      platform::errors::InvalidArgument(
                          "Input(Index) holds the wrong type, it holds %s, but "
                          "desires to be %s or %s",
                          paddle::framework::DataTypeToString(index_type),
                          paddle::framework::DataTypeToString(
                              framework::proto::VarType::INT32),
                          paddle::framework::DataTypeToString(
                              framework::proto::VarType::INT64)));

    if (is_int32) {
      IndexSelectInner<DeviceContext, T, int>(context, &x_tensor,
                                              *index_tensor, out_tensor, axis);
    } else if (is_int64) {
      IndexSelectInner<DeviceContext, T, int64_t>(
          context, &x_tensor, *index_tensor, out_tensor, axis);
    }
  }
};

129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
template <typename DeviceContext, typename T, class Enable = void>
struct IndexSelectAdd {
  void operator()(const framework::ExecutionContext& ctx, int slice_size,
                  const T* src_pointer, const T* p_pointer, T* dist_pointer) {
    for (int i = 0; i < slice_size; i++) {
      dist_pointer[i] = src_pointer[i] + p_pointer[i];
    }
  }
};
/**
 * Specialization of IndexSelectAdd selected when T satisfies
 * std::is_floating_point. Instead of a hand-written loop it forwards the
 * three buffers and the element count to the VADD routine of the BLAS
 * wrapper obtained from `ctx`.
 * NOTE(review): VADD presumably computes dist = src + p element-wise, like
 * the generic template - confirm against the BLAS wrapper.
 */
template <typename DeviceContext, typename T>
struct IndexSelectAdd<
    DeviceContext, T,
    typename std::enable_if<std::is_floating_point<T>::value>::type> {
  void operator()(const framework::ExecutionContext& ctx, int slice_size,
                  const T* src_pointer, const T* p_pointer, T* dist_pointer) {
    auto blas_handle = pten::funcs::GetBlas<DeviceContext, T>(ctx);
    blas_handle.VADD(slice_size, src_pointer, p_pointer, dist_pointer);
  }
};

template <typename DeviceContext, typename T, typename IndexT = int>
150
void IndexSelectGradInner(const framework::ExecutionContext& context,
151
                          const LoDTensor& out_grad, const LoDTensor& index,
152
                          LoDTensor* x_grad, int dim) {
153 154
  const T* input_data = out_grad.data<T>();
  const IndexT* index_data = index.data<IndexT>();
155 156
  const T* p_output = x_grad->mutable_data<T>(context.GetPlace());
  T* out_data = x_grad->mutable_data<T>(context.GetPlace());
157
  auto input_dim = out_grad.dims();
158 159
  auto input_dim_size = input_dim.size();
  auto output_dim = x_grad->dims();
160 161

  auto& dev_ctx = context.template device_context<DeviceContext>();
162
  pten::funcs::SetConstant<DeviceContext, T> set_constant;
163
  set_constant(dev_ctx, x_grad, static_cast<T>(0.0));
164 165 166 167 168 169 170 171 172 173 174 175 176 177

  auto slice_size = 1;
  for (auto i = dim + 1; i < input_dim_size; i++) {
    slice_size *= input_dim[i];
  }

  auto input_width = slice_size * input_dim[dim];
  auto output_width = slice_size * output_dim[dim];

  auto outer_nums = 1;
  for (auto i = 0; i < dim; i++) {
    outer_nums *= input_dim[i];
  }

178
  auto index_size = index.dims()[0];
179 180 181 182 183 184 185 186 187 188
  VLOG(3) << "Index_Select_Grad_Debug; outer_nums: " << outer_nums
          << "; slice_size: " << slice_size << "; input_width: " << input_width
          << "; output_width: " << output_width
          << "; index_size: " << index_size;

  for (auto i = 0; i < outer_nums; i++) {
    auto input_start_offset = i * input_width;
    auto output_start_offset = i * output_width;

    for (auto j = 0; j < index_size; j++) {
189 190 191 192 193 194
      IndexT index_value = index_data[j];
      auto src = input_data + input_start_offset + j * slice_size;
      auto p_out = p_output + output_start_offset + index_value * slice_size;
      auto dst = out_data + output_start_offset + index_value * slice_size;
      IndexSelectAdd<DeviceContext, T> index_select_add;
      index_select_add(context, slice_size, src, p_out, dst);
195 196 197 198 199 200 201 202 203
    }
  }
  x_grad->Resize(output_dim);
}

/**
 * Backward kernel of index_select.
 *
 * Reads input "Index", the gradient variable of "Out" and the integer
 * attribute "dim", and writes the gradient variable of "X". The index tensor
 * must hold INT32 or INT64 values; any other type is rejected with
 * InvalidArgument. The accumulation itself is done by IndexSelectGradInner,
 * instantiated for the matching index element type.
 */
template <typename DeviceContext, typename T>
class IndexSelectGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x_grad =
        context.Output<framework::LoDTensor>(framework::GradVarName("X"));
    auto* index_tensor = context.Input<framework::LoDTensor>("Index");
    auto* out_grad =
        context.Input<framework::LoDTensor>(framework::GradVarName("Out"));

    // A negative axis counts back from the last dimension of the output
    // gradient.
    int axis = context.Attr<int>("dim");
    if (axis < 0) {
      axis += out_grad->dims().size();
    }

    const auto& index_type =
        framework::TransToProtoVarType(index_tensor->dtype());
    const bool is_int32 = index_type == framework::proto::VarType::INT32;
    const bool is_int64 = index_type == framework::proto::VarType::INT64;

    // The variable keeps this exact name because the enforce macro places
    // its argument expressions into the error message.
    const bool index_type_match = is_int32 || is_int64;
    PADDLE_ENFORCE_EQ(index_type_match, true,
                      platform::errors::InvalidArgument(
                          "Input(Index) holds the wrong type, it holds %s, but "
                          "desires to be %s or %s",
                          paddle::framework::DataTypeToString(index_type),
                          paddle::framework::DataTypeToString(
                              framework::proto::VarType::INT32),
                          paddle::framework::DataTypeToString(
                              framework::proto::VarType::INT64)));

    if (is_int32) {
      IndexSelectGradInner<DeviceContext, T, int>(
          context, *out_grad, *index_tensor, x_grad, axis);
    } else if (is_int64) {
      IndexSelectGradInner<DeviceContext, T, int64_t>(
          context, *out_grad, *index_tensor, x_grad, axis);
    }
  }
};

}  // namespace operators
}  // namespace paddle