gather.h 2.3 KB
Newer Older
Z
Zhuoyuan 已提交
1 2 3 4 5 6
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

Z
zchen0211 已提交
7
   http://www.apache.org/licenses/LICENSE-2.0
Z
Zhuoyuan 已提交
8 9 10 11 12 13 14 15

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
Z
zchen0211 已提交
16
#include <memory.h>
Z
Zhuoyuan 已提交
17
#include <cstring>
Z
zchen0211 已提交
18

Z
Zhuoyuan 已提交
19
#include "paddle/framework/ddim.h"
Z
zchen0211 已提交
20 21
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
Z
Zhuoyuan 已提交
22 23 24
#include "paddle/framework/tensor.h"
#include "paddle/platform/place.h"

Z
zchen0211 已提交
25 26
namespace paddle {
namespace operators {
Z
Zhuoyuan 已提交
27

Z
zchen0211 已提交
28
/**
 * Implementation of CPU copy.
 *
 * Both buffers are treated as consecutive slices of `slice_size` elements of
 * type T. For every i in [0, index_size), slice number indices[i] of `src`
 * is copied into slice number i of `output`.
 *
 * src:        source buffer; must contain slice indices[i] for every i
 * indices:    index_size slice numbers into src
 * slice_size: number of T elements per slice
 * index_size: number of slices to gather
 * output:     destination buffer with room for index_size * slice_size
 *             elements; must not overlap src, since memcpy is used
 *
 * The indices are not range-checked here; passing valid ones is the
 * caller's responsibility.
 */
template <typename T>
void CPUGather(const T* src, const int* indices, const int slice_size,
               const int index_size, T* output) {
  // Nothing to copy. This also keeps a negative slice_size from being
  // converted into a huge unsigned byte count below.
  if (slice_size <= 0 || index_size <= 0) return;

  const size_t slice_bytes = static_cast<size_t>(slice_size) * sizeof(T);

  // Advance the destination by one slice per iteration instead of computing
  // i * slice_size in int, which could overflow.
  T* dst = output;
  for (int i = 0; i < index_size; ++i, dst += slice_size) {
    // Widen before multiplying: index * slice_size can exceed INT_MAX for
    // large tensors, and signed int overflow is undefined behavior.
    const long long src_offset =
        static_cast<long long>(indices[i]) * slice_size;
    memcpy(dst, src + src_offset, slice_bytes);
  }
}

Z
zchen0211 已提交
40
// Implementation of GPU copy:
// Declaration only; no definition is visible in this header. The parameter
// list mirrors CPUGather (presumably this is its device counterpart --
// confirm against the definition).
template <typename T>
void GPUGather(const T* src, const int* indices, const int slice_size,
               const int index_size, T* output);
Z
Zhuoyuan 已提交
44

Z
zchen0211 已提交
45 46 47 48 49 50
/**
 * Return a new tensor from source tensor, gathered according to index
 * input[src]: type-T source Tensor
 * input[index]: type-int index Tensor (1-D)
 * return: output tensor
 */
Z
Zhuoyuan 已提交
51
template <typename T>
Z
zchen0211 已提交
52
void Gather(const platform::Place& place, const paddle::framework::Tensor* src,
Z
zchen0211 已提交
53 54 55 56 57 58 59
            const paddle::framework::Tensor* index,
            paddle::framework::Tensor* output) {
  // check index of shape 1-D
  PADDLE_ENFORCE(index->dims().size() == 1);
  int index_size = index->dims()[0];

  auto src_dims = src->dims();
Z
zchen0211 已提交
60
  framework::DDim output_dims(src_dims);
Z
zchen0211 已提交
61 62 63 64
  output_dims[0] = index_size;

  // slice size
  int slice_size = 1;
Z
zchen0211 已提交
65
  for (int i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i];
Z
zchen0211 已提交
66 67 68

  // Gathering
  if (platform::is_cpu_place(place)) {
Z
zchen0211 已提交
69
    CPUGather<T>(src->data<T>(), index->data<int>(), slice_size, index_size,
Z
zchen0211 已提交
70
                 output->data<T>());
Z
Zhuoyuan 已提交
71
  }
Z
Zhuoyuan 已提交
72
}
Z
zchen0211 已提交
73 74 75

}  // namespace operators
}  // namespace paddle