/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
14 15

#pragma once
L
Luo Tao 已提交
16
#include <Python.h>
Q
qijun 已提交
17
#include <string>
C
chengduoZH 已提交
18 19
#include <tuple>
#include <vector>
Y
Yi Wang 已提交
20 21 22
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
23
#include "paddle/fluid/platform/float16.h"
Q
qijun 已提交
24 25
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
26 27

namespace paddle {
28
namespace pybind {
29 30 31 32 33 34 35
namespace details {

// Compile-time linear search over the candidate element types ARGS.
//   less - whether I is still a valid index into ARGS; selects which partial
//          specialization is instantiated.
//   I    - index of the candidate type currently being tested.
//   ARGS - the ordered list of element types to try.
// Only declared here; the partial specializations provide the definitions.
template <bool less, size_t I, typename... ARGS>
struct CastToPyBufferImpl;

template <size_t I, typename... ARGS>
struct CastToPyBufferImpl<false, I, ARGS...> {
36
  pybind11::buffer_info operator()(const framework::Tensor &tensor) {
37
    PADDLE_THROW("This type of tensor cannot be expose to Python");
38
    return pybind11::buffer_info();
39 40 41 42 43 44
  }
};

template <size_t I, typename... ARGS>
struct CastToPyBufferImpl<true, I, ARGS...> {
  using CUR_TYPE = typename std::tuple_element<I, std::tuple<ARGS...>>::type;
45
  pybind11::buffer_info operator()(const framework::Tensor &tensor) {
Y
Yu Yang 已提交
46
    if (std::type_index(typeid(CUR_TYPE)) == tensor.type()) {
47 48 49 50 51 52 53 54 55 56 57 58
      auto dim_vec = framework::vectorize(tensor.dims());
      std::vector<size_t> dims_outside;
      std::vector<size_t> strides;
      dims_outside.resize(dim_vec.size());
      strides.resize(dim_vec.size());

      size_t prod = 1;
      for (size_t i = dim_vec.size(); i != 0; --i) {
        dims_outside[i - 1] = (size_t)dim_vec[i - 1];
        strides[i - 1] = sizeof(CUR_TYPE) * prod;
        prod *= dims_outside[i - 1];
      }
Q
qijun 已提交
59
      framework::Tensor dst_tensor;
Y
Yu Yang 已提交
60
      if (paddle::platform::is_gpu_place(tensor.place())) {
61 62 63 64
#ifdef PADDLE_WITH_CUDA
        auto *src_ptr = static_cast<const void *>(tensor.data<CUR_TYPE>());
        auto *dst_ptr = static_cast<void *>(dst_tensor.mutable_data<CUR_TYPE>(
            tensor.dims(), platform::CPUPlace()));
D
dzhwinter 已提交
65

Y
Yu Yang 已提交
66 67 68
        paddle::platform::GpuMemcpySync(dst_ptr, src_ptr,
                                        sizeof(CUR_TYPE) * tensor.numel(),
                                        cudaMemcpyDeviceToHost);
69
#else
D
dzhwinter 已提交
70
        PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
71
#endif
Y
Yu Yang 已提交
72
      } else if (paddle::platform::is_cpu_place(tensor.place())) {
Q
qijun 已提交
73 74
        dst_tensor = tensor;
      }
75 76 77

      if (std::type_index(typeid(CUR_TYPE)) ==
          std::type_index(typeid(platform::float16))) {
78 79 80 81
        return pybind11::buffer_info(
            dst_tensor.data<CUR_TYPE>(), sizeof(CUR_TYPE),
            "e", /* np.dtype('e') == np.float16 */
            (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides);
82
      } else {
83 84 85 86
        return pybind11::buffer_info(
            dst_tensor.data<CUR_TYPE>(), sizeof(CUR_TYPE),
            pybind11::format_descriptor<CUR_TYPE>::format(),
            (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides);
87
      }
88 89 90 91 92 93
    } else {
      constexpr bool less = I + 1 < std::tuple_size<std::tuple<ARGS...>>::value;
      return CastToPyBufferImpl<less, I + 1, ARGS...>()(tensor);
    }
  }
};
94

95
}  // namespace details
96

97
// Describes `tensor` as a pybind11::buffer_info by trying each supported
// element type in turn, starting at index 0 of the list below. The outcome for
// a tensor whose type is not in the list is decided by CastToPyBufferImpl.
inline pybind11::buffer_info CastToPyBuffer(const framework::Tensor &tensor) {
  using Caster =
      details::CastToPyBufferImpl<true, 0, float, int, double, int64_t, bool,
                                  uint8_t, int8_t, platform::float16>;
  return Caster()(tensor);
}

104
template <typename T>
105
T TensorGetElement(const framework::Tensor &self, size_t offset) {
106 107 108 109
  if (platform::is_cpu_place(self.place())) {
    return self.data<T>()[offset];
  } else {
    std::shared_ptr<framework::Tensor> dst(new framework::Tensor);
F
fix  
fengjiayi 已提交
110
    framework::TensorCopySync(self, platform::CPUPlace(), dst.get());
111 112
    return dst->data<T>()[offset];
  }
113 114
}

Y
Yu Yang 已提交
115
// TODO(dzhwinter) : fix the redundant Tensor allocate and free
116
template <typename T>
117 118
void TensorSetElement(framework::Tensor *self, size_t offset, T elem) {
  if (platform::is_gpu_place(self->place())) {
Y
Yu Yang 已提交
119 120 121 122
    framework::Tensor dst;
    framework::TensorCopySync(*self, platform::CPUPlace(), &dst);
    dst.mutable_data<T>(platform::CPUPlace())[offset] = elem;
    framework::TensorCopySync(dst, self->place(), self);
123
  } else if (platform::is_cpu_place(self->place())) {
Y
Yu Yang 已提交
124
    self->mutable_data<T>(self->place())[offset] = elem;
125
  }
126 127
}

128
template <typename T>
Q
qijun 已提交
129
void PyCPUTensorSetFromArray(
130 131 132 133
    framework::Tensor *self,
    pybind11::array_t<T, pybind11::array::c_style | pybind11::array::forcecast>
        array,
    paddle::platform::CPUPlace place) {
Q
qijun 已提交
134
  std::vector<int64_t> dims;
135 136
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
C
chengduoZH 已提交
137
    dims.push_back(static_cast<int>(array.shape()[i]));
138 139
  }

140 141
  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<T>(place);
142 143 144
  std::memcpy(dst, array.data(), sizeof(T) * array.size());
}

145
template <>
C
chengduoZH 已提交
146 147
// This following specialization maps uint16_t in the parameter type to
// platform::float16.
S
sneaxiy 已提交
148
inline void PyCPUTensorSetFromArray(
149 150 151 152 153
    framework::Tensor *self,
    pybind11::array_t<uint16_t,
                      pybind11::array::c_style | pybind11::array::forcecast>
        array,
    paddle::platform::CPUPlace place) {
154 155 156
  std::vector<int64_t> dims;
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
C
chengduoZH 已提交
157
    dims.push_back(static_cast<int>(array.shape()[i]));
158 159
  }

160 161
  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<platform::float16>(place);
162 163 164
  std::memcpy(dst, array.data(), sizeof(uint16_t) * array.size());
}

165
#ifdef PADDLE_WITH_CUDA
Q
qijun 已提交
166 167
template <typename T>
void PyCUDATensorSetFromArray(
168 169 170 171
    framework::Tensor *self,
    pybind11::array_t<T, pybind11::array::c_style | pybind11::array::forcecast>
        array,
    paddle::platform::CUDAPlace place) {
Q
qijun 已提交
172
  std::vector<int64_t> dims;
Q
qijun 已提交
173 174
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
C
chengduoZH 已提交
175
    dims.push_back(static_cast<int>(array.shape()[i]));
Q
qijun 已提交
176
  }
Q
qijun 已提交
177

178 179
  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<T>(place);
Y
Yu Yang 已提交
180 181
  paddle::platform::GpuMemcpySync(dst, array.data(), sizeof(T) * array.size(),
                                  cudaMemcpyHostToDevice);
182
}
183 184

template <>
C
chengduoZH 已提交
185 186
// This following specialization maps uint16_t in the parameter type to
// platform::float16.
S
sneaxiy 已提交
187
inline void PyCUDATensorSetFromArray(
188 189 190 191 192
    framework::Tensor *self,
    pybind11::array_t<uint16_t,
                      pybind11::array::c_style | pybind11::array::forcecast>
        array,
    paddle::platform::CUDAPlace place) {
193 194 195
  std::vector<int64_t> dims;
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
C
chengduoZH 已提交
196
    dims.push_back(static_cast<int>(array.shape()[i]));
197 198
  }

199 200
  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<platform::float16>(place);
Y
Yu Yang 已提交
201 202 203
  paddle::platform::GpuMemcpySync(dst, array.data(),
                                  sizeof(uint16_t) * array.size(),
                                  cudaMemcpyHostToDevice);
204
}
C
chengduoZH 已提交
205 206 207

template <typename T>
void PyCUDAPinnedTensorSetFromArray(
208 209 210
    framework::Tensor *self,
    pybind11::array_t<T, pybind11::array::c_style | pybind11::array::forcecast>
        array,
C
chengduoZH 已提交
211 212 213 214 215 216 217
    const paddle::platform::CUDAPinnedPlace &place) {
  std::vector<int64_t> dims;
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
    dims.push_back(static_cast<int>(array.shape()[i]));
  }

218 219
  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<T>(place);
C
chengduoZH 已提交
220 221 222 223
  std::memcpy(dst, array.data(), sizeof(T) * array.size());
}

template <>
C
chengduoZH 已提交
224 225
// This following specialization maps uint16_t in the parameter type to
// platform::float16.
S
sneaxiy 已提交
226
inline void PyCUDAPinnedTensorSetFromArray(
227 228 229 230
    framework::Tensor *self,
    pybind11::array_t<uint16_t,
                      pybind11::array::c_style | pybind11::array::forcecast>
        array,
C
chengduoZH 已提交
231 232 233 234 235 236 237
    const paddle::platform::CUDAPinnedPlace &place) {
  std::vector<int64_t> dims;
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
    dims.push_back(static_cast<int>(array.shape()[i]));
  }

238 239
  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<platform::float16>(place);
C
chengduoZH 已提交
240 241
  std::memcpy(dst, array.data(), sizeof(uint16_t) * array.size());
}
Q
qijun 已提交
242
#endif
243 244 245

}  // namespace pybind
}  // namespace paddle