/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
L
Luo Tao 已提交
16
#include <Python.h>
Q
qijun 已提交
17
#include <string>
C
chengduoZH 已提交
18 19
#include <tuple>
#include <vector>
Y
Yi Wang 已提交
20 21 22
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
23
#include "paddle/fluid/platform/float16.h"
Q
qijun 已提交
24 25
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
26 27

namespace paddle {
28
namespace pybind {
29 30 31 32 33 34 35
namespace details {

// Compile-time dispatcher that turns a framework::Tensor into a
// pybind11::buffer_info by walking the candidate element types ARGS... one
// index at a time.
//
//   less - true while I is still a valid index into ARGS...; the `false`
//          specialization is the end of the list and reports an error.
//   I    - index of the candidate type examined by this step.
//   ARGS - ordered list of element types that may be exposed to Python.
template <bool less, size_t I, typename... ARGS>
struct CastToPyBufferImpl;

template <size_t I, typename... ARGS>
struct CastToPyBufferImpl<false, I, ARGS...> {
36
  pybind11::buffer_info operator()(const framework::Tensor &tensor) {
37
    PADDLE_THROW("This type of tensor cannot be expose to Python");
38
    return pybind11::buffer_info();
39 40 41 42 43 44
  }
};

template <size_t I, typename... ARGS>
struct CastToPyBufferImpl<true, I, ARGS...> {
  using CUR_TYPE = typename std::tuple_element<I, std::tuple<ARGS...>>::type;
45
  pybind11::buffer_info operator()(const framework::Tensor &tensor) {
Y
Yu Yang 已提交
46
    if (std::type_index(typeid(CUR_TYPE)) == tensor.type()) {
47 48 49 50 51 52 53 54 55 56 57 58
      auto dim_vec = framework::vectorize(tensor.dims());
      std::vector<size_t> dims_outside;
      std::vector<size_t> strides;
      dims_outside.resize(dim_vec.size());
      strides.resize(dim_vec.size());

      size_t prod = 1;
      for (size_t i = dim_vec.size(); i != 0; --i) {
        dims_outside[i - 1] = (size_t)dim_vec[i - 1];
        strides[i - 1] = sizeof(CUR_TYPE) * prod;
        prod *= dims_outside[i - 1];
      }
Q
qijun 已提交
59
      framework::Tensor dst_tensor;
Y
Yu Yang 已提交
60
      if (paddle::platform::is_gpu_place(tensor.place())) {
61 62 63 64
#ifdef PADDLE_WITH_CUDA
        auto *src_ptr = static_cast<const void *>(tensor.data<CUR_TYPE>());
        auto *dst_ptr = static_cast<void *>(dst_tensor.mutable_data<CUR_TYPE>(
            tensor.dims(), platform::CPUPlace()));
D
dzhwinter 已提交
65

Y
Yu Yang 已提交
66 67 68
        paddle::platform::GpuMemcpySync(dst_ptr, src_ptr,
                                        sizeof(CUR_TYPE) * tensor.numel(),
                                        cudaMemcpyDeviceToHost);
69
#else
D
dzhwinter 已提交
70
        PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
71
#endif
Y
Yu Yang 已提交
72
      } else if (paddle::platform::is_cpu_place(tensor.place())) {
Q
qijun 已提交
73 74
        dst_tensor = tensor;
      }
75 76 77

      if (std::type_index(typeid(CUR_TYPE)) ==
          std::type_index(typeid(platform::float16))) {
78 79 80 81
        return pybind11::buffer_info(
            dst_tensor.data<CUR_TYPE>(), sizeof(CUR_TYPE),
            "e", /* np.dtype('e') == np.float16 */
            (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides);
82
      } else {
83 84 85 86
        return pybind11::buffer_info(
            dst_tensor.data<CUR_TYPE>(), sizeof(CUR_TYPE),
            pybind11::format_descriptor<CUR_TYPE>::format(),
            (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides);
87
      }
88 89 90 91 92 93
    } else {
      constexpr bool less = I + 1 < std::tuple_size<std::tuple<ARGS...>>::value;
      return CastToPyBufferImpl<less, I + 1, ARGS...>()(tensor);
    }
  }
};
94

95
}  // namespace details
96

97
// Describes `tensor` as a Python buffer. The element type is matched, in
// order, against float, int, double, int64_t, bool and platform::float16;
// the dispatcher raises an error if none of them matches.
inline pybind11::buffer_info CastToPyBuffer(const framework::Tensor &tensor) {
  using Dispatcher =
      details::CastToPyBufferImpl<true, 0, float, int, double, int64_t, bool,
                                  platform::float16>;
  return Dispatcher()(tensor);
}

104
template <typename T>
105
T TensorGetElement(const framework::Tensor &self, size_t offset) {
106 107 108 109
  if (platform::is_cpu_place(self.place())) {
    return self.data<T>()[offset];
  } else {
    std::shared_ptr<framework::Tensor> dst(new framework::Tensor);
Y
Yi Wang 已提交
110
    framework::TensorCopy(self, platform::CPUPlace(), dst.get());
111 112
    return dst->data<T>()[offset];
  }
113 114
}

115
// TODO(dzhwinter) : fix the redundent Tensor allocate and free
116
template <typename T>
117 118
void TensorSetElement(framework::Tensor *self, size_t offset, T elem) {
  if (platform::is_gpu_place(self->place())) {
119
    std::shared_ptr<framework::Tensor> dst(new framework::Tensor);
120
    framework::TensorCopy(*self, platform::CPUPlace(), dst.get());
121
    dst->data<T>()[offset] = elem;
122
    framework::TensorCopy(*dst.get(), self->place(), self);
123

124 125
  } else if (platform::is_cpu_place(self->place())) {
    self->data<T>()[offset] = elem;
126
  }
127 128
}

129
template <typename T>
Q
qijun 已提交
130
void PyCPUTensorSetFromArray(
131 132 133 134
    framework::Tensor *self,
    pybind11::array_t<T, pybind11::array::c_style | pybind11::array::forcecast>
        array,
    paddle::platform::CPUPlace place) {
Q
qijun 已提交
135
  std::vector<int64_t> dims;
136 137
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
C
chengduoZH 已提交
138
    dims.push_back(static_cast<int>(array.shape()[i]));
139 140
  }

141 142
  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<T>(place);
143 144 145
  std::memcpy(dst, array.data(), sizeof(T) * array.size());
}

146
template <>
C
chengduoZH 已提交
147 148
// This following specialization maps uint16_t in the parameter type to
// platform::float16.
149
void PyCPUTensorSetFromArray(
150 151 152 153 154
    framework::Tensor *self,
    pybind11::array_t<uint16_t,
                      pybind11::array::c_style | pybind11::array::forcecast>
        array,
    paddle::platform::CPUPlace place) {
155 156 157
  std::vector<int64_t> dims;
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
C
chengduoZH 已提交
158
    dims.push_back(static_cast<int>(array.shape()[i]));
159 160
  }

161 162
  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<platform::float16>(place);
163 164 165
  std::memcpy(dst, array.data(), sizeof(uint16_t) * array.size());
}

166
#ifdef PADDLE_WITH_CUDA
Q
qijun 已提交
167 168
template <typename T>
void PyCUDATensorSetFromArray(
169 170 171 172
    framework::Tensor *self,
    pybind11::array_t<T, pybind11::array::c_style | pybind11::array::forcecast>
        array,
    paddle::platform::CUDAPlace place) {
Q
qijun 已提交
173
  std::vector<int64_t> dims;
Q
qijun 已提交
174 175
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
C
chengduoZH 已提交
176
    dims.push_back(static_cast<int>(array.shape()[i]));
Q
qijun 已提交
177
  }
Q
qijun 已提交
178

179 180
  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<T>(place);
Y
Yu Yang 已提交
181 182
  paddle::platform::GpuMemcpySync(dst, array.data(), sizeof(T) * array.size(),
                                  cudaMemcpyHostToDevice);
183
}
184 185

template <>
C
chengduoZH 已提交
186 187
// This following specialization maps uint16_t in the parameter type to
// platform::float16.
188
void PyCUDATensorSetFromArray(
189 190 191 192 193
    framework::Tensor *self,
    pybind11::array_t<uint16_t,
                      pybind11::array::c_style | pybind11::array::forcecast>
        array,
    paddle::platform::CUDAPlace place) {
194 195 196
  std::vector<int64_t> dims;
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
C
chengduoZH 已提交
197
    dims.push_back(static_cast<int>(array.shape()[i]));
198 199
  }

200 201
  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<platform::float16>(place);
Y
Yu Yang 已提交
202 203 204
  paddle::platform::GpuMemcpySync(dst, array.data(),
                                  sizeof(uint16_t) * array.size(),
                                  cudaMemcpyHostToDevice);
205
}
C
chengduoZH 已提交
206 207 208

template <typename T>
void PyCUDAPinnedTensorSetFromArray(
209 210 211
    framework::Tensor *self,
    pybind11::array_t<T, pybind11::array::c_style | pybind11::array::forcecast>
        array,
C
chengduoZH 已提交
212 213 214 215 216 217 218
    const paddle::platform::CUDAPinnedPlace &place) {
  std::vector<int64_t> dims;
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
    dims.push_back(static_cast<int>(array.shape()[i]));
  }

219 220
  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<T>(place);
C
chengduoZH 已提交
221 222 223 224
  std::memcpy(dst, array.data(), sizeof(T) * array.size());
}

template <>
C
chengduoZH 已提交
225 226
// This following specialization maps uint16_t in the parameter type to
// platform::float16.
C
chengduoZH 已提交
227
void PyCUDAPinnedTensorSetFromArray(
228 229 230 231
    framework::Tensor *self,
    pybind11::array_t<uint16_t,
                      pybind11::array::c_style | pybind11::array::forcecast>
        array,
C
chengduoZH 已提交
232 233 234 235 236 237 238
    const paddle::platform::CUDAPinnedPlace &place) {
  std::vector<int64_t> dims;
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
    dims.push_back(static_cast<int>(array.shape()[i]));
  }

239 240
  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<platform::float16>(place);
C
chengduoZH 已提交
241 242
  std::memcpy(dst, array.data(), sizeof(uint16_t) * array.size());
}
Q
qijun 已提交
243
#endif
244 245 246

}  // namespace pybind
}  // namespace paddle