/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <Python.h>
#include <string>
#include <tuple>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/float16.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"

namespace paddle {
namespace pybind {
namespace details {

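// CastToPyBufferImpl performs a compile-time linear search over the candidate
// element types ARGS...; `I` is the index of the type currently being tried
// and `less` records whether `I` is still a valid index into the type list.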
template <bool less, size_t I, typename... ARGS>
struct CastToPyBufferImpl;

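// Terminal case: every candidate type has been tried without matching the
// tensor's element type, so the tensor cannot be exposed as a Python buffer.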
template <size_t I, typename... ARGS>
struct CastToPyBufferImpl<false, I, ARGS...> {
  pybind11::buffer_info operator()(const framework::Tensor &tensor) {
    PADDLE_THROW("This type of tensor cannot be exposed to Python");
    return pybind11::buffer_info();
  }
};

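// Recursive case: if the I-th candidate type matches the tensor's element
// type, build a pybind11::buffer_info for it (copying the data back to host
// memory first when the tensor lives on the GPU); otherwise try type I + 1.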
template <size_t I, typename... ARGS>
struct CastToPyBufferImpl<true, I, ARGS...> {
  using CUR_TYPE = typename std::tuple_element<I, std::tuple<ARGS...>>::type;
  pybind11::buffer_info operator()(const framework::Tensor &tensor) {
    if (std::type_index(typeid(CUR_TYPE)) == tensor.type()) {
      auto dim_vec = framework::vectorize(tensor.dims());
      std::vector<size_t> dims_outside;
      std::vector<size_t> strides;
      dims_outside.resize(dim_vec.size());
      strides.resize(dim_vec.size());

      size_t prod = 1;
      for (size_t i = dim_vec.size(); i != 0; --i) {
        dims_outside[i - 1] = (size_t)dim_vec[i - 1];
        strides[i - 1] = sizeof(CUR_TYPE) * prod;
        prod *= dims_outside[i - 1];
      }
      framework::Tensor dst_tensor;
      if (paddle::platform::is_gpu_place(tensor.place())) {
#ifdef PADDLE_WITH_CUDA
        auto *src_ptr = static_cast<const void *>(tensor.data<CUR_TYPE>());
        auto *dst_ptr = static_cast<void *>(dst_tensor.mutable_data<CUR_TYPE>(
            tensor.dims(), platform::CPUPlace(),
            memory::Allocator::kCommunication));

        paddle::platform::GpuMemcpySync(dst_ptr, src_ptr,
                                        sizeof(CUR_TYPE) * tensor.numel(),
                                        cudaMemcpyDeviceToHost);
#else
        PADDLE_THROW("'CUDAPlace' is not supported in CPU-only device.");
#endif
      } else if (paddle::platform::is_cpu_place(tensor.place())) {
        dst_tensor = tensor;
      }

      if (std::type_index(typeid(CUR_TYPE)) ==
          std::type_index(typeid(platform::float16))) {
        return pybind11::buffer_info(
            dst_tensor.data<CUR_TYPE>(), sizeof(CUR_TYPE),
            "e", /* np.dtype('e') == np.float16 */
            (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides);
      } else {
        return pybind11::buffer_info(
            dst_tensor.data<CUR_TYPE>(), sizeof(CUR_TYPE),
            pybind11::format_descriptor<CUR_TYPE>::format(),
            (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides);
      }
    } else {
      constexpr bool less = I + 1 < std::tuple_size<std::tuple<ARGS...>>::value;
      return CastToPyBufferImpl<less, I + 1, ARGS...>()(tensor);
    }
  }
};

}  // namespace details

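// Converts a framework::Tensor holding any of the supported element types
// (float, int, double, int64_t, bool, uint8_t, int8_t, float16) into a
// pybind11::buffer_info describing its data pointer, shape, and strides.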
inline pybind11::buffer_info CastToPyBuffer(const framework::Tensor &tensor) {
  auto buffer_info =
      details::CastToPyBufferImpl<true, 0, float, int, double, int64_t, bool,
                                  uint8_t, int8_t, platform::float16>()(tensor);
  return buffer_info;
}
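// A minimal usage sketch (illustrative only, assuming a pybind11 module
// handle `m` and a Tensor binding that enables the buffer protocol):
//
//   pybind11::class_<framework::Tensor>(m, "Tensor",
//                                       pybind11::buffer_protocol())
//       .def_buffer([](framework::Tensor &t) { return CastToPyBuffer(t); });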

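// Reads the element at flat index `offset`; a GPU tensor is first copied to
// the CPU.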
template <typename T>
T TensorGetElement(const framework::Tensor &self, size_t offset) {
  if (platform::is_cpu_place(self.place())) {
    return self.data<T>()[offset];
  } else {
    std::shared_ptr<framework::Tensor> dst(new framework::Tensor);
    framework::TensorCopySync(self, platform::CPUPlace(), dst.get());
    return dst->data<T>()[offset];
  }
}

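// Writes `elem` at flat index `offset`; a GPU tensor is copied to the CPU,
// modified there, and copied back to its original place.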
// TODO(dzhwinter): fix the redundant Tensor allocation and free
template <typename T>
void TensorSetElement(framework::Tensor *self, size_t offset, T elem) {
  if (platform::is_gpu_place(self->place())) {
    framework::Tensor dst;
    framework::TensorCopySync(*self, platform::CPUPlace(), &dst);
    dst.mutable_data<T>(platform::CPUPlace())[offset] = elem;
    framework::TensorCopySync(dst, self->place(), self);
  } else if (platform::is_cpu_place(self->place())) {
    self->mutable_data<T>(self->place())[offset] = elem;
  }
}

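// Resizes `self` to the shape of the NumPy array and copies the array's
// contents into CPU memory. The uint16_t overload that follows reinterprets
// the data as platform::float16.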
template <typename T>
void PyCPUTensorSetFromArray(
    framework::Tensor *self,
    pybind11::array_t<T, pybind11::array::c_style | pybind11::array::forcecast>
        array,
    paddle::platform::CPUPlace place) {
  std::vector<int64_t> dims;
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
    dims.push_back(static_cast<int>(array.shape()[i]));
  }

  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<T>(place);
  std::memcpy(dst, array.data(), sizeof(T) * array.size());
}

// The following specialization maps uint16_t in the parameter type to
// platform::float16.
template <>
inline void PyCPUTensorSetFromArray(
    framework::Tensor *self,
    pybind11::array_t<uint16_t,
                      pybind11::array::c_style | pybind11::array::forcecast>
        array,
    paddle::platform::CPUPlace place) {
  std::vector<int64_t> dims;
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
    dims.push_back(static_cast<int>(array.shape()[i]));
  }

  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<platform::float16>(place);
  std::memcpy(dst, array.data(), sizeof(uint16_t) * array.size());
}

#ifdef PADDLE_WITH_CUDA
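// Same as the CPU variants above, except that the destination tensor lives on
// a CUDAPlace, so the copy is a synchronous host-to-device cudaMemcpy.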
template <typename T>
void PyCUDATensorSetFromArray(
    framework::Tensor *self,
    pybind11::array_t<T, pybind11::array::c_style | pybind11::array::forcecast>
        array,
    paddle::platform::CUDAPlace place) {
  std::vector<int64_t> dims;
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
    dims.push_back(static_cast<int>(array.shape()[i]));
  }

  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<T>(place);
  paddle::platform::GpuMemcpySync(dst, array.data(), sizeof(T) * array.size(),
                                  cudaMemcpyHostToDevice);
}

// The following specialization maps uint16_t in the parameter type to
// platform::float16.
template <>
inline void PyCUDATensorSetFromArray(
    framework::Tensor *self,
    pybind11::array_t<uint16_t,
                      pybind11::array::c_style | pybind11::array::forcecast>
        array,
    paddle::platform::CUDAPlace place) {
  std::vector<int64_t> dims;
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
    dims.push_back(static_cast<int>(array.shape()[i]));
  }

  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<platform::float16>(place);
  paddle::platform::GpuMemcpySync(dst, array.data(),
                                  sizeof(uint16_t) * array.size(),
                                  cudaMemcpyHostToDevice);
}

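// Copies the NumPy array into page-locked (pinned) host memory, from which
// later host-to-device transfers can be performed more efficiently.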
template <typename T>
void PyCUDAPinnedTensorSetFromArray(
    framework::Tensor *self,
    pybind11::array_t<T, pybind11::array::c_style | pybind11::array::forcecast>
        array,
    const paddle::platform::CUDAPinnedPlace &place) {
  std::vector<int64_t> dims;
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
    dims.push_back(static_cast<int>(array.shape()[i]));
  }

  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<T>(place);
  std::memcpy(dst, array.data(), sizeof(T) * array.size());
}

// The following specialization maps uint16_t in the parameter type to
// platform::float16.
template <>
inline void PyCUDAPinnedTensorSetFromArray(
    framework::Tensor *self,
    pybind11::array_t<uint16_t,
                      pybind11::array::c_style | pybind11::array::forcecast>
        array,
    const paddle::platform::CUDAPinnedPlace &place) {
  std::vector<int64_t> dims;
  dims.reserve(array.ndim());
  for (size_t i = 0; i < array.ndim(); ++i) {
    dims.push_back(static_cast<int>(array.shape()[i]));
  }

  self->Resize(framework::make_ddim(dims));
  auto *dst = self->mutable_data<platform::float16>(place);
  std::memcpy(dst, array.data(), sizeof(uint16_t) * array.size());
}
#endif

}  // namespace pybind
}  // namespace paddle