// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"

namespace paddle_infer {

using float16 = paddle::platform::float16;

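// Resize the underlying LoDTensor that backs this handle. Reshape() is only
// valid for input tensors (output tensors are read-only) and requires that
// SetName() has already bound the handle to a variable in the scope.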
void Tensor::Reshape(const std::vector<int> &shape) {
  PADDLE_ENFORCE_EQ(
      name_.empty(), false,
      paddle::platform::errors::PreconditionNotMet(
          "Need to SetName first, so that the corresponding tensor can "
          "be retrieved."));
  PADDLE_ENFORCE_EQ(input_or_output_, true,
                    paddle::platform::errors::PermissionDenied(
                        "Can't reshape the output tensor; it is read-only"));
  auto *scope = static_cast<paddle::framework::Scope *>(scope_);
  auto *var = scope->FindVar(name_);
  PADDLE_ENFORCE_NOT_NULL(
      var, paddle::platform::errors::PreconditionNotMet(
               "No tensor called [%s] in the runtime scope", name_));
  auto *tensor = var->GetMutable<paddle::framework::LoDTensor>();
  tensor->Resize(paddle::framework::make_ddim(shape));
}

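// Lazily resolve the underlying framework LoDTensor on first use and expose it
// to the enclosing function body as a local `tensor` pointer. Because the
// macro ends by declaring `tensor`, it must appear before any code that uses
// that name.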
#define EAGER_GET_TENSOR    \
  if (!tensor_) {           \
    tensor_ = FindTensor(); \
  }                         \
  auto *tensor = static_cast<paddle::framework::LoDTensor *>(tensor_);

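// Return a typed buffer owned by the tensor, allocating it on the requested
// place if needed. Reshape() must have been called first so that numel() is
// positive and the allocation size is known.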
template <typename T>
T *Tensor::mutable_data(PlaceType place) {
  EAGER_GET_TENSOR;
  PADDLE_ENFORCE_GT(
      tensor->numel(), 0,
      paddle::platform::errors::PreconditionNotMet(
          "You should call Tensor::Reshape(const std::vector<int> "
          "&shape) "
          "function before retrieving mutable_data from input tensor."));
  switch (static_cast<int>(place)) {
    case static_cast<int>(PlaceType::kCPU): {
      return tensor->mutable_data<T>(paddle::platform::CPUPlace());
    }
    case static_cast<int>(PlaceType::kGPU): {
      return tensor->mutable_data<T>(paddle::platform::CUDAPlace(device_));
    }
    case static_cast<int>(PlaceType::kXPU): {
      return tensor->mutable_data<T>(paddle::platform::XPUPlace(device_));
    }
    case static_cast<int>(PlaceType::kNPU): {
      return tensor->mutable_data<T>(paddle::platform::NPUPlace(device_));
    }
    default:
      PADDLE_THROW(paddle::platform::errors::Unavailable(
          "Only CPU / CUDA / XPU / NPU places are supported. The place `%d` is "
          "not supported.",
          static_cast<int>(place)));
      break;
  }
  return nullptr;
}

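// Return the tensor's existing data pointer without copying; the place it
// resides on and the element count are reported through the out-parameters.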
template <typename T>
T *Tensor::data(PlaceType *place, int *size) const {
  EAGER_GET_TENSOR;
  auto *res = tensor->data<T>();

  if (paddle::platform::is_cpu_place(tensor->place())) {
    *place = PlaceType::kCPU;
  } else if (paddle::platform::is_gpu_place(tensor->place())) {
    *place = PlaceType::kGPU;
  } else if (paddle::platform::is_xpu_place(tensor->place())) {
    *place = PlaceType::kXPU;
  } else if (paddle::platform::is_npu_place(tensor->place())) {
    *place = PlaceType::kNPU;
  } else {
    *place = PlaceType::kUNK;
  }

  *size = tensor->numel();
  return res;
}

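// Map the framework's proto VarType to the public DataType enum. Types without
// a mapping fall through and are reported as FLOAT32.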
DataType Tensor::type() const {
  EAGER_GET_TENSOR;
  auto type = tensor->type();
  if (type == paddle::framework::proto::VarType::FP32) {
    return DataType::FLOAT32;
  } else if (type == paddle::framework::proto::VarType::FP16) {
    return DataType::FLOAT16;
  } else if (type == paddle::framework::proto::VarType::INT64) {
    return DataType::INT64;
  } else if (type == paddle::framework::proto::VarType::INT32) {
    return DataType::INT32;
  } else if (type == paddle::framework::proto::VarType::UINT8) {
    return DataType::UINT8;
  } else if (type == paddle::framework::proto::VarType::INT8) {
    return DataType::INT8;
  }
  return DataType::FLOAT32;
}

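// Copy numel() elements from a host buffer into this tensor, allocating it on
// the tensor's place (CPU, GPU, XPU, or NPU) first. The GPU and NPU copies are
// issued on the device context's stream and are therefore asynchronous with
// respect to the host.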
template <typename T>
void Tensor::CopyFromCpu(const T *data) {
  EAGER_GET_TENSOR;
  PADDLE_ENFORCE_GE(tensor->numel(), 0,
                    paddle::platform::errors::PreconditionNotMet(
                        "You should call Tensor::Reshape(const "
                        "std::vector<int> &shape) "
                        "function before copying data from CPU."));
  size_t ele_size = tensor->numel() * sizeof(T);

  if (place_ == PlaceType::kCPU) {
    auto *t_data = tensor->mutable_data<T>(paddle::platform::CPUPlace());
    std::memcpy(static_cast<void *>(t_data), data, ele_size);
  } else if (place_ == PlaceType::kGPU) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
    paddle::platform::DeviceContextPool &pool =
        paddle::platform::DeviceContextPool::Instance();
    paddle::platform::CUDAPlace gpu_place(device_);
    auto *t_data = tensor->mutable_data<T>(gpu_place);
    auto *dev_ctx = static_cast<const paddle::platform::CUDADeviceContext *>(
        pool.Get(gpu_place));

    paddle::memory::Copy(gpu_place, static_cast<void *>(t_data),
                         paddle::platform::CPUPlace(), data, ele_size,
                         dev_ctx->stream());
#else
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "Cannot create tensor with CUDA place because Paddle is not compiled "
        "with CUDA."));
#endif
  } else if (place_ == PlaceType::kXPU) {
#ifdef PADDLE_WITH_XPU
    paddle::platform::XPUPlace xpu_place(device_);
    auto *t_data = tensor->mutable_data<T>(xpu_place);
    paddle::memory::Copy(xpu_place, static_cast<void *>(t_data),
                         paddle::platform::CPUPlace(), data, ele_size);
#else
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "Cannot create tensor with XPU place because Paddle is not compiled "
        "with XPU."));
#endif
  } else if (place_ == PlaceType::kNPU) {
#ifdef PADDLE_WITH_ASCEND_CL
    paddle::platform::DeviceContextPool &pool =
        paddle::platform::DeviceContextPool::Instance();
    paddle::platform::NPUPlace npu_place(device_);
    auto *t_data = tensor->mutable_data<T>(npu_place);
    auto *dev_ctx = static_cast<const paddle::platform::NPUDeviceContext *>(
        pool.Get(npu_place));
    paddle::memory::Copy(npu_place, static_cast<void *>(t_data),
                         paddle::platform::CPUPlace(), data, ele_size,
                         dev_ctx->stream());
#else
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "Cannot create tensor with NPU place because Paddle is not compiled "
        "with NPU."));
#endif
  } else {
    PADDLE_THROW(paddle::platform::errors::InvalidArgument(
        "The analysis predictor supports CPU, GPU, NPU and XPU now."));
  }
}

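// Copy the tensor's contents into a caller-provided host buffer. The GPU and
// NPU paths synchronize on the copy stream before returning; the CPU path
// converts MKL-DNN layouts back to the default Paddle layout when needed.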
template <typename T>
void Tensor::CopyToCpu(T *data) {
  EAGER_GET_TENSOR;
  auto ele_num = tensor->numel();
  auto *t_data = tensor->data<T>();
  auto t_place = tensor->place();

  paddle::framework::Tensor out;
  auto mem_allocation = std::make_shared<paddle::memory::Allocation>(
      static_cast<void *>(data), ele_num * sizeof(T),
      paddle::platform::CPUPlace());
  out.ResetHolder(mem_allocation);

  if (paddle::platform::is_cpu_place(t_place)) {
#ifdef PADDLE_WITH_MKLDNN
    if (tensor->layout() == paddle::framework::DataLayout::kMKLDNN)
      paddle::framework::innerTransDataLayoutFromMKLDNN(
          tensor->layout(), paddle::platform::MKLDNNDeviceContext::tls()
                                .get_cur_paddle_data_layout(),
          *tensor, &out, paddle::platform::CPUPlace(), true);
    else
      std::memcpy(static_cast<void *>(data), t_data, ele_num * sizeof(T));
#else
    std::memcpy(static_cast<void *>(data), t_data, ele_num * sizeof(T));
#endif
  } else if (place_ == PlaceType::kGPU) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
    paddle::platform::DeviceContextPool &pool =
        paddle::platform::DeviceContextPool::Instance();
    auto gpu_place = BOOST_GET_CONST(paddle::platform::CUDAPlace, t_place);
    auto *dev_ctx = static_cast<const paddle::platform::CUDADeviceContext *>(
        pool.Get(gpu_place));
    paddle::memory::Copy(paddle::platform::CPUPlace(),
                         static_cast<void *>(data), gpu_place, t_data,
                         ele_num * sizeof(T), dev_ctx->stream());
#ifdef PADDLE_WITH_HIP
    hipStreamSynchronize(dev_ctx->stream());
#else
    cudaStreamSynchronize(dev_ctx->stream());
#endif
#else
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "Cannot create tensor with CUDA place because Paddle is not compiled "
        "with CUDA."));
#endif
  } else if (place_ == PlaceType::kXPU) {
#ifdef PADDLE_WITH_XPU
    auto xpu_place = BOOST_GET_CONST(paddle::platform::XPUPlace, t_place);
    paddle::memory::Copy(paddle::platform::CPUPlace(),
                         static_cast<void *>(data), xpu_place, t_data,
                         ele_num * sizeof(T));
#else
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "Cannot create tensor with XPU place because Paddle is not compiled "
        "with XPU."));
#endif
  } else if (place_ == PlaceType::kNPU) {
#ifdef PADDLE_WITH_ASCEND_CL
    paddle::platform::DeviceContextPool &pool =
        paddle::platform::DeviceContextPool::Instance();
    auto npu_place = BOOST_GET_CONST(paddle::platform::NPUPlace, t_place);
    auto *dev_ctx = static_cast<const paddle::platform::NPUDeviceContext *>(
        pool.Get(npu_place));
    paddle::memory::Copy(paddle::platform::CPUPlace(),
                         static_cast<void *>(data), npu_place, t_data,
                         ele_num * sizeof(T), dev_ctx->stream());
    aclrtSynchronizeStream(dev_ctx->stream());
#else
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "Cannot create tensor with NPU place because Paddle is not compiled "
        "with NPU."));
#endif
  } else {
    PADDLE_THROW(paddle::platform::errors::InvalidArgument(
        "The analysis predictor supports CPU, GPU, NPU and XPU now."));
  }
}
template PD_INFER_DECL void Tensor::CopyFromCpu<float>(const float *data);
template PD_INFER_DECL void Tensor::CopyFromCpu<int64_t>(const int64_t *data);
template PD_INFER_DECL void Tensor::CopyFromCpu<int32_t>(const int32_t *data);
template PD_INFER_DECL void Tensor::CopyFromCpu<uint8_t>(const uint8_t *data);
template PD_INFER_DECL void Tensor::CopyFromCpu<int8_t>(const int8_t *data);
template PD_INFER_DECL void Tensor::CopyFromCpu<float16>(const float16 *data);

template PD_INFER_DECL void Tensor::CopyToCpu<float>(float *data);
template PD_INFER_DECL void Tensor::CopyToCpu<int64_t>(int64_t *data);
template PD_INFER_DECL void Tensor::CopyToCpu<int32_t>(int32_t *data);
template PD_INFER_DECL void Tensor::CopyToCpu<uint8_t>(uint8_t *data);
template PD_INFER_DECL void Tensor::CopyToCpu<int8_t>(int8_t *data);
template PD_INFER_DECL void Tensor::CopyToCpu<float16>(float16 *data);

template PD_INFER_DECL float *Tensor::data<float>(PlaceType *place,
                                                  int *size) const;
template PD_INFER_DECL int64_t *Tensor::data<int64_t>(PlaceType *place,
                                                      int *size) const;
template PD_INFER_DECL int32_t *Tensor::data<int32_t>(PlaceType *place,
                                                      int *size) const;
template PD_INFER_DECL uint8_t *Tensor::data<uint8_t>(PlaceType *place,
                                                      int *size) const;
template PD_INFER_DECL int8_t *Tensor::data<int8_t>(PlaceType *place,
                                                    int *size) const;

template PD_INFER_DECL float *Tensor::mutable_data<float>(PlaceType place);
template PD_INFER_DECL int64_t *Tensor::mutable_data<int64_t>(PlaceType place);
template PD_INFER_DECL int32_t *Tensor::mutable_data<int32_t>(PlaceType place);
template PD_INFER_DECL uint8_t *Tensor::mutable_data<uint8_t>(PlaceType place);
template PD_INFER_DECL int8_t *Tensor::mutable_data<int8_t>(PlaceType place);

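// Bind this tensor handle to a framework scope; FindTensor() later uses that
// scope to look up the variable registered under name_.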
Tensor::Tensor(void *scope) : scope_{scope} {
  PADDLE_ENFORCE_NOT_NULL(scope_,
                          paddle::platform::errors::PreconditionNotMet(
                              "The `scope` can not be nullptr. It should be "
                              "set to the pointer of scope."));
}

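// Look up the LoDTensor variable named name_ in the bound scope and return it
// as an opaque pointer; EAGER_GET_TENSOR caches the result in tensor_.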
void *Tensor::FindTensor() const {
  PADDLE_ENFORCE_EQ(
      name_.empty(), false,
      paddle::platform::errors::PreconditionNotMet(
          "Need to SetName first, so that the corresponding tensor can "
          "be retrieved."));
  auto *scope = static_cast<paddle::framework::Scope *>(scope_);
  auto *var = scope->FindVar(name_);
  PADDLE_ENFORCE_NOT_NULL(
      var, paddle::platform::errors::PreconditionNotMet(
               "No tensor called [%s] in the runtime scope", name_));
  auto *tensor = var->GetMutable<paddle::framework::LoDTensor>();
  return tensor;
}

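// Return the tensor's current dimensions as a vector of ints.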
std::vector<int> Tensor::shape() const {
  EAGER_GET_TENSOR;
  PADDLE_ENFORCE_NOT_NULL(
      tensor_, paddle::platform::errors::PreconditionNotMet(
                   "No tensor called %s found in the scope", name_));
  return paddle::framework::vectorize<int>(tensor->dims());
}

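// Replace the tensor's level-of-detail (LoD) information, which records
// variable-length sequence boundaries.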
void Tensor::SetLoD(const std::vector<std::vector<size_t>> &x) {
  EAGER_GET_TENSOR;
  paddle::framework::LoD lod;
  for (auto &level : x) {
    lod.emplace_back(level);
  }
  tensor->set_lod(lod);
}

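// Return a copy of the tensor's LoD as nested vectors.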
std::vector<std::vector<size_t>> Tensor::lod() const {
  EAGER_GET_TENSOR;
  std::vector<std::vector<size_t>> res;
  for (auto &level : tensor->lod()) {
    res.emplace_back(level);
  }
  return res;
}

void Tensor::SetName(const std::string &name) { name_ = name; }

const std::string &Tensor::name() const { return name_; }

void Tensor::SetPlace(PlaceType place, int device) {
  place_ = place;
  device_ = device;
}

}  // namespace paddle_infer
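
// A minimal usage sketch for the zero-copy tensor API above (illustrative
// only: it assumes a paddle_infer::Predictor named `predictor` created
// elsewhere, plus <numeric> and <functional> for std::accumulate /
// std::multiplies):
//
//   auto input = predictor->GetInputHandle(predictor->GetInputNames()[0]);
//   input->Reshape({1, 3, 224, 224});
//   std::vector<float> host_in(1 * 3 * 224 * 224, 0.f);
//   input->CopyFromCpu(host_in.data());
//   predictor->Run();
//
//   auto output = predictor->GetOutputHandle(predictor->GetOutputNames()[0]);
//   auto out_shape = output->shape();
//   int out_num = std::accumulate(out_shape.begin(), out_shape.end(), 1,
//                                 std::multiplies<int>());
//   std::vector<float> host_out(out_num);
//   output->CopyToCpu(host_out.data());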