From 4f4daa4b667b0753e9e9e69528563fb5d5666723 Mon Sep 17 00:00:00 2001 From: nhzlx Date: Wed, 20 Mar 2019 13:41:43 +0000 Subject: [PATCH] cherry-pick from feature/anakin-engine: add data type for zero copy #16313 1. refine anakin engine 2. add data type for zero copy align dev branch and PaddlePaddle:feature/anakin-engine brach the cudnn workspace modify was not included for now, because we use a hard code way in feature/anakin-engine branch. There should be a better way to implement it, and subsequent submissions will be made. test=develop --- paddle/fluid/inference/anakin/engine.cc | 1 + .../inference/api/details/zero_copy_tensor.cc | 14 ++++++++++++++ paddle/fluid/inference/api/paddle_api.h | 3 +++ 3 files changed, 18 insertions(+) diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc index 543ac9d6385..39be7865149 100644 --- a/paddle/fluid/inference/anakin/engine.cc +++ b/paddle/fluid/inference/anakin/engine.cc @@ -71,6 +71,7 @@ void AnakinEngine::Execute( const std::map &inputs, const std::map &outputs, cudaStream_t stream) { + cudaDeviceSynchronize(); for (const auto &input : inputs) { auto *tensor = input.second; auto *data = tensor->data(); diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index 9a40cf4b60a..33de430fc8c 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -74,6 +74,19 @@ T *ZeroCopyTensor::data(PaddlePlace *place, int *size) const { return res; } +PaddleDType ZeroCopyTensor::type() { + EAGER_GET_TENSOR; + auto type = tensor->type(); + if (type == framework::proto::VarType::FP32) { + return PaddleDType::FLOAT32; + } else if (type == framework::proto::VarType::INT64) { + return PaddleDType::INT64; + } else { + LOG(ERROR) << "unknown type, only support float32 and int64 now."; + } + return PaddleDType::FLOAT32; +} + template void ZeroCopyTensor::copy_from_cpu(const T *data) { EAGER_GET_TENSOR; @@ -119,6 +132,7 @@ void ZeroCopyTensor::copy_to_cpu(T *data) { static_cast(pool.Get(gpu_place)); memory::Copy(platform::CPUPlace(), static_cast(data), gpu_place, t_data, ele_num * sizeof(T), dev_ctx->stream()); + cudaDeviceSynchronize(); #else PADDLE_THROW("Not compile with CUDA, should not reach here."); #endif diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index 703fd180694..52c5cb34b53 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -177,6 +177,8 @@ class ZeroCopyTensor { device_ = device; } + PaddleDType type(); + protected: explicit ZeroCopyTensor(void* scope) : scope_{scope} {} void SetName(const std::string& name) { name_ = name; } @@ -191,6 +193,7 @@ class ZeroCopyTensor { // performance. mutable void* tensor_{nullptr}; PaddlePlace place_; + PaddleDType dtype_; int device_; }; -- GitLab