From 4f4daa4b667b0753e9e9e69528563fb5d5666723 Mon Sep 17 00:00:00 2001
From: nhzlx <zlx_hg@163.com>
Date: Wed, 20 Mar 2019 13:41:43 +0000
Subject: [PATCH] cherry-pick from feature/anakin-engine: add data type for
 zero copy #16313 1. refine anakin engine 2. add data type for zero copy

align dev branch and PaddlePaddle:feature/anakin-engine brach
the cudnn workspace modify was not included for now, because we use a hard code way
in feature/anakin-engine branch. There should be a better way to implement it,
and subsequent submissions will be made.

test=develop
---
 paddle/fluid/inference/anakin/engine.cc            |  1 +
 .../inference/api/details/zero_copy_tensor.cc      | 14 ++++++++++++++
 paddle/fluid/inference/api/paddle_api.h            |  3 +++
 3 files changed, 18 insertions(+)
diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc
index 543ac9d6385..39be7865149 100644
--- a/paddle/fluid/inference/anakin/engine.cc
+++ b/paddle/fluid/inference/anakin/engine.cc
@@ -71,6 +71,7 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
     const std::map<std::string, framework::LoDTensor *> &inputs,
     const std::map<std::string, framework::LoDTensor *> &outputs,
     cudaStream_t stream) {
+  cudaDeviceSynchronize();
   for (const auto &input : inputs) {
     auto *tensor = input.second;
     auto *data = tensor->data<float>();
diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
index 9a40cf4b60a..33de430fc8c 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -74,6 +74,19 @@ T *ZeroCopyTensor::data(PaddlePlace *place, int *size) const {
   return res;
 }
 
+PaddleDType ZeroCopyTensor::type() {
+  EAGER_GET_TENSOR;
+  auto type = tensor->type();
+  if (type == framework::proto::VarType::FP32) {
+    return PaddleDType::FLOAT32;
+  } else if (type == framework::proto::VarType::INT64) {
+    return PaddleDType::INT64;
+  } else {
+    LOG(ERROR) << "unknown type, only support float32 and int64 now.";
+  }
+  return PaddleDType::FLOAT32;
+}
+
 template <typename T>
 void ZeroCopyTensor::copy_from_cpu(const T *data) {
   EAGER_GET_TENSOR;
@@ -119,6 +132,7 @@ void ZeroCopyTensor::copy_to_cpu(T *data) {
         static_cast<const platform::CUDADeviceContext *>(pool.Get(gpu_place));
     memory::Copy(platform::CPUPlace(), static_cast<void *>(data), gpu_place,
                  t_data, ele_num * sizeof(T), dev_ctx->stream());
+    cudaDeviceSynchronize();
 #else
     PADDLE_THROW("Not compile with CUDA, should not reach here.");
 #endif
diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h
index 703fd180694..52c5cb34b53 100644
--- a/paddle/fluid/inference/api/paddle_api.h
+++ b/paddle/fluid/inference/api/paddle_api.h
@@ -177,6 +177,8 @@ class ZeroCopyTensor {
     device_ = device;
   }
 
+  PaddleDType type();
+
  protected:
   explicit ZeroCopyTensor(void* scope) : scope_{scope} {}
   void SetName(const std::string& name) { name_ = name; }
@@ -191,6 +193,7 @@ class ZeroCopyTensor {
   // performance.
   mutable void* tensor_{nullptr};
   PaddlePlace place_;
+  PaddleDType dtype_;
   int device_;
 };
 
-- 
GitLab