From 1861ca88f1e190a339bf5581e8ce7f1eb9307949 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?= <39303645+Shixiaowei02@users.noreply.github.com>
Date: Wed, 4 Mar 2020 13:18:14 +0800
Subject: [PATCH] serialize the PaddleTensor, test=develop (#22810)

* encapsulate the PaddleTensorToLoDTensor, test=develop

* serialize the pd_tensor, test=develop

* serialize tensors to file, test=develop
---
 paddle/fluid/inference/CMakeLists.txt          |   1 +
 .../fluid/inference/api/analysis_predictor.cc  |  94 +++++-----
 paddle/fluid/inference/api/helper.h            |  18 ++
 paddle/fluid/inference/utils/CMakeLists.txt    |   2 +
 paddle/fluid/inference/utils/io_utils.cc       | 163 ++++++++++++++++++
 paddle/fluid/inference/utils/io_utils.h        |  40 +++++
 .../fluid/inference/utils/io_utils_tester.cc   |  97 +++++++++++
 paddle/fluid/pybind/CMakeLists.txt             |   2 +-
 paddle/fluid/pybind/inference_api.cc           |  29 ++--
 9 files changed, 386 insertions(+), 60 deletions(-)
 create mode 100644 paddle/fluid/inference/utils/io_utils.cc
 create mode 100644 paddle/fluid/inference/utils/io_utils.h
 create mode 100644 paddle/fluid/inference/utils/io_utils_tester.cc

diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index bca662b870b..88723e24184 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -76,6 +76,7 @@ set(SHARED_INFERENCE_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/utils/io_utils.cc
     ${mkldnn_quantizer_src_file})
 
 # Create shared inference library defaultly
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 107e5ae7d81..5aa3d7a0527 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -71,6 +71,57 @@ bool IsPersistable(const framework::VarDesc *var) {
 }
 }  // namespace
 
+bool PaddleTensorToLoDTensor(const PaddleTensor &pt, framework::LoDTensor *t,
+                             const platform::Place &place) {
+  framework::DDim ddim = framework::make_ddim(pt.shape);
+  void *input_ptr;
+  if (pt.dtype == PaddleDType::INT64) {
+    input_ptr = t->mutable_data<int64_t>(ddim, place);
+  } else if (pt.dtype == PaddleDType::FLOAT32) {
+    input_ptr = t->mutable_data<float>(ddim, place);
+  } else if (pt.dtype == PaddleDType::INT32) {
+    input_ptr = t->mutable_data<int32_t>(ddim, place);
+  } else {
+    LOG(ERROR) << "unsupported feed type " << pt.dtype;
+    return false;
+  }
+
+  PADDLE_ENFORCE_NOT_NULL(
+      input_ptr,
+      paddle::platform::errors::Fatal(
+          "Cannot convert to LoDTensor because LoDTensor creation failed."));
+  PADDLE_ENFORCE_NOT_NULL(
+      pt.data.data(),
+      paddle::platform::errors::InvalidArgument(
+          "The data contained in the input PaddleTensor is illegal."));
+
+  if (platform::is_cpu_place(place)) {
+    // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
+    std::memcpy(static_cast<void *>(input_ptr), pt.data.data(),
+                pt.data.length());
+  } else {
+#ifdef PADDLE_WITH_CUDA
+    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+    auto *dev_ctx =
+        static_cast<const platform::CUDADeviceContext *>(pool.Get(place));
+    auto dst_gpu_place = boost::get<platform::CUDAPlace>(place);
+    memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
+                 platform::CPUPlace(), pt.data.data(), pt.data.length(),
+                 dev_ctx->stream());
+#else
+    PADDLE_THROW(paddle::platform::errors::Fatal(
+        "Not compile with CUDA, should not reach here."));
+#endif
+  }
+  // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
+  framework::LoD lod;
+  for (auto &level : pt.lod) {
+    lod.emplace_back(level);
+  }
+  t->set_lod(lod);
+  return true;
+}
+
 bool AnalysisPredictor::Init(
     const std::shared_ptr<framework::Scope> &parent_scope,
     const std::shared_ptr<framework::ProgramDesc> &program) {
@@ -274,47 +325,10 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
   feed_tensors_.resize(inputs.size());
 
   for (size_t i = 0; i < inputs.size(); ++i) {
-    auto &input = feed_tensors_[i];
-    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
-    void *input_ptr;
-    if (inputs[i].dtype == PaddleDType::INT64) {
-      input_ptr = input.mutable_data<int64_t>(ddim, place_);
-    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
-      input_ptr = input.mutable_data<float>(ddim, place_);
-    } else if (inputs[i].dtype == PaddleDType::INT32) {
-      input_ptr = input.mutable_data<int32_t>(ddim, place_);
-    } else {
-      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
+    framework::LoDTensor *input = &feed_tensors_[i];
+    if (!PaddleTensorToLoDTensor(inputs[i], input, place_)) {
       return false;
     }
-
-    PADDLE_ENFORCE_NOT_NULL(input_ptr);
-    PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data());
-
-    if (platform::is_cpu_place(place_)) {
-      // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
-      std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
-                  inputs[i].data.length());
-    } else {
-#ifdef PADDLE_WITH_CUDA
-      platform::DeviceContextPool &pool =
-          platform::DeviceContextPool::Instance();
-      auto *dev_ctx =
-          static_cast<const platform::CUDADeviceContext *>(pool.Get(place_));
-      auto dst_gpu_place = boost::get<platform::CUDAPlace>(place_);
-      memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
-                   platform::CPUPlace(), inputs[i].data.data(),
-                   inputs[i].data.length(), dev_ctx->stream());
-#else
-      PADDLE_THROW("Not compile with CUDA, should not reach here.");
-#endif
-    }
-    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
-    framework::LoD lod;
-    for (auto &level : inputs[i].lod) {
-      lod.emplace_back(level);
-    }
-    input.set_lod(lod);
     int idx = -1;
     if (config_.specify_input_name_) {
       auto name = inputs[i].name;
@@ -326,7 +340,7 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
     } else {
       idx = boost::get<int>(feeds_[i]->GetAttr("col"));
     }
-    framework::SetFeedVariable(scope, input, "feed", idx);
+    framework::SetFeedVariable(scope, *input, "feed", idx);
   }
   return true;
 }
diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h
index 907d35b298c..b58c300c2ed 100644
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
@@ -39,6 +39,24 @@ extern std::string paddle::framework::DataTypeToString(
 namespace paddle {
 namespace inference {
 
+template <typename T>
+constexpr PaddleDType PaddleTensorGetDType();
+
+template <>
+constexpr PaddleDType PaddleTensorGetDType<int32_t>() {
+  return PaddleDType::INT32;
+}
+
+template <>
+constexpr PaddleDType PaddleTensorGetDType<int64_t>() {
+  return PaddleDType::INT64;
+}
+
+template <>
+constexpr PaddleDType PaddleTensorGetDType<float>() {
+  return PaddleDType::FLOAT32;
+}
+
 using paddle::framework::DataTypeToString;
 
 // Timer for timer
diff --git a/paddle/fluid/inference/utils/CMakeLists.txt b/paddle/fluid/inference/utils/CMakeLists.txt
index 2104e4ac722..956cd739371 100644
--- a/paddle/fluid/inference/utils/CMakeLists.txt
+++ b/paddle/fluid/inference/utils/CMakeLists.txt
@@ -1,2 +1,4 @@
 cc_library(benchmark SRCS benchmark.cc DEPS enforce)
 cc_test(test_benchmark SRCS benchmark_tester.cc DEPS benchmark)
+cc_library(infer_io_utils SRCS io_utils.cc DEPS paddle_inference_api lod_tensor)
+cc_test(infer_io_utils_tester SRCS io_utils_tester.cc DEPS infer_io_utils)
diff --git a/paddle/fluid/inference/utils/io_utils.cc b/paddle/fluid/inference/utils/io_utils.cc
new file mode 100644
index 00000000000..346fa481325
--- /dev/null
+++ b/paddle/fluid/inference/utils/io_utils.cc
@@ -0,0 +1,163 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/utils/io_utils.h"
+#include <fstream>
+#include <vector>
+#include "paddle/fluid/inference/analysis/helper.h"
+
+namespace paddle {
+namespace inference {
+
+// =========================================================
+// Item             |   Type   |   Bytes
+// ---------------------------------------------------------
+// Version          | uint32_t | 4
+// ---------------------------------------------------------
+// Bytes of `Name`  | uint64_t | 8
+// Name             | char     | Bytes of `Name`
+// ---------------------------------------------------------
+// LoD Level        | uint64_t | 8
+// Bytes of `LoD[0]`| uint64_t | 8
+// LoD[0]           | uint64_t | Bytes of `LoD[0]`
+// ...              |   ...    | ...
+// ---------------------------------------------------------
+// Dims of `Shape`  | uint64_t | 8
+// Shape            | uint64_t | Dims * 4
+// ---------------------------------------------------------
+// Dtype            | int32_t  | 4
+// Bytes of `Data`  | uint64_t | 8
+// Data             |  Dtype   | Bytes of `Data`
+// =========================================================
+void SerializePDTensorToStream(std::ostream *os, const PaddleTensor &tensor) {
+  // 1. Version
+  os->write(reinterpret_cast<const char *>(&kCurPDTensorVersion),
+            sizeof(kCurPDTensorVersion));
+  // 2. Name
+  uint64_t name_bytes = tensor.name.size();
+  os->write(reinterpret_cast<const char *>(&name_bytes), sizeof(name_bytes));
+  os->write(tensor.name.c_str(), name_bytes);
+  // 3. LoD
+  auto lod = tensor.lod;
+  uint64_t lod_size = lod.size();
+  os->write(reinterpret_cast<const char *>(&lod_size), sizeof(lod_size));
+  for (auto &each : lod) {
+    auto size = each.size() * sizeof(size_t);
+    os->write(reinterpret_cast<const char *>(&size), sizeof(size));
+    os->write(reinterpret_cast<const char *>(each.data()),
+              static_cast<std::streamsize>(size));
+  }
+  // 4. Shape
+  size_t dims = tensor.shape.size();
+  os->write(reinterpret_cast<const char *>(&dims), sizeof(dims));
+  os->write(reinterpret_cast<const char *>(tensor.shape.data()),
+            sizeof(int) * dims);
+  // 5. Data
+  os->write(reinterpret_cast<const char *>(&tensor.dtype),
+            sizeof(tensor.dtype));
+  uint64_t length = tensor.data.length();
+  os->write(reinterpret_cast<const char *>(&length), sizeof(size_t));
+  os->write(reinterpret_cast<const char *>(tensor.data.data()), length);
+}
+
+void DeserializePDTensorToStream(std::istream &is, PaddleTensor *tensor) {
+  // 1. Version
+  uint32_t version;
+  is.read(reinterpret_cast<char *>(&version), sizeof(version));
+  // 2. Name
+  uint64_t name_bytes;
+  is.read(reinterpret_cast<char *>(&name_bytes), sizeof(name_bytes));
+  std::vector<char> bytes(name_bytes);
+  is.read(bytes.data(), name_bytes);
+  tensor->name = std::string(bytes.data(), name_bytes);
+  // 3. LoD
+  uint64_t lod_level;
+  is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
+  auto *lod = &(tensor->lod);
+  lod->resize(lod_level);
+  for (uint64_t i = 0; i < lod_level; ++i) {
+    uint64_t size;
+    is.read(reinterpret_cast<char *>(&size), sizeof(size));
+    std::vector<size_t> tmp(size / sizeof(size_t));
+    is.read(reinterpret_cast<char *>(tmp.data()),
+            static_cast<std::streamsize>(size));
+    (*lod)[i] = tmp;
+  }
+  // 4. Shape
+  size_t dims;
+  is.read(reinterpret_cast<char *>(&dims), sizeof(dims));
+  tensor->shape.resize(dims);
+  is.read(reinterpret_cast<char *>(tensor->shape.data()), sizeof(int) * dims);
+  // 5. Data
+  uint64_t length;
+  is.read(reinterpret_cast<char *>(&tensor->dtype), sizeof(tensor->dtype));
+  is.read(reinterpret_cast<char *>(&length), sizeof(length));
+  tensor->data.Resize(length);
+  is.read(reinterpret_cast<char *>(tensor->data.data()), length);
+}
+
+// =========================================================
+// Item             |   Type   |   Bytes
+// ---------------------------------------------------------
+// Version          | uint32_t | 4
+// ---------------------------------------------------------
+// Size of Tensors  | uint64_t | 8
+// Tensors          |   ----   |   ---
+// ---------------------------------------------------------
+void SerializePDTensorsToStream(std::ostream *os,
+                                const std::vector<PaddleTensor> &tensors) {
+  // 1. Version
+  os->write(reinterpret_cast<const char *>(&kCurPDTensorVersion),
+            sizeof(kCurPDTensorVersion));
+  // 2. Tensors
+  uint64_t num = tensors.size();
+  os->write(reinterpret_cast<const char *>(&num), sizeof(num));
+  for (const auto &tensor : tensors) {
+    SerializePDTensorToStream(os, tensor);
+  }
+}
+
+void DeserializePDTensorsToStream(std::istream &is,
+                                  std::vector<PaddleTensor> *tensors) {
+  // 1. Version
+  uint32_t version;
+  is.read(reinterpret_cast<char *>(&version), sizeof(version));
+  // 2. Tensors
+  uint64_t num;
+  is.read(reinterpret_cast<char *>(&num), sizeof(num));
+  tensors->resize(num);
+  for (auto &tensor : *tensors) {
+    DeserializePDTensorToStream(is, &tensor);
+  }
+}
+
+void SerializePDTensorsToFile(const std::string &path,
+                              const std::vector<PaddleTensor> &tensors) {
+  std::ofstream fout(path, std::ios::binary);
+  SerializePDTensorsToStream(&fout, tensors);
+  fout.close();
+}
+
+void DeserializePDTensorsToFile(const std::string &path,
+                                std::vector<PaddleTensor> *tensors) {
+  bool is_present = analysis::FileExists(path);
+  PADDLE_ENFORCE_EQ(is_present, true, platform::errors::InvalidArgument(
+                                          "Cannot open %s to read", path));
+  std::ifstream fin(path, std::ios::binary);
+  DeserializePDTensorsToStream(fin, tensors);
+  fin.close();
+}
+
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/utils/io_utils.h b/paddle/fluid/inference/utils/io_utils.h
new file mode 100644
index 00000000000..853aba168b5
--- /dev/null
+++ b/paddle/fluid/inference/utils/io_utils.h
@@ -0,0 +1,40 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/inference/api/paddle_api.h"
+
+namespace paddle {
+namespace inference {
+
+constexpr uint32_t kCurPDTensorVersion = 0;
+
+void SerializePDTensorToStream(std::ostream* os, const PaddleTensor& tensor);
+void DeserializePDTensorToStream(std::istream& is, PaddleTensor* tensor);
+
+void SerializePDTensorsToStream(std::ostream* os,
+                                const std::vector<PaddleTensor>& tensors);
+void DeserializePDTensorsToStream(std::istream& is,
+                                  std::vector<PaddleTensor>* tensors);
+
+void SerializePDTensorsToFile(const std::string& path,
+                              const std::vector<PaddleTensor>& tensors);
+void DeserializePDTensorsToFile(const std::string& path,
+                                std::vector<PaddleTensor>* tensors);
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/utils/io_utils_tester.cc b/paddle/fluid/inference/utils/io_utils_tester.cc
new file mode 100644
index 00000000000..c8aa03c619e
--- /dev/null
+++ b/paddle/fluid/inference/utils/io_utils_tester.cc
@@ -0,0 +1,97 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "paddle/fluid/inference/api/helper.h"
+#include "paddle/fluid/inference/utils/io_utils.h"
+
+namespace paddle {
+namespace inference {
+namespace {
+
+bool pd_tensor_equal(const paddle::PaddleTensor& ref,
+                     const paddle::PaddleTensor& t) {
+  bool is_equal = true;
+  VLOG(3) << "ref.name: " << ref.name << ", t.name: " << t.name;
+  VLOG(3) << "ref.dtype: " << ref.dtype << ", t.dtype: " << t.dtype;
+  VLOG(3) << "ref.lod_level: " << ref.lod.size()
+          << ", t.lod_level: " << t.lod.size();
+  VLOG(3) << "ref.data_len: " << ref.data.length()
+          << ", t.data_len: " << t.data.length();
+  return is_equal && (ref.name == t.name) && (ref.lod == t.lod) &&
+         (ref.dtype == t.dtype) &&
+         (std::memcmp(ref.data.data(), t.data.data(), ref.data.length()) == 0);
+}
+
+template <typename T>
+void test_io_utils() {
+  std::vector<T> input({6, 8});
+  paddle::PaddleTensor in;
+  in.name = "Hello";
+  in.shape = {1, 2};
+  in.lod = std::vector<std::vector<size_t>>{{0, 1}};
+  in.data = paddle::PaddleBuf(static_cast<void*>(input.data()),
+                              input.size() * sizeof(T));
+  in.dtype = paddle::inference::PaddleTensorGetDType<T>();
+  std::stringstream ss;
+  paddle::inference::SerializePDTensorToStream(&ss, in);
+  paddle::PaddleTensor out;
+  paddle::inference::DeserializePDTensorToStream(ss, &out);
+  ASSERT_TRUE(pd_tensor_equal(in, out));
+}
+}  // namespace
+}  // namespace inference
+}  // namespace paddle
+
+TEST(infer_io_utils, float32) { paddle::inference::test_io_utils<float>(); }
+TEST(infer_io_utils, int64) { paddle::inference::test_io_utils<int64_t>(); }
+
+TEST(infer_io_utils, tensors) {
+  // Create a float32 tensor.
+  std::vector<float> input_fp32({1.1f, 3.2f, 5.0f, 8.2f});
+  paddle::PaddleTensor in_fp32;
+  in_fp32.name = "Tensor.fp32_0";
+  in_fp32.shape = {2, 2};
+  in_fp32.data = paddle::PaddleBuf(static_cast<void*>(input_fp32.data()),
+                                   input_fp32.size() * sizeof(float));
+  in_fp32.dtype = paddle::inference::PaddleTensorGetDType<float>();
+
+  // Create an int64 tensor.
+  std::vector<int64_t> input_int64({5, 8});
+  paddle::PaddleTensor in_int64;
+  in_int64.name = "Tensor.int64_0";
+  in_int64.shape = {1, 2};
+  in_int64.lod = std::vector<std::vector<size_t>>{{0, 1}};
+  in_int64.data = paddle::PaddleBuf(static_cast<void*>(input_int64.data()),
+                                    input_int64.size() * sizeof(int64_t));
+  in_int64.dtype = paddle::inference::PaddleTensorGetDType<int64_t>();
+
+  // Serialize tensors.
+  std::vector<paddle::PaddleTensor> tensors_in({in_fp32, in_int64});
+  std::string file_path = "./io_utils_tensors";
+  paddle::inference::SerializePDTensorsToFile(file_path, tensors_in);
+
+  // Deserialize tensors.
+  std::vector<paddle::PaddleTensor> tensors_out;
+  paddle::inference::DeserializePDTensorsToFile(file_path, &tensors_out);
+
+  // Check results.
+  ASSERT_EQ(tensors_in.size(), tensors_out.size());
+  for (size_t i = 0; i < tensors_in.size(); ++i) {
+    ASSERT_TRUE(
+        paddle::inference::pd_tensor_equal(tensors_in[i], tensors_out[i]));
+  }
+}
diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index 87dceb1850f..0fad32d160f 100644
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -1,7 +1,7 @@
 set(PYBIND_DEPS pybind python proto_desc memory executor fleet_wrapper box_wrapper prune
   feed_fetch_method pass_builder parallel_executor profiler layer tracer engine scope_pool
   analysis_predictor imperative_profiler imperative_flag save_load_util dlpack_tensor device_context
-  gloo_wrapper)
+  gloo_wrapper infer_io_utils)
 
 if (WITH_NCCL)
   set(PYBIND_DEPS ${PYBIND_DEPS} nccl_wrapper)
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 46babdcc6ef..2d5aae960ac 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -27,8 +27,10 @@
 #include <utility>
 #include <vector>
 #include "paddle/fluid/inference/api/analysis_predictor.h"
+#include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_pass_builder.h"
+#include "paddle/fluid/inference/utils/io_utils.h"
 
 namespace py = pybind11;
 
@@ -78,24 +80,6 @@ void PaddleBufReset(PaddleBuf &buf, py::array_t<T> data) {  // NOLINT
                     static_cast<T *>(buf.data()));
 }
 
-template <typename T>
-constexpr PaddleDType PaddleTensorGetDType();
-
-template <>
-constexpr PaddleDType PaddleTensorGetDType<int32_t>() {
-  return PaddleDType::INT32;
-}
-
-template <>
-constexpr PaddleDType PaddleTensorGetDType<int64_t>() {
-  return PaddleDType::INT64;
-}
-
-template <>
-constexpr PaddleDType PaddleTensorGetDType<float>() {
-  return PaddleDType::FLOAT32;
-}
-
 template <typename T>
 PaddleTensor PaddleTensorCreate(
     py::array_t<T> data, const std::string name = "",
@@ -111,7 +95,7 @@ PaddleTensor PaddleTensorCreate(
     tensor.data = PaddleBuf(data.mutable_data(), data.size() * sizeof(T));
   }
 
-  tensor.dtype = PaddleTensorGetDType<T>();
+  tensor.dtype = inference::PaddleTensorGetDType<T>();
   tensor.name = name;
   tensor.lod = lod;
   tensor.shape.resize(data.ndim());
@@ -192,6 +176,12 @@ py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) {  // NOLINT
   }
   return array;
 }
+
+py::bytes SerializePDTensorToBytes(PaddleTensor &tensor) {  // NOLINT
+  std::stringstream ss;
+  paddle::inference::SerializePDTensorToStream(&ss, tensor);
+  return static_cast<py::bytes>(ss.str());
+}
 }  // namespace
 
 void BindInferenceApi(py::module *m) {
@@ -214,6 +204,7 @@ void BindInferenceApi(py::module *m) {
   m->def("create_paddle_predictor",
          &paddle::CreatePaddlePredictor);
   m->def("paddle_dtype_size", &paddle::PaddleDtypeSize);
+  m->def("paddle_tensor_to_bytes", &SerializePDTensorToBytes);
 }
 
 namespace {
-- 
GitLab
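
Not part of the commit: a minimal, self-contained sketch of how the file round-trip added by this patch could be driven from C++. The tensor values, the "./tensors.bin" path, and the standalone main() are illustrative assumptions; the in-tree usage is the io_utils_tester.cc test above.

    // Sketch only: serialize one PaddleTensor to a file and read it back.
    #include <vector>
    #include "paddle/fluid/inference/utils/io_utils.h"

    int main() {
      std::vector<float> data({1.0f, 2.0f, 3.0f, 4.0f});
      paddle::PaddleTensor t;
      t.name = "x";
      t.shape = {2, 2};
      t.dtype = paddle::PaddleDType::FLOAT32;
      // This PaddleBuf constructor wraps the caller's buffer without copying it.
      t.data = paddle::PaddleBuf(data.data(), data.size() * sizeof(float));

      // Write version header, name, LoD, shape, dtype and raw data to disk,
      // then load the tensors back into a fresh vector.
      paddle::inference::SerializePDTensorsToFile("./tensors.bin", {t});
      std::vector<paddle::PaddleTensor> loaded;
      paddle::inference::DeserializePDTensorsToFile("./tensors.bin", &loaded);
      return 0;
    }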