Unverified commit 1861ca88, authored by 石晓伟, committed by GitHub

serialize the PaddleTensor, test=develop (#22810)

* encapsulate the PaddleTensorToLoDTensor, test=develop

* serialize the pd_tensor, test=develop

* serialize tensors to file, test=develop
Parent 72ff5a09
@@ -76,6 +76,7 @@ set(SHARED_INFERENCE_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/utils/io_utils.cc
     ${mkldnn_quantizer_src_file})
 # Create shared inference library defaultly
......
@@ -71,6 +71,57 @@ bool IsPersistable(const framework::VarDesc *var) {
   }
 }  // namespace
 
+bool PaddleTensorToLoDTensor(const PaddleTensor &pt, framework::LoDTensor *t,
+                             const platform::Place &place) {
+  framework::DDim ddim = framework::make_ddim(pt.shape);
+  void *input_ptr;
+  if (pt.dtype == PaddleDType::INT64) {
+    input_ptr = t->mutable_data<int64_t>(ddim, place);
+  } else if (pt.dtype == PaddleDType::FLOAT32) {
+    input_ptr = t->mutable_data<float>(ddim, place);
+  } else if (pt.dtype == PaddleDType::INT32) {
+    input_ptr = t->mutable_data<int32_t>(ddim, place);
+  } else {
+    LOG(ERROR) << "unsupported feed type " << pt.dtype;
+    return false;
+  }
+  PADDLE_ENFORCE_NOT_NULL(
+      input_ptr,
+      paddle::platform::errors::Fatal(
+          "Cannot convert to LoDTensor because LoDTensor creation failed."));
+  PADDLE_ENFORCE_NOT_NULL(
+      pt.data.data(),
+      paddle::platform::errors::InvalidArgument(
+          "The data contained in the input PaddleTensor is illegal."));
+  if (platform::is_cpu_place(place)) {
+    // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
+    std::memcpy(static_cast<void *>(input_ptr), pt.data.data(),
+                pt.data.length());
+  } else {
+#ifdef PADDLE_WITH_CUDA
+    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+    auto *dev_ctx =
+        static_cast<const platform::CUDADeviceContext *>(pool.Get(place));
+    auto dst_gpu_place = boost::get<platform::CUDAPlace>(place);
+    memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
+                 platform::CPUPlace(), pt.data.data(), pt.data.length(),
+                 dev_ctx->stream());
+#else
+    PADDLE_THROW(paddle::platform::errors::Fatal(
+        "Not compile with CUDA, should not reach here."));
+#endif
+  }
+  // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
+  framework::LoD lod;
+  for (auto &level : pt.lod) {
+    lod.emplace_back(level);
+  }
+  t->set_lod(lod);
+  return true;
+}
+
 bool AnalysisPredictor::Init(
     const std::shared_ptr<framework::Scope> &parent_scope,
     const std::shared_ptr<framework::ProgramDesc> &program) {
@@ -274,47 +325,10 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
   feed_tensors_.resize(inputs.size());
   for (size_t i = 0; i < inputs.size(); ++i) {
-    auto &input = feed_tensors_[i];
-    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
-    void *input_ptr;
-    if (inputs[i].dtype == PaddleDType::INT64) {
-      input_ptr = input.mutable_data<int64_t>(ddim, place_);
-    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
-      input_ptr = input.mutable_data<float>(ddim, place_);
-    } else if (inputs[i].dtype == PaddleDType::INT32) {
-      input_ptr = input.mutable_data<int32_t>(ddim, place_);
-    } else {
-      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
+    framework::LoDTensor *input = &feed_tensors_[i];
+    if (!PaddleTensorToLoDTensor(inputs[i], input, place_)) {
       return false;
     }
-    PADDLE_ENFORCE_NOT_NULL(input_ptr);
-    PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data());
-    if (platform::is_cpu_place(place_)) {
-      // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
-      std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
-                  inputs[i].data.length());
-    } else {
-#ifdef PADDLE_WITH_CUDA
-      platform::DeviceContextPool &pool =
-          platform::DeviceContextPool::Instance();
-      auto *dev_ctx =
-          static_cast<const platform::CUDADeviceContext *>(pool.Get(place_));
-      auto dst_gpu_place = boost::get<platform::CUDAPlace>(place_);
-      memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
-                   platform::CPUPlace(), inputs[i].data.data(),
-                   inputs[i].data.length(), dev_ctx->stream());
-#else
-      PADDLE_THROW("Not compile with CUDA, should not reach here.");
-#endif
-    }
-    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
-    framework::LoD lod;
-    for (auto &level : inputs[i].lod) {
-      lod.emplace_back(level);
-    }
-    input.set_lod(lod);
     int idx = -1;
     if (config_.specify_input_name_) {
       auto name = inputs[i].name;
@@ -326,7 +340,7 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
     } else {
       idx = boost::get<int>(feeds_[i]->GetAttr("col"));
     }
-    framework::SetFeedVariable(scope, input, "feed", idx);
+    framework::SetFeedVariable(scope, *input, "feed", idx);
   }
   return true;
 }
......
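For reference, here is a minimal sketch (not part of this commit) of what the extracted helper does for a CPU feed. It assumes PaddleTensorToLoDTensor has been declared where the caller can see it; within this commit it is only defined in analysis_predictor.cc, so the call site below is illustrative.

#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/api/paddle_api.h"
#include "paddle/fluid/platform/place.h"

bool FeedOneCpuTensor(paddle::framework::LoDTensor *dst) {
  std::vector<float> buf{1.f, 2.f, 3.f};
  paddle::PaddleTensor pt;
  pt.name = "x";
  pt.shape = {1, 3};
  pt.dtype = paddle::PaddleDType::FLOAT32;
  pt.data = paddle::PaddleBuf(static_cast<void *>(buf.data()),
                              buf.size() * sizeof(float));
  // Allocates dst as a {1, 3} float tensor on the CPU place, copies the raw
  // payload in, and copies the LoD over; returns false on an unsupported dtype.
  return paddle::PaddleTensorToLoDTensor(pt, dst, paddle::platform::CPUPlace());
}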
@@ -39,6 +39,24 @@ extern std::string paddle::framework::DataTypeToString(
 namespace paddle {
 namespace inference {
 
+template <typename T>
+constexpr PaddleDType PaddleTensorGetDType();
+
+template <>
+constexpr PaddleDType PaddleTensorGetDType<int32_t>() {
+  return PaddleDType::INT32;
+}
+
+template <>
+constexpr PaddleDType PaddleTensorGetDType<int64_t>() {
+  return PaddleDType::INT64;
+}
+
+template <>
+constexpr PaddleDType PaddleTensorGetDType<float>() {
+  return PaddleDType::FLOAT32;
+}
+
 using paddle::framework::DataTypeToString;
 // Timer for timer
......
 cc_library(benchmark SRCS benchmark.cc DEPS enforce)
 cc_test(test_benchmark SRCS benchmark_tester.cc DEPS benchmark)
+cc_library(infer_io_utils SRCS io_utils.cc DEPS paddle_inference_api lod_tensor)
+cc_test(infer_io_utils_tester SRCS io_utils_tester.cc DEPS infer_io_utils)
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/utils/io_utils.h"
#include <string>
#include <vector>
#include "paddle/fluid/inference/analysis/helper.h"
namespace paddle {
namespace inference {
// =========================================================
// Item | Type | Bytes
// ---------------------------------------------------------
// Version | uint32_t | 4
// ---------------------------------------------------------
// Bytes of `Name` | uint64_t | 8
// Name | char | Bytes of `Name`
// ---------------------------------------------------------
// LoD Level | uint64_t | 8
// Bytes of `LoD[0]`| uint64_t | 8
// LoD[0] | uint64_t | Bytes of `LoD[0]`
// ... | ... | ...
// ---------------------------------------------------------
// Dims of `Shape` | uint64_t | 8
// Shape            | int32_t  | Dims * 4
// ---------------------------------------------------------
// Dtype | int32_t | 4
// Bytes of `Data` | uint64_t | 8
// Data | Dtype | Bytes of `Data`
// =========================================================
void SerializePDTensorToStream(std::ostream *os, const PaddleTensor &tensor) {
// 1. Version
os->write(reinterpret_cast<const char *>(&kCurPDTensorVersion),
sizeof(kCurPDTensorVersion));
// 2. Name
uint64_t name_bytes = tensor.name.size();
os->write(reinterpret_cast<char *>(&name_bytes), sizeof(name_bytes));
os->write(tensor.name.c_str(), name_bytes);
// 3. LoD
auto lod = tensor.lod;
uint64_t lod_size = lod.size();
os->write(reinterpret_cast<const char *>(&lod_size), sizeof(lod_size));
for (auto &each : lod) {
auto size = each.size() * sizeof(size_t);
os->write(reinterpret_cast<const char *>(&size), sizeof(size));
os->write(reinterpret_cast<const char *>(each.data()),
static_cast<std::streamsize>(size));
}
// 4. Shape
size_t dims = tensor.shape.size();
os->write(reinterpret_cast<const char *>(&dims), sizeof(dims));
os->write(reinterpret_cast<const char *>(tensor.shape.data()),
sizeof(int) * dims);
// 5. Data
os->write(reinterpret_cast<const char *>(&tensor.dtype),
sizeof(tensor.dtype));
uint64_t length = tensor.data.length();
os->write(reinterpret_cast<const char *>(&length), sizeof(size_t));
os->write(reinterpret_cast<const char *>(tensor.data.data()), length);
}
void DeserializePDTensorToStream(std::istream &is, PaddleTensor *tensor) {
// 1. Version
uint32_t version;
is.read(reinterpret_cast<char *>(&version), sizeof(version));
// 2. Name
uint64_t name_bytes;
is.read(reinterpret_cast<char *>(&name_bytes), sizeof(name_bytes));
std::vector<char> bytes(name_bytes);
is.read(bytes.data(), name_bytes);
tensor->name = std::string(bytes.data(), name_bytes);
// 3. LoD
uint64_t lod_level;
is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
auto *lod = &(tensor->lod);
lod->resize(lod_level);
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size;
is.read(reinterpret_cast<char *>(&size), sizeof(size));
std::vector<size_t> tmp(size / sizeof(size_t));
is.read(reinterpret_cast<char *>(tmp.data()),
static_cast<std::streamsize>(size));
(*lod)[i] = tmp;
}
// 4. Shape
size_t dims;
is.read(reinterpret_cast<char *>(&dims), sizeof(dims));
tensor->shape.resize(dims);
is.read(reinterpret_cast<char *>(tensor->shape.data()), sizeof(int) * dims);
// 5. Data
uint64_t length;
is.read(reinterpret_cast<char *>(&tensor->dtype), sizeof(tensor->dtype));
is.read(reinterpret_cast<char *>(&length), sizeof(length));
tensor->data.Resize(length);
is.read(reinterpret_cast<char *>(tensor->data.data()), length);
}
// =========================================================
// Item | Type | Bytes
// ---------------------------------------------------------
// Version | uint32_t | 4
// ---------------------------------------------------------
// Size of Tensors | uint64_t | 8
// Tensors | ---- | ---
// ---------------------------------------------------------
void SerializePDTensorsToStream(std::ostream *os,
const std::vector<PaddleTensor> &tensors) {
// 1. Version
os->write(reinterpret_cast<const char *>(&kCurPDTensorVersion),
sizeof(kCurPDTensorVersion));
// 2. Tensors
uint64_t num = tensors.size();
os->write(reinterpret_cast<char *>(&num), sizeof(num));
for (const auto &tensor : tensors) {
SerializePDTensorToStream(os, tensor);
}
}
void DeserializePDTensorsToStream(std::istream &is,
std::vector<PaddleTensor> *tensors) {
// 1. Version
uint32_t version;
is.read(reinterpret_cast<char *>(&version), sizeof(version));
// 2. Tensors
uint64_t num;
is.read(reinterpret_cast<char *>(&num), sizeof(num));
tensors->resize(num);
for (auto &tensor : *tensors) {
DeserializePDTensorToStream(is, &tensor);
}
}
void SerializePDTensorsToFile(const std::string &path,
const std::vector<PaddleTensor> &tensors) {
std::ofstream fout(path, std::ios::binary);
SerializePDTensorsToStream(&fout, tensors);
fout.close();
}
void DeserializePDTensorsToFile(const std::string &path,
std::vector<PaddleTensor> *tensors) {
bool is_present = analysis::FileExists(path);
PADDLE_ENFORCE_EQ(is_present, true, platform::errors::InvalidArgument(
"Cannot open %s to read", path));
std::ifstream fin(path, std::ios::binary);
DeserializePDTensorsToStream(fin, tensors);
fin.close();
}
} // namespace inference
} // namespace paddle
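As a quick sanity check of the layout documented above, the following sketch (not part of this commit) computes the expected stream size for a tiny tensor. It assumes a 64-bit build where sizeof(size_t) == 8, sizeof(int) == 4, and the PaddleDType enum is 4 bytes: for name "Hello" (5 bytes), one LoD level {0, 1}, shape {1, 2}, and two float elements, the stream holds 4 (version) + 8 + 5 (name) + 8 + 8 + 16 (LoD) + 8 + 8 (shape) + 4 (dtype) + 8 + 8 (payload) = 85 bytes.

#include <cassert>
#include <sstream>
#include <vector>
#include "paddle/fluid/inference/utils/io_utils.h"

void CheckSerializedSize() {
  std::vector<float> vals{6.f, 8.f};
  paddle::PaddleTensor t;
  t.name = "Hello";
  t.shape = {1, 2};
  t.lod = {{0, 1}};
  t.dtype = paddle::PaddleDType::FLOAT32;
  t.data = paddle::PaddleBuf(static_cast<void *>(vals.data()),
                             vals.size() * sizeof(float));
  std::stringstream ss;
  paddle::inference::SerializePDTensorToStream(&ss, t);
  // 85 bytes per the field-by-field accounting in the lead-in above.
  assert(ss.str().size() == 85u);
}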
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/api/paddle_api.h"
namespace paddle {
namespace inference {
constexpr uint32_t kCurPDTensorVersion = 0;
void SerializePDTensorToStream(std::ostream* os, const PaddleTensor& tensor);
void DeserializePDTensorToStream(std::istream& is, PaddleTensor* tensor);
void SerializePDTensorsToStream(std::ostream* os,
const std::vector<PaddleTensor>& tensors);
void DeserializePDTensorsToStream(std::istream& is,
std::vector<PaddleTensor>* tensors);
void SerializePDTensorsToFile(const std::string& path,
const std::vector<PaddleTensor>& tensors);
void DeserializePDTensorsToFile(const std::string& path,
std::vector<PaddleTensor>* tensors);
} // namespace inference
} // namespace paddle
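A minimal round-trip sketch of the file-level helpers declared above (not part of this commit; "feeds.bin" is an arbitrary path and error handling is omitted):

#include <vector>
#include "paddle/fluid/inference/utils/io_utils.h"

void SaveAndRestore(const std::vector<paddle::PaddleTensor> &feeds) {
  // Write all tensors to disk in the format documented in io_utils.cc.
  paddle::inference::SerializePDTensorsToFile("feeds.bin", feeds);
  // Read them back; DeserializePDTensorsToFile enforces that the file exists.
  std::vector<paddle::PaddleTensor> restored;
  paddle::inference::DeserializePDTensorsToFile("feeds.bin", &restored);
  // restored now mirrors feeds: same names, LoD, dtypes, shapes, and raw bytes.
}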
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/utils/io_utils.h"
namespace paddle {
namespace inference {
namespace {
bool pd_tensor_equal(const paddle::PaddleTensor& ref,
const paddle::PaddleTensor& t) {
bool is_equal = true;
VLOG(3) << "ref.name: " << ref.name << ", t.name: " << t.name;
VLOG(3) << "ref.dtype: " << ref.dtype << ", t.dtype: " << t.dtype;
VLOG(3) << "ref.lod_level: " << ref.lod.size()
<< ", t.dtype: " << t.lod.size();
VLOG(3) << "ref.data_len: " << ref.data.length()
<< ", t.data_len: " << t.data.length();
return is_equal && (ref.name == t.name) && (ref.lod == t.lod) &&
(ref.dtype == t.dtype) &&
(std::memcmp(ref.data.data(), t.data.data(), ref.data.length()) == 0);
}
template <typename T>
void test_io_utils() {
std::vector<T> input({6, 8});
paddle::PaddleTensor in;
in.name = "Hello";
in.shape = {1, 2};
in.lod = std::vector<std::vector<size_t>>{{0, 1}};
in.data = paddle::PaddleBuf(static_cast<void*>(input.data()),
input.size() * sizeof(T));
in.dtype = paddle::inference::PaddleTensorGetDType<T>();
std::stringstream ss;
paddle::inference::SerializePDTensorToStream(&ss, in);
paddle::PaddleTensor out;
paddle::inference::DeserializePDTensorToStream(ss, &out);
ASSERT_TRUE(pd_tensor_equal(in, out));
}
} // namespace
} // namespace inference
} // namespace paddle
TEST(infer_io_utils, float32) { paddle::inference::test_io_utils<float>(); }
TEST(infer_io_utils, int64) { paddle::inference::test_io_utils<int64_t>(); }
TEST(infer_io_utils, tensors) {
// Create a float32 tensor.
std::vector<float> input_fp32({1.1f, 3.2f, 5.0f, 8.2f});
paddle::PaddleTensor in_fp32;
in_fp32.name = "Tensor.fp32_0";
in_fp32.shape = {2, 2};
in_fp32.data = paddle::PaddleBuf(static_cast<void*>(input_fp32.data()),
input_fp32.size() * sizeof(float));
in_fp32.dtype = paddle::inference::PaddleTensorGetDType<float>();
  // Create an int64 tensor.
  std::vector<int64_t> input_int64({5, 8});
paddle::PaddleTensor in_int64;
in_int64.name = "Tensor.int64_0";
in_int64.shape = {1, 2};
in_int64.lod = std::vector<std::vector<size_t>>{{0, 1}};
in_int64.data = paddle::PaddleBuf(static_cast<void*>(input_int64.data()),
input_int64.size() * sizeof(int64_t));
in_int64.dtype = paddle::inference::PaddleTensorGetDType<int64_t>();
// Serialize tensors.
std::vector<paddle::PaddleTensor> tensors_in({in_fp32, in_int64});
std::string file_path = "./io_utils_tensors";
paddle::inference::SerializePDTensorsToFile(file_path, tensors_in);
// Deserialize tensors.
std::vector<paddle::PaddleTensor> tensors_out;
paddle::inference::DeserializePDTensorsToFile(file_path, &tensors_out);
// Check results.
ASSERT_EQ(tensors_in.size(), tensors_out.size());
for (size_t i = 0; i < tensors_in.size(); ++i) {
ASSERT_TRUE(
paddle::inference::pd_tensor_equal(tensors_in[i], tensors_out[i]));
}
}
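The tester only memcmp's the raw buffers; to consume the payload of a deserialized tensor, the caller reinterprets the PaddleBuf bytes according to the recorded dtype. A small sketch (not part of this commit; assumes the caller has already checked that dtype is FLOAT32):

#include <vector>
#include "paddle/fluid/inference/api/paddle_api.h"

std::vector<float> ToFloats(const paddle::PaddleTensor &t) {
  // PaddleBuf stores untyped bytes; length() is the payload size in bytes.
  const float *begin = static_cast<const float *>(t.data.data());
  return std::vector<float>(begin, begin + t.data.length() / sizeof(float));
}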
 set(PYBIND_DEPS pybind python proto_desc memory executor fleet_wrapper box_wrapper prune
   feed_fetch_method pass_builder parallel_executor profiler layer tracer engine scope_pool
   analysis_predictor imperative_profiler imperative_flag save_load_util dlpack_tensor device_context
-  gloo_wrapper)
+  gloo_wrapper infer_io_utils)
 if (WITH_NCCL)
   set(PYBIND_DEPS ${PYBIND_DEPS} nccl_wrapper)
......
@@ -27,8 +27,10 @@
 #include <utility>
 #include <vector>
 #include "paddle/fluid/inference/api/analysis_predictor.h"
+#include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_pass_builder.h"
+#include "paddle/fluid/inference/utils/io_utils.h"
 
 namespace py = pybind11;
@@ -78,24 +80,6 @@ void PaddleBufReset(PaddleBuf &buf, py::array_t<T> data) {  // NOLINT
                             static_cast<T *>(buf.data()));
 }
 
-template <typename T>
-constexpr PaddleDType PaddleTensorGetDType();
-
-template <>
-constexpr PaddleDType PaddleTensorGetDType<int32_t>() {
-  return PaddleDType::INT32;
-}
-
-template <>
-constexpr PaddleDType PaddleTensorGetDType<int64_t>() {
-  return PaddleDType::INT64;
-}
-
-template <>
-constexpr PaddleDType PaddleTensorGetDType<float>() {
-  return PaddleDType::FLOAT32;
-}
-
 template <typename T>
 PaddleTensor PaddleTensorCreate(
     py::array_t<T> data, const std::string name = "",
@@ -111,7 +95,7 @@ PaddleTensor PaddleTensorCreate(
     tensor.data = PaddleBuf(data.mutable_data(), data.size() * sizeof(T));
   }
 
-  tensor.dtype = PaddleTensorGetDType<T>();
+  tensor.dtype = inference::PaddleTensorGetDType<T>();
   tensor.name = name;
   tensor.lod = lod;
   tensor.shape.resize(data.ndim());
@@ -192,6 +176,12 @@ py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) {  // NOLINT
   }
   return array;
 }
+
+py::bytes SerializePDTensorToBytes(PaddleTensor &tensor) {  // NOLINT
+  std::stringstream ss;
+  paddle::inference::SerializePDTensorToStream(&ss, tensor);
+  return static_cast<py::bytes>(ss.str());
+}
 }  // namespace
 
 void BindInferenceApi(py::module *m) {
@@ -214,6 +204,7 @@ void BindInferenceApi(py::module *m) {
   m->def("create_paddle_predictor",
          &paddle::CreatePaddlePredictor<NativeConfig>);
   m->def("paddle_dtype_size", &paddle::PaddleDtypeSize);
+  m->def("paddle_tensor_to_bytes", &SerializePDTensorToBytes);
 }
 
 namespace {
......