diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index bca662b870b01f62d8619b63d4b6f908c37a70fd..88723e24184cce240d4bccd1b4c15a1ea8f14ed6 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -76,6 +76,7 @@ set(SHARED_INFERENCE_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/utils/io_utils.cc
     ${mkldnn_quantizer_src_file})
 
 # Create shared inference library defaultly
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 107e5ae7d818d24d664e4a02530b718f9460364e..5aa3d7a0527bc2736c47abf9c2bd47d52b26ce9d 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -71,6 +71,57 @@ bool IsPersistable(const framework::VarDesc *var) {
 }
 }  // namespace
 
+bool PaddleTensorToLoDTensor(const PaddleTensor &pt, framework::LoDTensor *t,
+                             const platform::Place &place) {
+  framework::DDim ddim = framework::make_ddim(pt.shape);
+  void *input_ptr;
+  if (pt.dtype == PaddleDType::INT64) {
+    input_ptr = t->mutable_data<int64_t>(ddim, place);
+  } else if (pt.dtype == PaddleDType::FLOAT32) {
+    input_ptr = t->mutable_data<float>(ddim, place);
+  } else if (pt.dtype == PaddleDType::INT32) {
+    input_ptr = t->mutable_data<int32_t>(ddim, place);
+  } else {
+    LOG(ERROR) << "unsupported feed type " << pt.dtype;
+    return false;
+  }
+
+  PADDLE_ENFORCE_NOT_NULL(
+      input_ptr,
+      paddle::platform::errors::Fatal(
+          "Cannot convert to LoDTensor because LoDTensor creation failed."));
+  PADDLE_ENFORCE_NOT_NULL(
+      pt.data.data(),
+      paddle::platform::errors::InvalidArgument(
+          "The data contained in the input PaddleTensor is illegal."));
+
+  if (platform::is_cpu_place(place)) {
+    // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
+    std::memcpy(static_cast<void *>(input_ptr), pt.data.data(),
+                pt.data.length());
+  } else {
+#ifdef PADDLE_WITH_CUDA
+    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+    auto *dev_ctx =
+        static_cast<const platform::CUDADeviceContext *>(pool.Get(place));
+    auto dst_gpu_place = boost::get<platform::CUDAPlace>(place);
+    memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
+                 platform::CPUPlace(), pt.data.data(), pt.data.length(),
+                 dev_ctx->stream());
+#else
+    PADDLE_THROW(paddle::platform::errors::Fatal(
+        "Not compile with CUDA, should not reach here."));
+#endif
+  }
+  // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
+  framework::LoD lod;
+  for (auto &level : pt.lod) {
+    lod.emplace_back(level);
+  }
+  t->set_lod(lod);
+  return true;
+}
+
 bool AnalysisPredictor::Init(
     const std::shared_ptr<framework::Scope> &parent_scope,
     const std::shared_ptr<framework::ProgramDesc> &program) {
@@ -274,47 +325,10 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
   feed_tensors_.resize(inputs.size());
 
   for (size_t i = 0; i < inputs.size(); ++i) {
-    auto &input = feed_tensors_[i];
-    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
-    void *input_ptr;
-    if (inputs[i].dtype == PaddleDType::INT64) {
-      input_ptr = input.mutable_data<int64_t>(ddim, place_);
-    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
-      input_ptr = input.mutable_data<float>(ddim, place_);
-    } else if (inputs[i].dtype == PaddleDType::INT32) {
-      input_ptr = input.mutable_data<int32_t>(ddim, place_);
-    } else {
-      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
+    framework::LoDTensor *input = &feed_tensors_[i];
+    if (!PaddleTensorToLoDTensor(inputs[i], input, place_)) {
       return false;
     }
-
-    PADDLE_ENFORCE_NOT_NULL(input_ptr);
-    PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data());
-
-    if (platform::is_cpu_place(place_)) {
-      // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
-      std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
-                  inputs[i].data.length());
-    } else {
-#ifdef PADDLE_WITH_CUDA
-      platform::DeviceContextPool &pool =
-          platform::DeviceContextPool::Instance();
-      auto *dev_ctx =
-          static_cast<const platform::CUDADeviceContext *>(pool.Get(place_));
-      auto dst_gpu_place = boost::get<platform::CUDAPlace>(place_);
-      memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
-                   platform::CPUPlace(), inputs[i].data.data(),
-                   inputs[i].data.length(), dev_ctx->stream());
-#else
-      PADDLE_THROW("Not compile with CUDA, should not reach here.");
-#endif
-    }
-    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
-    framework::LoD lod;
-    for (auto &level : inputs[i].lod) {
-      lod.emplace_back(level);
-    }
-    input.set_lod(lod);
     int idx = -1;
     if (config_.specify_input_name_) {
       auto name = inputs[i].name;
@@ -326,7 +340,7 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
     } else {
       idx = boost::get<int>(feeds_[i]->GetAttr("col"));
     }
-    framework::SetFeedVariable(scope, input, "feed", idx);
+    framework::SetFeedVariable(scope, *input, "feed", idx);
   }
   return true;
 }
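The change above factors the feed conversion out of `SetFeed` into a reusable `PaddleTensorToLoDTensor`. As a minimal caller-side sketch (not part of the patch; `MakeFeed` and the input name are hypothetical), this is the contract a feed must satisfy before conversion: a supported dtype (INT32/INT64/FLOAT32), a non-null buffer, and a `data.length()` consistent with `shape`:

```cpp
#include <cstring>
#include <vector>
#include "paddle/fluid/inference/api/paddle_api.h"

// Sketch only: build a PaddleTensor that PaddleTensorToLoDTensor accepts.
paddle::PaddleTensor MakeFeed(const std::vector<int64_t> &ids) {
  paddle::PaddleTensor feed;
  feed.name = "word_ids";  // hypothetical input name
  feed.shape = {static_cast<int>(ids.size()), 1};
  feed.dtype = paddle::PaddleDType::INT64;
  // PaddleBuf(length) owns its allocation, so the feed does not alias
  // the caller's vector.
  feed.data = paddle::PaddleBuf(ids.size() * sizeof(int64_t));
  std::memcpy(feed.data.data(), ids.data(), ids.size() * sizeof(int64_t));
  feed.lod = {{0, ids.size()}};  // one LoD level; copied into the LoDTensor
  return feed;
}
```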
diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h
index 907d35b298c5bff872afe5cbfe12201b087c6d97..b58c300c2ed03c02a6936502d356745b799eb4cc 100644
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
@@ -39,6 +39,24 @@ extern std::string paddle::framework::DataTypeToString(
 namespace paddle {
 namespace inference {
 
+template <typename T>
+constexpr PaddleDType PaddleTensorGetDType();
+
+template <>
+constexpr PaddleDType PaddleTensorGetDType<int32_t>() {
+  return PaddleDType::INT32;
+}
+
+template <>
+constexpr PaddleDType PaddleTensorGetDType<int64_t>() {
+  return PaddleDType::INT64;
+}
+
+template <>
+constexpr PaddleDType PaddleTensorGetDType<float>() {
+  return PaddleDType::FLOAT32;
+}
+
 using paddle::framework::DataTypeToString;
 
 // Timer for timer
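Because the specializations are `constexpr`, the type-to-dtype mapping can be checked at compile time. A small usage sketch (the `EmptyTensorOf` helper is hypothetical, not in the patch):

```cpp
#include <utility>
#include <vector>
#include "paddle/fluid/inference/api/helper.h"

// Mismatches between element type and dtype tag fail at compile time.
static_assert(paddle::inference::PaddleTensorGetDType<float>() ==
                  paddle::PaddleDType::FLOAT32,
              "float must map to FLOAT32");

// Hypothetical helper: stamp out a typed-but-empty tensor descriptor.
template <typename T>
paddle::PaddleTensor EmptyTensorOf(std::vector<int> shape) {
  paddle::PaddleTensor t;
  t.shape = std::move(shape);
  t.dtype = paddle::inference::PaddleTensorGetDType<T>();
  return t;
}
```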
diff --git a/paddle/fluid/inference/utils/CMakeLists.txt b/paddle/fluid/inference/utils/CMakeLists.txt
index 2104e4ac7222258ee025bd5acd60b1db251df654..956cd739371ce70873c69c37cc9f80bdb42fa6af 100644
--- a/paddle/fluid/inference/utils/CMakeLists.txt
+++ b/paddle/fluid/inference/utils/CMakeLists.txt
@@ -1,2 +1,4 @@
 cc_library(benchmark SRCS benchmark.cc DEPS enforce)
 cc_test(test_benchmark SRCS benchmark_tester.cc DEPS benchmark)
+cc_library(infer_io_utils SRCS io_utils.cc DEPS paddle_inference_api lod_tensor)
+cc_test(infer_io_utils_tester SRCS io_utils_tester.cc DEPS infer_io_utils)
diff --git a/paddle/fluid/inference/utils/io_utils.cc b/paddle/fluid/inference/utils/io_utils.cc
new file mode 100644
index 0000000000000000000000000000000000000000..346fa481325baca2ec526aa9cd64c1279458c5d5
--- /dev/null
+++ b/paddle/fluid/inference/utils/io_utils.cc
@@ -0,0 +1,163 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/utils/io_utils.h"
+#include <fstream>
+#include <vector>
+#include "paddle/fluid/inference/analysis/helper.h"
+
+namespace paddle {
+namespace inference {
+
+// =========================================================
+// Item              |     Type     |       Bytes
+// ---------------------------------------------------------
+// Version           |   uint32_t   |         4
+// ---------------------------------------------------------
+// Bytes of `Name`   |   uint64_t   |         8
+// Name              |   char       |   Bytes of `Name`
+// ---------------------------------------------------------
+// LoD Level         |   uint64_t   |         8
+// Bytes of `LoD[0]` |   uint64_t   |         8
+// LoD[0]            |   uint64_t   |   Bytes of `LoD[0]`
+// ...               |   ...        |        ...
+// ---------------------------------------------------------
+// Dims of `Shape`   |   uint64_t   |         8
+// Shape             |   int32_t    |      Dims * 4
+// ---------------------------------------------------------
+// Dtype             |   int32_t    |         4
+// Bytes of `Data`   |   uint64_t   |         8
+// Data              |   Dtype      |   Bytes of `Data`
+// =========================================================
+void SerializePDTensorToStream(std::ostream *os, const PaddleTensor &tensor) {
+  // 1. Version
+  os->write(reinterpret_cast<const char *>(&kCurPDTensorVersion),
+            sizeof(kCurPDTensorVersion));
+  // 2. Name
+  uint64_t name_bytes = tensor.name.size();
+  os->write(reinterpret_cast<char *>(&name_bytes), sizeof(name_bytes));
+  os->write(tensor.name.c_str(), name_bytes);
+  // 3. LoD
+  auto lod = tensor.lod;
+  uint64_t lod_size = lod.size();
+  os->write(reinterpret_cast<char *>(&lod_size), sizeof(lod_size));
+  for (auto &each : lod) {
+    auto size = each.size() * sizeof(size_t);
+    os->write(reinterpret_cast<char *>(&size), sizeof(size));
+    os->write(reinterpret_cast<const char *>(each.data()),
+              static_cast<std::streamsize>(size));
+  }
+  // 4. Shape
+  size_t dims = tensor.shape.size();
+  os->write(reinterpret_cast<char *>(&dims), sizeof(dims));
+  os->write(reinterpret_cast<const char *>(tensor.shape.data()),
+            sizeof(int) * dims);
+  // 5. Data
+  os->write(reinterpret_cast<const char *>(&tensor.dtype),
+            sizeof(tensor.dtype));
+  uint64_t length = tensor.data.length();
+  os->write(reinterpret_cast<char *>(&length), sizeof(length));
+  os->write(reinterpret_cast<const char *>(tensor.data.data()), length);
+}
+
+void DeserializePDTensorToStream(std::istream &is, PaddleTensor *tensor) {
+  // 1. Version
+  uint32_t version;
+  is.read(reinterpret_cast<char *>(&version), sizeof(version));
+  // 2. Name
+  uint64_t name_bytes;
+  is.read(reinterpret_cast<char *>(&name_bytes), sizeof(name_bytes));
+  std::vector<char> bytes(name_bytes);
+  is.read(bytes.data(), name_bytes);
+  tensor->name = std::string(bytes.data(), name_bytes);
+  // 3. LoD
+  uint64_t lod_level;
+  is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
+  auto *lod = &(tensor->lod);
+  lod->resize(lod_level);
+  for (uint64_t i = 0; i < lod_level; ++i) {
+    uint64_t size;
+    is.read(reinterpret_cast<char *>(&size), sizeof(size));
+    std::vector<size_t> tmp(size / sizeof(size_t));
+    is.read(reinterpret_cast<char *>(tmp.data()),
+            static_cast<std::streamsize>(size));
+    (*lod)[i] = tmp;
+  }
+  // 4. Shape
+  size_t dims;
+  is.read(reinterpret_cast<char *>(&dims), sizeof(dims));
+  tensor->shape.resize(dims);
+  is.read(reinterpret_cast<char *>(tensor->shape.data()), sizeof(int) * dims);
+  // 5. Data
+  uint64_t length;
+  is.read(reinterpret_cast<char *>(&tensor->dtype), sizeof(tensor->dtype));
+  is.read(reinterpret_cast<char *>(&length), sizeof(length));
+  tensor->data.Resize(length);
+  is.read(reinterpret_cast<char *>(tensor->data.data()), length);
+}
+
+// =========================================================
+// Item              |     Type     |       Bytes
+// ---------------------------------------------------------
+// Version           |   uint32_t   |         4
+// ---------------------------------------------------------
+// Size of Tensors   |   uint64_t   |         8
+// Tensors           |     ----     |        ---
+// ---------------------------------------------------------
+void SerializePDTensorsToStream(std::ostream *os,
+                                const std::vector<PaddleTensor> &tensors) {
+  // 1. Version
+  os->write(reinterpret_cast<const char *>(&kCurPDTensorVersion),
+            sizeof(kCurPDTensorVersion));
+  // 2. Tensors
+  uint64_t num = tensors.size();
+  os->write(reinterpret_cast<char *>(&num), sizeof(num));
+  for (const auto &tensor : tensors) {
+    SerializePDTensorToStream(os, tensor);
+  }
+}
+
+void DeserializePDTensorsToStream(std::istream &is,
+                                  std::vector<PaddleTensor> *tensors) {
+  // 1. Version
+  uint32_t version;
+  is.read(reinterpret_cast<char *>(&version), sizeof(version));
+  // 2. Tensors
+  uint64_t num;
+  is.read(reinterpret_cast<char *>(&num), sizeof(num));
+  tensors->resize(num);
+  for (auto &tensor : *tensors) {
+    DeserializePDTensorToStream(is, &tensor);
+  }
+}
+
+void SerializePDTensorsToFile(const std::string &path,
+                              const std::vector<PaddleTensor> &tensors) {
+  std::ofstream fout(path, std::ios::binary);
+  SerializePDTensorsToStream(&fout, tensors);
+  fout.close();
+}
+
+void DeserializePDTensorsToFile(const std::string &path,
+                                std::vector<PaddleTensor> *tensors) {
+  bool is_present = analysis::FileExists(path);
+  PADDLE_ENFORCE_EQ(is_present, true, platform::errors::InvalidArgument(
+                                          "Cannot open %s to read", path));
+  std::ifstream fin(path, std::ios::binary);
+  DeserializePDTensorsToStream(fin, tensors);
+  fin.close();
+}
+
+}  // namespace inference
+}  // namespace paddle
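To make the layout table concrete, here is a round-trip sketch for a single tensor with the byte accounting worked out. This is not part of the patch, and it assumes a 64-bit `size_t` and a 4-byte `PaddleDType` enum:

```cpp
#include <sstream>
#include <vector>
#include "paddle/fluid/inference/utils/io_utils.h"

void RoundTripOneTensor() {
  std::vector<float> buf = {1.0f, 2.0f};
  paddle::PaddleTensor in;
  in.name = "x";      // 1 byte of name payload
  in.shape = {1, 2};  // 2 dims * 4 bytes
  in.lod = {{0, 1}};  // 1 level with 2 entries * 8 bytes
  in.data = paddle::PaddleBuf(buf.data(), buf.size() * sizeof(float));
  in.dtype = paddle::PaddleDType::FLOAT32;

  std::stringstream ss;
  paddle::inference::SerializePDTensorToStream(&ss, in);
  // Expected stream size: 4 (version) + 8 + 1 (name) + 8 (lod level)
  // + 8 + 16 (one lod level) + 8 + 8 (shape) + 4 (dtype)
  // + 8 + 8 (data) = 81 bytes.

  paddle::PaddleTensor out;
  paddle::inference::DeserializePDTensorToStream(ss, &out);
}
```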
diff --git a/paddle/fluid/inference/utils/io_utils.h b/paddle/fluid/inference/utils/io_utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..853aba168b524eb289d4f57645bcaee809acdd0b
--- /dev/null
+++ b/paddle/fluid/inference/utils/io_utils.h
@@ -0,0 +1,40 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/inference/api/paddle_api.h"
+
+namespace paddle {
+namespace inference {
+
+constexpr uint32_t kCurPDTensorVersion = 0;
+
+void SerializePDTensorToStream(std::ostream* os, const PaddleTensor& tensor);
+void DeserializePDTensorToStream(std::istream& is, PaddleTensor* tensor);
+
+void SerializePDTensorsToStream(std::ostream* os,
+                                const std::vector<PaddleTensor>& tensors);
+void DeserializePDTensorsToStream(std::istream& is,
+                                  std::vector<PaddleTensor>* tensors);
+
+void SerializePDTensorsToFile(const std::string& path,
+                              const std::vector<PaddleTensor>& tensors);
+void DeserializePDTensorsToFile(const std::string& path,
+                                std::vector<PaddleTensor>* tensors);
+}  // namespace inference
+}  // namespace paddle
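A short usage sketch of the file-level API declared here (not part of the patch; the path and helper name are illustrative):

```cpp
#include <string>
#include <vector>
#include "paddle/fluid/inference/utils/io_utils.h"

void SaveAndReloadFeeds(const std::vector<paddle::PaddleTensor> &feeds) {
  const std::string path = "./feeds.pdtensor";  // illustrative path
  paddle::inference::SerializePDTensorsToFile(path, feeds);

  std::vector<paddle::PaddleTensor> reloaded;
  // Raises InvalidArgument if the file cannot be opened.
  paddle::inference::DeserializePDTensorsToFile(path, &reloaded);
}
```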
diff --git a/paddle/fluid/inference/utils/io_utils_tester.cc b/paddle/fluid/inference/utils/io_utils_tester.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c8aa03c619e86410cb0e8c3b311d4a08d9ea8769
--- /dev/null
+++ b/paddle/fluid/inference/utils/io_utils_tester.cc
@@ -0,0 +1,97 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "paddle/fluid/inference/api/helper.h"
+#include "paddle/fluid/inference/utils/io_utils.h"
+
+namespace paddle {
+namespace inference {
+namespace {
+
+bool pd_tensor_equal(const paddle::PaddleTensor& ref,
+                     const paddle::PaddleTensor& t) {
+  bool is_equal = true;
+  VLOG(3) << "ref.name: " << ref.name << ", t.name: " << t.name;
+  VLOG(3) << "ref.dtype: " << ref.dtype << ", t.dtype: " << t.dtype;
+  VLOG(3) << "ref.lod_level: " << ref.lod.size()
+          << ", t.lod_level: " << t.lod.size();
+  VLOG(3) << "ref.data_len: " << ref.data.length()
+          << ", t.data_len: " << t.data.length();
+  return is_equal && (ref.name == t.name) && (ref.lod == t.lod) &&
+         (ref.dtype == t.dtype) &&
+         (std::memcmp(ref.data.data(), t.data.data(), ref.data.length()) == 0);
+}
+
+template <typename T>
+void test_io_utils() {
+  std::vector<T> input({6, 8});
+  paddle::PaddleTensor in;
+  in.name = "Hello";
+  in.shape = {1, 2};
+  in.lod = std::vector<std::vector<size_t>>{{0, 1}};
+  in.data = paddle::PaddleBuf(static_cast<void*>(input.data()),
+                              input.size() * sizeof(T));
+  in.dtype = paddle::inference::PaddleTensorGetDType<T>();
+  std::stringstream ss;
+  paddle::inference::SerializePDTensorToStream(&ss, in);
+  paddle::PaddleTensor out;
+  paddle::inference::DeserializePDTensorToStream(ss, &out);
+  ASSERT_TRUE(pd_tensor_equal(in, out));
+}
+}  // namespace
+}  // namespace inference
+}  // namespace paddle
+
+TEST(infer_io_utils, float32) { paddle::inference::test_io_utils<float>(); }
+TEST(infer_io_utils, int64) { paddle::inference::test_io_utils<int64_t>(); }
+
+TEST(infer_io_utils, tensors) {
+  // Create a float32 tensor.
+  std::vector<float> input_fp32({1.1f, 3.2f, 5.0f, 8.2f});
+  paddle::PaddleTensor in_fp32;
+  in_fp32.name = "Tensor.fp32_0";
+  in_fp32.shape = {2, 2};
+  in_fp32.data = paddle::PaddleBuf(static_cast<void*>(input_fp32.data()),
+                                   input_fp32.size() * sizeof(float));
+  in_fp32.dtype = paddle::inference::PaddleTensorGetDType<float>();
+
+  // Create an int64 tensor.
+  std::vector<int64_t> input_int64({5, 8});
+  paddle::PaddleTensor in_int64;
+  in_int64.name = "Tensor.int64_0";
+  in_int64.shape = {1, 2};
+  in_int64.lod = std::vector<std::vector<size_t>>{{0, 1}};
+  in_int64.data = paddle::PaddleBuf(static_cast<void*>(input_int64.data()),
+                                    input_int64.size() * sizeof(int64_t));
+  in_int64.dtype = paddle::inference::PaddleTensorGetDType<int64_t>();
+
+  // Serialize tensors.
+  std::vector<paddle::PaddleTensor> tensors_in({in_fp32, in_int64});
+  std::string file_path = "./io_utils_tensors";
+  paddle::inference::SerializePDTensorsToFile(file_path, tensors_in);
+
+  // Deserialize tensors.
+  std::vector<paddle::PaddleTensor> tensors_out;
+  paddle::inference::DeserializePDTensorsToFile(file_path, &tensors_out);
+
+  // Check results.
+  ASSERT_EQ(tensors_in.size(), tensors_out.size());
+  for (size_t i = 0; i < tensors_in.size(); ++i) {
+    ASSERT_TRUE(
+        paddle::inference::pd_tensor_equal(tensors_in[i], tensors_out[i]));
+  }
+}
diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index 87dceb1850f944c8fce99111bb4e34f1280a0959..0fad32d160fd388d6bf150e61634bddbe7d967aa 100644
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -1,7 +1,7 @@
 set(PYBIND_DEPS pybind python proto_desc memory executor fleet_wrapper box_wrapper prune
   feed_fetch_method pass_builder parallel_executor profiler layer tracer engine scope_pool
   analysis_predictor imperative_profiler imperative_flag save_load_util dlpack_tensor device_context
-  gloo_wrapper)
+  gloo_wrapper infer_io_utils)
 
 if (WITH_NCCL)
   set(PYBIND_DEPS ${PYBIND_DEPS} nccl_wrapper)
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 46babdcc6efa81fa445307276b32bd757ffbd7ed..2d5aae960acd10005031fb10ede49927e1f2268a 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -27,8 +27,10 @@
 #include <utility>
 #include <vector>
 #include "paddle/fluid/inference/api/analysis_predictor.h"
+#include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_pass_builder.h"
+#include "paddle/fluid/inference/utils/io_utils.h"
 
 namespace py = pybind11;
 
@@ -78,24 +80,6 @@ void PaddleBufReset(PaddleBuf &buf, py::array_t<T> data) {  // NOLINT
                  static_cast<T *>(buf.data()));
 }
 
-template <typename T>
-constexpr PaddleDType PaddleTensorGetDType();
-
-template <>
-constexpr PaddleDType PaddleTensorGetDType<int32_t>() {
-  return PaddleDType::INT32;
-}
-
-template <>
-constexpr PaddleDType PaddleTensorGetDType<int64_t>() {
-  return PaddleDType::INT64;
-}
-
-template <>
-constexpr PaddleDType PaddleTensorGetDType<float>() {
-  return PaddleDType::FLOAT32;
-}
-
 template <typename T>
 PaddleTensor PaddleTensorCreate(
     py::array_t<T> data, const std::string name = "",
@@ -111,7 +95,7 @@ PaddleTensor PaddleTensorCreate(
     tensor.data = PaddleBuf(data.mutable_data(), data.size() * sizeof(T));
   }
 
-  tensor.dtype = PaddleTensorGetDType<T>();
+  tensor.dtype = inference::PaddleTensorGetDType<T>();
   tensor.name = name;
   tensor.lod = lod;
   tensor.shape.resize(data.ndim());
@@ -192,6 +176,12 @@ py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) {  // NOLINT
   }
   return array;
 }
+
+py::bytes SerializePDTensorToBytes(PaddleTensor &tensor) {  // NOLINT
+  std::stringstream ss;
+  paddle::inference::SerializePDTensorToStream(&ss, tensor);
+  return static_cast<py::bytes>(ss.str());
+}
 }  // namespace
 
 void BindInferenceApi(py::module *m) {
@@ -214,6 +204,7 @@ void BindInferenceApi(py::module *m) {
   m->def("create_paddle_predictor",
          &paddle::CreatePaddlePredictor<NativeConfig>);
   m->def("paddle_dtype_size", &paddle::PaddleDtypeSize);
+  m->def("paddle_tensor_to_bytes", &SerializePDTensorToBytes);
 }
 
 namespace {