From b93b1e34aa1b45ecc64bad259412078e8904d44d Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Fri, 2 Sep 2022 04:55:44 -0500 Subject: [PATCH] [Phi] Migrate serialization utils (#45667) * add serialization funcs in phi * migrate serialization utils --- paddle/fluid/framework/CMakeLists.txt | 10 +- paddle/fluid/framework/lod_tensor.cc | 89 +------- paddle/fluid/framework/selected_rows_utils.cc | 62 +----- paddle/fluid/framework/selected_rows_utils.h | 2 +- paddle/fluid/framework/tensor_util.h | 12 -- paddle/phi/core/CMakeLists.txt | 4 + paddle/phi/core/serialization.cc | 190 ++++++++++++++++++ paddle/phi/core/serialization.h | 52 +++++ paddle/phi/core/tensor_meta.h | 17 +- 9 files changed, 285 insertions(+), 153 deletions(-) create mode 100644 paddle/phi/core/serialization.cc create mode 100644 paddle/phi/core/serialization.h diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index c9b6213cf6..a230346a8e 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -139,7 +139,13 @@ endif() cc_library( lod_tensor SRCS lod_tensor.cc - DEPS ddim mixed_vector place tensor framework_proto version) + DEPS ddim + mixed_vector + place + tensor + framework_proto + version + serialization) cc_test( lod_tensor_test @@ -1078,7 +1084,7 @@ cc_test( cc_library( selected_rows_utils SRCS selected_rows_utils.cc - DEPS selected_rows) + DEPS selected_rows serialization) cc_test( selected_rows_utils_test SRCS selected_rows_utils_test.cc diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc index aae3ad3bd4..bd78f9b0a0 100644 --- a/paddle/fluid/framework/lod_tensor.cc +++ b/paddle/fluid/framework/lod_tensor.cc @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/version.h" +#include "paddle/phi/core/serialization.h" namespace paddle { namespace framework { @@ -205,29 +206,7 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, void SerializeToStream(std::ostream &os, const LoDTensor &tensor, const platform::DeviceContext &dev_ctx) { - { // the 1st field, uint32_t version for LoDTensor - os.write(reinterpret_cast(&kCurTensorVersion), - sizeof(kCurTensorVersion)); - } - { - // the 2st field, LoD information - // uint64_t lod_level - // uint64_t lod_level_1 size in byte. - // int* lod_level_1 data - // ... - auto lod = tensor.lod(); - uint64_t size = lod.size(); - os.write(reinterpret_cast(&size), sizeof(size)); - - for (auto &each : lod) { - size = each.size() * sizeof(framework::LoD::value_type::value_type); - os.write(reinterpret_cast(&size), sizeof(size)); - os.write(reinterpret_cast(each.data()), - static_cast(size)); - } - } - // the 3st field, Tensor - TensorToStream(os, static_cast(tensor), dev_ctx); + phi::SerializeToStream(os, tensor, dev_ctx); } void SerializeToStream(std::ostream &os, const LoDTensor &tensor) { @@ -235,14 +214,14 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor) { const platform::DeviceContext *dev_ctx; auto place = tensor.place(); dev_ctx = pool.Get(place); - SerializeToStream(os, tensor, *dev_ctx); + phi::SerializeToStream(os, tensor, *dev_ctx); } void DeserializeFromStream(std::istream &os, LoDTensor *tensor) { platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); const platform::DeviceContext *dev_ctx; dev_ctx = pool.Get(platform::CPUPlace()); - DeserializeFromStream(os, tensor, *dev_ctx); + phi::DeserializeFromStream(os, tensor, *dev_ctx); } void DeserializeFromStream(std::istream &is, @@ -250,69 +229,13 @@ void DeserializeFromStream(std::istream &is, const platform::DeviceContext &dev_ctx, const size_t &seek, const std::vector &shape) { - { - // the 1st field, 
unit32_t version for LoDTensor - uint32_t version; - is.read(reinterpret_cast(&version), sizeof(version)); - PADDLE_ENFORCE_EQ(framework::IsTensorVersionSupported(version), - true, - platform::errors::InvalidArgument( - "Tensor version %u is not supported.", version)); - PADDLE_ENFORCE_EQ( - version, - 0U, - platform::errors::InvalidArgument( - "Deserialize to tensor failed, maybe the loaded file is " - "not a paddle model(expected file format: 0, but %u found).", - version)); - } - { - // the 2st field, LoD information - uint64_t lod_level; - is.read(reinterpret_cast(&lod_level), sizeof(lod_level)); - auto &lod = *tensor->mutable_lod(); - lod.resize(lod_level); - } - // the 3st filed, Tensor - TensorFromStream(is, static_cast(tensor), dev_ctx, seek, shape); + phi::DeserializeFromStream(is, tensor, dev_ctx, seek, shape); } void DeserializeFromStream(std::istream &is, LoDTensor *tensor, const platform::DeviceContext &dev_ctx) { - { - // the 1st field, unit32_t version for LoDTensor - uint32_t version; - is.read(reinterpret_cast(&version), sizeof(version)); - PADDLE_ENFORCE_EQ(framework::IsTensorVersionSupported(version), - true, - platform::errors::InvalidArgument( - "Tensor version %u is not supported.", version)); - PADDLE_ENFORCE_EQ( - version, - 0U, - platform::errors::InvalidArgument( - "Deserialize to tensor failed, maybe the loaded file is " - "not a paddle model(expected file format: 0, but %u found).", - version)); - } - { - // the 2st field, LoD information - uint64_t lod_level; - is.read(reinterpret_cast(&lod_level), sizeof(lod_level)); - auto &lod = *tensor->mutable_lod(); - lod.resize(lod_level); - for (uint64_t i = 0; i < lod_level; ++i) { - uint64_t size; - is.read(reinterpret_cast(&size), sizeof(size)); - std::vector tmp(size / sizeof(size_t)); - is.read(reinterpret_cast(tmp.data()), - static_cast(size)); - lod[i] = tmp; - } - } - // the 3st filed, Tensor - TensorFromStream(is, static_cast(tensor), dev_ctx); + phi::DeserializeFromStream(is, tensor, 
dev_ctx); } LoD ConvertToOffsetBasedLoD(const LoD &length_lod) { diff --git a/paddle/fluid/framework/selected_rows_utils.cc b/paddle/fluid/framework/selected_rows_utils.cc index 6d961b92f5..be0a4a2fa3 100644 --- a/paddle/fluid/framework/selected_rows_utils.cc +++ b/paddle/fluid/framework/selected_rows_utils.cc @@ -14,32 +14,15 @@ limitations under the License. */ #include "paddle/fluid/framework/selected_rows_utils.h" +#include "paddle/phi/core/serialization.h" + namespace paddle { namespace framework { void SerializeToStream(std::ostream& os, const phi::SelectedRows& selected_rows, const platform::DeviceContext& dev_ctx) { - { // the 1st field, uint32_t version - constexpr uint32_t version = 0; - os.write(reinterpret_cast(&version), sizeof(version)); - } - { - // the 2st field, rows information - auto& rows = selected_rows.rows(); - uint64_t size = rows.size(); - os.write(reinterpret_cast(&size), sizeof(size)); - for (uint64_t i = 0; i < size; ++i) { - os.write(reinterpret_cast(&rows[i]), sizeof(rows[i])); - } - } - { - // the 3st field, the height of SelectedRows - int64_t height = selected_rows.height(); - os.write(reinterpret_cast(&height), sizeof(height)); - } - // the 4st field, Tensor data - TensorToStream(os, selected_rows.value(), dev_ctx); + phi::SerializeToStream(os, selected_rows, dev_ctx); } void SerializeToStream(std::ostream& os, @@ -48,50 +31,21 @@ void SerializeToStream(std::ostream& os, const platform::DeviceContext* dev_ctx; auto place = selected_rows.place(); dev_ctx = pool.Get(place); - SerializeToStream(os, selected_rows, *dev_ctx); + phi::SerializeToStream(os, selected_rows, *dev_ctx); } -void DeserializeFromStream(std::istream& os, phi::SelectedRows* selected_rows) { +void DeserializeFromStream(std::istream& is, phi::SelectedRows* selected_rows) { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); const platform::DeviceContext* dev_ctx; dev_ctx = pool.Get(platform::CPUPlace()); - DeserializeFromStream(os, 
selected_rows, *dev_ctx); + phi::DeserializeFromStream(is, selected_rows, *dev_ctx); } void DeserializeFromStream(std::istream& is, phi::SelectedRows* selected_rows, const platform::DeviceContext& dev_ctx) { - { - // the 1st field, unit32_t version for SelectedRows - uint32_t version; - is.read(reinterpret_cast(&version), sizeof(version)); - PADDLE_ENFORCE_EQ(version, - 0U, - platform::errors::InvalidArgument( - "Only version 0 SelectedRows is supported.")); - } - { - // the 2st field, rows information - uint64_t size = 0; - is.read(reinterpret_cast(&size), sizeof(size)); - PADDLE_ENFORCE_EQ( - is.good(), - true, - platform::errors::Unavailable("Cannot read the number of rows.")); - auto& rows = *selected_rows->mutable_rows(); - rows.resize(size); - for (uint64_t i = 0; i < size; ++i) { - is.read(reinterpret_cast(&rows[i]), sizeof(int64_t)); - } - } - { - // the 3st field, the height of the SelectedRows - int64_t height; - is.read(reinterpret_cast(&height), sizeof(int64_t)); - selected_rows->set_height(height); - } - // the 4st field, tensor which contains the data - TensorFromStream(is, selected_rows->mutable_value(), dev_ctx); + phi::DeserializeFromStream(is, selected_rows, dev_ctx); } + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/selected_rows_utils.h b/paddle/fluid/framework/selected_rows_utils.h index cc76c8959e..c849af863b 100644 --- a/paddle/fluid/framework/selected_rows_utils.h +++ b/paddle/fluid/framework/selected_rows_utils.h @@ -42,7 +42,7 @@ void DeserializeFromStream(std::istream& is, void SerializeToStream(std::ostream& os, const phi::SelectedRows& selected_rows); -void DeserializeFromStream(std::istream& os, phi::SelectedRows* selected_rows); +void DeserializeFromStream(std::istream& is, phi::SelectedRows* selected_rows); } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h index c617441fd6..f70f3e3fb4 100644 --- 
a/paddle/fluid/framework/tensor_util.h +++ b/paddle/fluid/framework/tensor_util.h @@ -112,18 +112,6 @@ void TensorToVector(const Tensor& src, template void TesnorToVector(const Tensor& src, std::vector* dst); -void TensorToStream(std::ostream& os, - const Tensor& tensor, - const platform::DeviceContext& dev_ctx); -void TensorFromStream(std::istream& is, - Tensor* tensor, - const platform::DeviceContext& dev_ctx); -void TensorFromStream(std::istream& is, - Tensor* tensor, - const platform::DeviceContext& dev_ctx, - const size_t& seek, - const std::vector& shape); - // convert dlpack's DLTensor to tensor void TensorFromDLPack(const ::DLTensor& dl_tensor, framework::Tensor* dst); diff --git a/paddle/phi/core/CMakeLists.txt b/paddle/phi/core/CMakeLists.txt index e48f73694a..c353e21fbd 100644 --- a/paddle/phi/core/CMakeLists.txt +++ b/paddle/phi/core/CMakeLists.txt @@ -73,6 +73,10 @@ cc_library( phi_device_context SRCS device_context.cc DEPS dense_tensor selected_rows) +cc_library( + serialization + SRCS serialization.cc + DEPS version tensor phi_device_context) cc_library( custom_kernel diff --git a/paddle/phi/core/serialization.cc b/paddle/phi/core/serialization.cc new file mode 100644 index 0000000000..2fed5115f8 --- /dev/null +++ b/paddle/phi/core/serialization.cc @@ -0,0 +1,190 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/core/serialization.h" + +#include "paddle/phi/core/enforce.h" + +// Note: The TensorToStream depends on framework.proto, +// it is difficult to move into phi +#include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/framework/version.h" + +namespace phi { + +void SerializeToStream(std::ostream &os, + const DenseTensor &tensor, + const DeviceContext &dev_ctx) { + { // the 1st field, uint32_t version for DenseTensor + os.write( + reinterpret_cast(&paddle::framework::kCurTensorVersion), + sizeof(paddle::framework::kCurTensorVersion)); + } + { + // the 2nd field, LoD information + // uint64_t lod_level + // uint64_t lod_level_1 size in bytes. + // int* lod_level_1 data + // ... + auto lod = tensor.lod(); + uint64_t size = lod.size(); + os.write(reinterpret_cast(&size), sizeof(size)); + + for (auto &each : lod) { + size = each.size() * sizeof(phi::LoD::value_type::value_type); + os.write(reinterpret_cast(&size), sizeof(size)); + os.write(reinterpret_cast(each.data()), + static_cast(size)); + } + } + // the 3rd field, Tensor + paddle::framework::TensorToStream( + os, static_cast(tensor), dev_ctx); +} + +void DeserializeFromStream(std::istream &is, + DenseTensor *tensor, + const DeviceContext &dev_ctx, + const size_t &seek, + const std::vector &shape) { + { + // the 1st field, uint32_t version for DenseTensor + uint32_t version; + is.read(reinterpret_cast(&version), sizeof(version)); + PADDLE_ENFORCE_EQ(paddle::framework::IsTensorVersionSupported(version), + true, + phi::errors::InvalidArgument( + "Tensor version %u is not supported.", version)); + PADDLE_ENFORCE_EQ( + version, + 0U, + phi::errors::InvalidArgument( + "Deserialize to tensor failed, maybe the loaded file is " + "not a paddle model(expected file format: 0, but %u found).", + version)); + } + { + // the 2nd field, LoD information + uint64_t lod_level; + is.read(reinterpret_cast(&lod_level), sizeof(lod_level)); + auto &lod = *tensor->mutable_lod(); +
lod.resize(lod_level); + } + // the 3rd field, Tensor + paddle::framework::TensorFromStream( + is, static_cast(tensor), dev_ctx, seek, shape); +} + +void DeserializeFromStream(std::istream &is, + DenseTensor *tensor, + const DeviceContext &dev_ctx) { + { + // the 1st field, uint32_t version for DenseTensor + uint32_t version; + is.read(reinterpret_cast(&version), sizeof(version)); + PADDLE_ENFORCE_EQ(paddle::framework::IsTensorVersionSupported(version), + true, + phi::errors::InvalidArgument( + "Tensor version %u is not supported.", version)); + PADDLE_ENFORCE_EQ( + version, + 0U, + phi::errors::InvalidArgument( + "Deserialize to tensor failed, maybe the loaded file is " + "not a paddle model(expected file format: 0, but %u found).", + version)); + } + { + // the 2nd field, LoD information + uint64_t lod_level; + is.read(reinterpret_cast(&lod_level), sizeof(lod_level)); + auto &lod = *tensor->mutable_lod(); + lod.resize(lod_level); + for (uint64_t i = 0; i < lod_level; ++i) { + uint64_t size; + is.read(reinterpret_cast(&size), sizeof(size)); + std::vector tmp(size / sizeof(size_t)); + is.read(reinterpret_cast(tmp.data()), + static_cast(size)); + lod[i] = tmp; + } + } + // the 3rd field, Tensor + paddle::framework::TensorFromStream( + is, static_cast(tensor), dev_ctx); +} + +void SerializeToStream(std::ostream &os, + const SelectedRows &selected_rows, + const DeviceContext &dev_ctx) { + { // the 1st field, uint32_t version + constexpr uint32_t version = 0; + os.write(reinterpret_cast(&version), sizeof(version)); + } + { + // the 2nd field, rows information + auto &rows = selected_rows.rows(); + uint64_t size = rows.size(); + os.write(reinterpret_cast(&size), sizeof(size)); + for (uint64_t i = 0; i < size; ++i) { + os.write(reinterpret_cast(&rows[i]), sizeof(rows[i])); + } + } + { + // the 3rd field, the height of SelectedRows + int64_t height = selected_rows.height(); + os.write(reinterpret_cast(&height), sizeof(height)); + } + // the 4th field, Tensor data +
paddle::framework::TensorToStream(os, selected_rows.value(), dev_ctx); +} + +void DeserializeFromStream(std::istream &is, + SelectedRows *selected_rows, + const DeviceContext &dev_ctx) { + { + // the 1st field, uint32_t version for SelectedRows + uint32_t version; + is.read(reinterpret_cast(&version), sizeof(version)); + PADDLE_ENFORCE_EQ(version, + 0U, + phi::errors::InvalidArgument( + "Only version 0 SelectedRows is supported.")); + } + { + // the 2nd field, rows information + uint64_t size = 0; + is.read(reinterpret_cast(&size), sizeof(size)); + PADDLE_ENFORCE_EQ( + is.good(), + true, + phi::errors::Unavailable("Cannot read the number of rows.")); + auto &rows = *selected_rows->mutable_rows(); + rows.resize(size); + for (uint64_t i = 0; i < size; ++i) { + is.read(reinterpret_cast(&rows[i]), sizeof(int64_t)); + } + } + { + // the 3rd field, the height of the SelectedRows + int64_t height; + is.read(reinterpret_cast(&height), sizeof(int64_t)); + selected_rows->set_height(height); + } + // the 4th field, tensor which contains the data + paddle::framework::TensorFromStream( + is, selected_rows->mutable_value(), dev_ctx); +} + +} // namespace phi diff --git a/paddle/phi/core/serialization.h b/paddle/phi/core/serialization.h new file mode 100644 index 0000000000..4470b5c128 --- /dev/null +++ b/paddle/phi/core/serialization.h @@ -0,0 +1,52 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
*/ + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/device_context.h" +#include "paddle/phi/core/selected_rows.h" + +namespace phi { + +/* + * Serialize/Deserialize DenseTensor to std::ostream + * You can pass ofstream or ostringstream to serialize to file + * or to an in-memory string. GPU tensor will be copied to CPU. + */ +void SerializeToStream(std::ostream& os, + const DenseTensor& tensor, + const DeviceContext& dev_ctx); +void DeserializeFromStream(std::istream& is, + DenseTensor* tensor, + const DeviceContext& dev_ctx); +void DeserializeFromStream(std::istream& is, + DenseTensor* tensor, + const DeviceContext& dev_ctx, + const size_t& seek, + const std::vector& shape); + +/* + * Serialize/Deserialize SelectedRows to std::ostream + * You can pass ofstream or ostringstream to serialize to file + * or to an in-memory string. GPU tensor will be copied to CPU. + */ +void SerializeToStream(std::ostream& os, + const SelectedRows& selected_rows, + const DeviceContext& dev_ctx); +void DeserializeFromStream(std::istream& is, + SelectedRows* selected_rows, + const DeviceContext& dev_ctx); + +} // namespace phi diff --git a/paddle/phi/core/tensor_meta.h b/paddle/phi/core/tensor_meta.h index 4ad5abc772..18f276f8b6 100644 --- a/paddle/phi/core/tensor_meta.h +++ b/paddle/phi/core/tensor_meta.h @@ -25,8 +25,23 @@ limitations under the License. */ namespace phi { -using DDim = phi::DDim; +/* + * LoD is short for Level of Details. + * + * - in a level, each element indicates relative offset of the lower level + * - the first element should be 0 and that indicates that this sequence starts + * from 0 + * - each sequence's begin and end (non-inclusive) is level[id, id+1] + * + * For example: + * 3-level LoD stores + * + * 0 2 3 + * 0 2 4 7 + * 0 2 5 7 10 12 15 20 + */ using LoD = std::vector>; + /// \brief The meta data of dense tensor. Take the structure type /// and use all default operations. /// -- GitLab