Unverified commit b93b1e34 authored by Chen Weihang, committed by GitHub

[Phi] Migrate serialization utils (#45667)

* add serialization funcs in phi

* migrate serialization utils
Parent 504133c9
......@@ -139,7 +139,13 @@ endif()
cc_library(
lod_tensor
SRCS lod_tensor.cc
DEPS ddim mixed_vector place tensor framework_proto version)
DEPS ddim
mixed_vector
place
tensor
framework_proto
version
serialization)
cc_test(
lod_tensor_test
......@@ -1078,7 +1084,7 @@ cc_test(
cc_library(
selected_rows_utils
SRCS selected_rows_utils.cc
DEPS selected_rows)
DEPS selected_rows serialization)
cc_test(
selected_rows_utils_test
SRCS selected_rows_utils_test.cc
......
......@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/version.h"
#include "paddle/phi/core/serialization.h"
namespace paddle {
namespace framework {
......@@ -205,29 +206,7 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod,
void SerializeToStream(std::ostream &os,
const LoDTensor &tensor,
const platform::DeviceContext &dev_ctx) {
{ // the 1st field, uint32_t version for LoDTensor
os.write(reinterpret_cast<const char *>(&kCurTensorVersion),
sizeof(kCurTensorVersion));
}
{
// the 2nd field, LoD information
// uint64_t lod_level
// uint64_t lod_level_1 size in bytes
// size_t* lod_level_1 data
// ...
auto lod = tensor.lod();
uint64_t size = lod.size();
os.write(reinterpret_cast<const char *>(&size), sizeof(size));
for (auto &each : lod) {
size = each.size() * sizeof(framework::LoD::value_type::value_type);
os.write(reinterpret_cast<const char *>(&size), sizeof(size));
os.write(reinterpret_cast<const char *>(each.data()),
static_cast<std::streamsize>(size));
}
}
// the 3rd field, Tensor
TensorToStream(os, static_cast<Tensor>(tensor), dev_ctx);
phi::SerializeToStream(os, tensor, dev_ctx);
}
void SerializeToStream(std::ostream &os, const LoDTensor &tensor) {
......@@ -235,14 +214,14 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor) {
const platform::DeviceContext *dev_ctx;
auto place = tensor.place();
dev_ctx = pool.Get(place);
SerializeToStream(os, tensor, *dev_ctx);
phi::SerializeToStream(os, tensor, *dev_ctx);
}
void DeserializeFromStream(std::istream &os, LoDTensor *tensor) {
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
const platform::DeviceContext *dev_ctx;
dev_ctx = pool.Get(platform::CPUPlace());
DeserializeFromStream(os, tensor, *dev_ctx);
phi::DeserializeFromStream(os, tensor, *dev_ctx);
}
void DeserializeFromStream(std::istream &is,
......@@ -250,69 +229,13 @@ void DeserializeFromStream(std::istream &is,
const platform::DeviceContext &dev_ctx,
const size_t &seek,
const std::vector<int64_t> &shape) {
{
// the 1st field, uint32_t version for LoDTensor
uint32_t version;
is.read(reinterpret_cast<char *>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(framework::IsTensorVersionSupported(version),
true,
platform::errors::InvalidArgument(
"Tensor version %u is not supported.", version));
PADDLE_ENFORCE_EQ(
version,
0U,
platform::errors::InvalidArgument(
"Deserialize to tensor failed, maybe the loaded file is "
"not a paddle model(expected file format: 0, but %u found).",
version));
}
{
// the 2nd field, LoD information
uint64_t lod_level;
is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
auto &lod = *tensor->mutable_lod();
lod.resize(lod_level);
}
// the 3rd field, Tensor
TensorFromStream(is, static_cast<Tensor *>(tensor), dev_ctx, seek, shape);
phi::DeserializeFromStream(is, tensor, dev_ctx, seek, shape);
}
void DeserializeFromStream(std::istream &is,
LoDTensor *tensor,
const platform::DeviceContext &dev_ctx) {
{
// the 1st field, uint32_t version for LoDTensor
uint32_t version;
is.read(reinterpret_cast<char *>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(framework::IsTensorVersionSupported(version),
true,
platform::errors::InvalidArgument(
"Tensor version %u is not supported.", version));
PADDLE_ENFORCE_EQ(
version,
0U,
platform::errors::InvalidArgument(
"Deserialize to tensor failed, maybe the loaded file is "
"not a paddle model(expected file format: 0, but %u found).",
version));
}
{
// the 2nd field, LoD information
uint64_t lod_level;
is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
auto &lod = *tensor->mutable_lod();
lod.resize(lod_level);
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size;
is.read(reinterpret_cast<char *>(&size), sizeof(size));
std::vector<size_t> tmp(size / sizeof(size_t));
is.read(reinterpret_cast<char *>(tmp.data()),
static_cast<std::streamsize>(size));
lod[i] = tmp;
}
}
// the 3rd field, Tensor
TensorFromStream(is, static_cast<Tensor *>(tensor), dev_ctx);
phi::DeserializeFromStream(is, tensor, dev_ctx);
}
LoD ConvertToOffsetBasedLoD(const LoD &length_lod) {
......
......@@ -14,32 +14,15 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/phi/core/serialization.h"
namespace paddle {
namespace framework {
void SerializeToStream(std::ostream& os,
const phi::SelectedRows& selected_rows,
const platform::DeviceContext& dev_ctx) {
{ // the 1st field, uint32_t version
constexpr uint32_t version = 0;
os.write(reinterpret_cast<const char*>(&version), sizeof(version));
}
{
// the 2nd field, rows information
auto& rows = selected_rows.rows();
uint64_t size = rows.size();
os.write(reinterpret_cast<const char*>(&size), sizeof(size));
for (uint64_t i = 0; i < size; ++i) {
os.write(reinterpret_cast<const char*>(&rows[i]), sizeof(rows[i]));
}
}
{
// the 3rd field, the height of SelectedRows
int64_t height = selected_rows.height();
os.write(reinterpret_cast<const char*>(&height), sizeof(height));
}
// the 4th field, Tensor data
TensorToStream(os, selected_rows.value(), dev_ctx);
phi::SerializeToStream(os, selected_rows, dev_ctx);
}
void SerializeToStream(std::ostream& os,
......@@ -48,50 +31,21 @@ void SerializeToStream(std::ostream& os,
const platform::DeviceContext* dev_ctx;
auto place = selected_rows.place();
dev_ctx = pool.Get(place);
SerializeToStream(os, selected_rows, *dev_ctx);
phi::SerializeToStream(os, selected_rows, *dev_ctx);
}
void DeserializeFromStream(std::istream& os, phi::SelectedRows* selected_rows) {
void DeserializeFromStream(std::istream& is, phi::SelectedRows* selected_rows) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
const platform::DeviceContext* dev_ctx;
dev_ctx = pool.Get(platform::CPUPlace());
DeserializeFromStream(os, selected_rows, *dev_ctx);
phi::DeserializeFromStream(is, selected_rows, *dev_ctx);
}
void DeserializeFromStream(std::istream& is,
phi::SelectedRows* selected_rows,
const platform::DeviceContext& dev_ctx) {
{
// the 1st field, uint32_t version for SelectedRows
uint32_t version;
is.read(reinterpret_cast<char*>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(version,
0U,
platform::errors::InvalidArgument(
"Only version 0 SelectedRows is supported."));
}
{
// the 2nd field, rows information
uint64_t size = 0;
is.read(reinterpret_cast<char*>(&size), sizeof(size));
PADDLE_ENFORCE_EQ(
is.good(),
true,
platform::errors::Unavailable("Cannot read the number of rows."));
auto& rows = *selected_rows->mutable_rows();
rows.resize(size);
for (uint64_t i = 0; i < size; ++i) {
is.read(reinterpret_cast<char*>(&rows[i]), sizeof(int64_t));
}
}
{
// the 3rd field, the height of the SelectedRows
int64_t height;
is.read(reinterpret_cast<char*>(&height), sizeof(int64_t));
selected_rows->set_height(height);
}
// the 4th field, tensor which contains the data
TensorFromStream(is, selected_rows->mutable_value(), dev_ctx);
phi::DeserializeFromStream(is, selected_rows, dev_ctx);
}
} // namespace framework
} // namespace paddle
......@@ -42,7 +42,7 @@ void DeserializeFromStream(std::istream& is,
void SerializeToStream(std::ostream& os,
const phi::SelectedRows& selected_rows);
void DeserializeFromStream(std::istream& os, phi::SelectedRows* selected_rows);
void DeserializeFromStream(std::istream& is, phi::SelectedRows* selected_rows);
} // namespace framework
} // namespace paddle
......@@ -112,18 +112,6 @@ void TensorToVector(const Tensor& src,
template <typename T>
void TesnorToVector(const Tensor& src, std::vector<T>* dst);
void TensorToStream(std::ostream& os,
const Tensor& tensor,
const platform::DeviceContext& dev_ctx);
void TensorFromStream(std::istream& is,
Tensor* tensor,
const platform::DeviceContext& dev_ctx);
void TensorFromStream(std::istream& is,
Tensor* tensor,
const platform::DeviceContext& dev_ctx,
const size_t& seek,
const std::vector<int64_t>& shape);
// convert dlpack's DLTensor to tensor
void TensorFromDLPack(const ::DLTensor& dl_tensor, framework::Tensor* dst);
......
......@@ -73,6 +73,10 @@ cc_library(
phi_device_context
SRCS device_context.cc
DEPS dense_tensor selected_rows)
cc_library(
serialization
SRCS serialization.cc
DEPS version tensor phi_device_context)
cc_library(
custom_kernel
......
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/core/serialization.h"
#include "paddle/phi/core/enforce.h"
// Note: TensorToStream depends on framework.proto, so it is
// difficult to move it into phi
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/version.h"
namespace phi {
void SerializeToStream(std::ostream &os,
const DenseTensor &tensor,
const DeviceContext &dev_ctx) {
{ // the 1st field, uint32_t version for DenseTensor
os.write(
reinterpret_cast<const char *>(&paddle::framework::kCurTensorVersion),
sizeof(paddle::framework::kCurTensorVersion));
}
{
// the 2nd field, LoD information
// uint64_t lod_level
// uint64_t lod_level_1 size in bytes
// size_t* lod_level_1 data
// ...
auto lod = tensor.lod();
uint64_t size = lod.size();
os.write(reinterpret_cast<const char *>(&size), sizeof(size));
for (auto &each : lod) {
size = each.size() * sizeof(phi::LoD::value_type::value_type);
os.write(reinterpret_cast<const char *>(&size), sizeof(size));
os.write(reinterpret_cast<const char *>(each.data()),
static_cast<std::streamsize>(size));
}
}
// the 3rd field, Tensor
paddle::framework::TensorToStream(
os, static_cast<DenseTensor>(tensor), dev_ctx);
}
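The writer above fixes the byte layout of a serialized DenseTensor: a uint32 version, a uint64 LoD level count, then for each level a uint64 byte size followed by that many bytes of size_t offsets, and finally the tensor payload produced by TensorToStream (defined via framework.proto and not reproduced here). As an illustration only, not part of this commit, a minimal standalone reader for just that header could look like the following; the name InspectDenseTensorHeader is hypothetical and only the standard library is used.

#include <cstdint>
#include <iostream>
#include <istream>
#include <vector>

// Hypothetical helper: parses only the header fields written by
// phi::SerializeToStream above; the trailing tensor payload is not parsed.
void InspectDenseTensorHeader(std::istream &is) {
  uint32_t version = 0;
  is.read(reinterpret_cast<char *>(&version), sizeof(version));
  uint64_t lod_level = 0;
  is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
  std::cout << "version=" << version << " lod_level=" << lod_level << "\n";
  for (uint64_t i = 0; i < lod_level; ++i) {
    uint64_t bytes = 0;
    is.read(reinterpret_cast<char *>(&bytes), sizeof(bytes));
    if (bytes == 0) continue;
    std::vector<size_t> offsets(bytes / sizeof(size_t));
    is.read(reinterpret_cast<char *>(offsets.data()),
            static_cast<std::streamsize>(bytes));
    std::cout << "level " << i << ": " << offsets.size() << " offsets\n";
  }
  // The remaining bytes are the tensor data written by TensorToStream.
}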
void DeserializeFromStream(std::istream &is,
DenseTensor *tensor,
const DeviceContext &dev_ctx,
const size_t &seek,
const std::vector<int64_t> &shape) {
{
// the 1st field, uint32_t version for DenseTensor
uint32_t version;
is.read(reinterpret_cast<char *>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(paddle::framework::IsTensorVersionSupported(version),
true,
phi::errors::InvalidArgument(
"Tensor version %u is not supported.", version));
PADDLE_ENFORCE_EQ(
version,
0U,
phi::errors::InvalidArgument(
"Deserialize to tensor failed, maybe the loaded file is "
"not a paddle model(expected file format: 0, but %u found).",
version));
}
{
// the 2nd field, LoD information
uint64_t lod_level;
is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
auto &lod = *tensor->mutable_lod();
lod.resize(lod_level);
}
// the 3rd field, Tensor
paddle::framework::TensorFromStream(
is, static_cast<DenseTensor *>(tensor), dev_ctx, seek, shape);
}
void DeserializeFromStream(std::istream &is,
DenseTensor *tensor,
const DeviceContext &dev_ctx) {
{
// the 1st field, uint32_t version for DenseTensor
uint32_t version;
is.read(reinterpret_cast<char *>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(paddle::framework::IsTensorVersionSupported(version),
true,
phi::errors::InvalidArgument(
"Tensor version %u is not supported.", version));
PADDLE_ENFORCE_EQ(
version,
0U,
phi::errors::InvalidArgument(
"Deserialize to tensor failed, maybe the loaded file is "
"not a paddle model(expected file format: 0, but %u found).",
version));
}
{
// the 2nd field, LoD information
uint64_t lod_level;
is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
auto &lod = *tensor->mutable_lod();
lod.resize(lod_level);
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size;
is.read(reinterpret_cast<char *>(&size), sizeof(size));
std::vector<size_t> tmp(size / sizeof(size_t));
is.read(reinterpret_cast<char *>(tmp.data()),
static_cast<std::streamsize>(size));
lod[i] = tmp;
}
}
// the 3rd field, Tensor
paddle::framework::TensorFromStream(
is, static_cast<DenseTensor *>(tensor), dev_ctx);
}
void SerializeToStream(std::ostream &os,
const SelectedRows &selected_rows,
const DeviceContext &dev_ctx) {
{ // the 1st field, uint32_t version
constexpr uint32_t version = 0;
os.write(reinterpret_cast<const char *>(&version), sizeof(version));
}
{
// the 2nd field, rows information
auto &rows = selected_rows.rows();
uint64_t size = rows.size();
os.write(reinterpret_cast<const char *>(&size), sizeof(size));
for (uint64_t i = 0; i < size; ++i) {
os.write(reinterpret_cast<const char *>(&rows[i]), sizeof(rows[i]));
}
}
{
// the 3rd field, the height of SelectedRows
int64_t height = selected_rows.height();
os.write(reinterpret_cast<const char *>(&height), sizeof(height));
}
// the 4th field, Tensor data
paddle::framework::TensorToStream(os, selected_rows.value(), dev_ctx);
}
void DeserializeFromStream(std::istream &is,
SelectedRows *selected_rows,
const DeviceContext &dev_ctx) {
{
// the 1st field, uint32_t version for SelectedRows
uint32_t version;
is.read(reinterpret_cast<char *>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(version,
0U,
phi::errors::InvalidArgument(
"Only version 0 SelectedRows is supported."));
}
{
// the 2nd field, rows information
uint64_t size = 0;
is.read(reinterpret_cast<char *>(&size), sizeof(size));
PADDLE_ENFORCE_EQ(
is.good(),
true,
phi::errors::Unavailable("Cannot read the number of rows."));
auto &rows = *selected_rows->mutable_rows();
rows.resize(size);
for (uint64_t i = 0; i < size; ++i) {
is.read(reinterpret_cast<char *>(&rows[i]), sizeof(int64_t));
}
}
{
// the 3rd field, the height of the SelectedRows
int64_t height;
is.read(reinterpret_cast<char *>(&height), sizeof(int64_t));
selected_rows->set_height(height);
}
// the 4th field, tensor which contains the data
paddle::framework::TensorFromStream(
is, selected_rows->mutable_value(), dev_ctx);
}
} // namespace phi
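For SelectedRows the layout written above is: a uint32 version (currently 0), a uint64 row count followed by that many int64 row indices, an int64 height, and then the value tensor produced by TensorToStream. Again purely as an illustrative sketch with a hypothetical name, a standalone header reader would be:

#include <cstdint>
#include <iostream>
#include <istream>
#include <vector>

// Hypothetical helper: reads the SelectedRows header fields written by
// phi::SerializeToStream above and stops before the value tensor payload.
void InspectSelectedRowsHeader(std::istream &is) {
  uint32_t version = 0;
  is.read(reinterpret_cast<char *>(&version), sizeof(version));
  uint64_t row_count = 0;
  is.read(reinterpret_cast<char *>(&row_count), sizeof(row_count));
  std::vector<int64_t> rows(row_count);
  for (uint64_t i = 0; i < row_count; ++i) {
    is.read(reinterpret_cast<char *>(&rows[i]), sizeof(int64_t));
  }
  int64_t height = 0;
  is.read(reinterpret_cast<char *>(&height), sizeof(height));
  std::cout << "version=" << version << " rows=" << row_count
            << " height=" << height << "\n";
  // The remaining bytes are the value tensor written by TensorToStream.
}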
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/core/selected_rows.h"
namespace phi {
/*
* Serialize/Deserialize a DenseTensor to/from std::ostream/std::istream.
* You can pass an ofstream or ostringstream to serialize to a file
* or to an in-memory string. A GPU tensor will be copied to CPU.
*/
void SerializeToStream(std::ostream& os,
const DenseTensor& tensor,
const DeviceContext& dev_ctx);
void DeserializeFromStream(std::istream& is,
DenseTensor* tensor,
const DeviceContext& dev_ctx);
void DeserializeFromStream(std::istream& is,
DenseTensor* tensor,
const DeviceContext& dev_ctx,
const size_t& seek,
const std::vector<int64_t>& shape);
/*
* Serialize/Deserialize a SelectedRows to/from std::ostream/std::istream.
* You can pass an ofstream or ostringstream to serialize to a file
* or to an in-memory string. A GPU tensor will be copied to CPU.
*/
void SerializeToStream(std::ostream& os,
const SelectedRows& selected_rows,
const DeviceContext& dev_ctx);
void DeserializeFromStream(std::istream& is,
SelectedRows* selected_rows,
const DeviceContext& dev_ctx);
} // namespace phi
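A minimal usage sketch for the DenseTensor overloads declared above, assuming the caller already holds a tensor and a DeviceContext that matches its place (a GPU tensor is copied to CPU by the implementation). The function name RoundTrip and the stringstream-based setup are illustrative, not part of this commit.

#include <sstream>
#include <string>

#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/core/serialization.h"

// Hypothetical helper: serializes `src` into an in-memory string and
// deserializes it back into `dst` using the same device context.
std::string RoundTrip(const phi::DenseTensor &src,
                      phi::DenseTensor *dst,
                      const phi::DeviceContext &ctx) {
  std::ostringstream os;
  phi::SerializeToStream(os, src, ctx);
  const std::string buffer = os.str();
  std::istringstream is(buffer);
  phi::DeserializeFromStream(is, dst, ctx);
  return buffer;
}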
......@@ -25,8 +25,23 @@ limitations under the License. */
namespace phi {
using DDim = phi::DDim;
/*
* LoD is short for Level of Details.
*
* - in a level, each element indicates the relative offset of the lower level
* - the first element should be 0, indicating that the sequence starts from 0
* - each sequence's begin and end (non-inclusive) are level[id] and
*   level[id + 1]
*
* For example, a 3-level LoD stores:
*
* 0 2 3
* 0 2 4 7
* 0 2 5 7 10 12 15 20
*/
using LoD = std::vector<std::vector<size_t>>;
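To make the offset convention above concrete, here is a small standalone sketch (not part of this header) that builds the 3-level LoD from the comment and prints each sequence's [begin, end) range; at the last level these offsets index rows of the tensor.

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // The 3-level LoD from the comment above.
  std::vector<std::vector<size_t>> lod = {
      {0, 2, 3}, {0, 2, 4, 7}, {0, 2, 5, 7, 10, 12, 15, 20}};
  for (size_t lvl = 0; lvl < lod.size(); ++lvl) {
    for (size_t id = 0; id + 1 < lod[lvl].size(); ++id) {
      // Sequence `id` at this level spans [lod[lvl][id], lod[lvl][id + 1]).
      std::cout << "level " << lvl << ", sequence " << id << ": ["
                << lod[lvl][id] << ", " << lod[lvl][id + 1] << ")\n";
    }
  }
  return 0;
}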
/// \brief The meta data of dense tensor. Take the structure type
/// and use all default operations.
///
......