From 2cdef424d9cb379887cb826ae514b01e3ff7d569 Mon Sep 17 00:00:00 2001 From: Yancey Date: Thu, 28 Dec 2017 13:31:50 +0800 Subject: [PATCH] Implement selectedrows serialize and deserialize (#7042) * implement selectedrows serialize and deserialize * make serialize/deserialize as global function * recover send_imp.cc * delete unused brackets * fix compile error * serialize version in LodTensor and SelecetedRows * fix ci * fix ci --- paddle/framework/CMakeLists.txt | 2 +- paddle/framework/lod_tensor.cc | 112 +++++------------------- paddle/framework/lod_tensor_test.cc | 14 +++ paddle/framework/selected_rows.cc | 55 +++++++++++- paddle/framework/selected_rows.h | 9 ++ paddle/framework/selected_rows_test.cc | 14 +++ paddle/framework/tensor_test.cc | 114 +++++++++++++------------ paddle/framework/tensor_util.h | 100 ++++++++++++++++++++++ paddle/framework/tensor_util_test.cc | 50 +++++++++++ paddle/operators/load_op.cc | 2 +- paddle/operators/math/CMakeLists.txt | 8 +- 11 files changed, 326 insertions(+), 154 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 738684795d..8bfa41715f 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -5,7 +5,7 @@ cc_library(ddim SRCS ddim.cc DEPS eigen3) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) -cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context) +cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context framework_proto) cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) cc_test(tensor_util_test SRCS tensor_util_test.cc DEPS tensor) diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index f8a3be9a82..7b6dc09bdb 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -189,62 +189,16 @@ void AppendLoD(LoD *lod, const LoD &lod_length) { void SerializeToStream(std::ostream &os, const LoDTensor &tensor, const platform::DeviceContext &dev_ctx) { - // TODO(typhoonzero): serialize to ostream - { // the 1st field, uint32_t version + { // the 1st field, uint32_t version for LoDTensor constexpr uint32_t version = 0; os.write(reinterpret_cast(&version), sizeof(version)); } - { // the 2nd field, tensor description - // int32_t size - // void* protobuf message - proto::TensorDesc desc; - desc.set_data_type(framework::ToDataType(tensor.type())); - auto dims = framework::vectorize(tensor.dims()); - auto *pb_dims = desc.mutable_dims(); - pb_dims->Resize(static_cast(dims.size()), 0); - std::copy(dims.begin(), dims.end(), pb_dims->begin()); - int32_t size = desc.ByteSize(); - os.write(reinterpret_cast(&size), sizeof(size)); - auto out = desc.SerializeAsString(); - os.write(out.data(), size); - } - { // the 3rd field, tensor data - uint64_t size = tensor.memory_size(); - auto *data_ptr = tensor.data(); - PADDLE_ENFORCE(size < std::numeric_limits::max(), - "Index overflow when writing tensor"); - if (platform::is_gpu_place(tensor.place())) { -#ifdef PADDLE_WITH_CUDA - constexpr size_t kBufSize = 1024 * 1024 * 64; // 64MB - std::unique_ptr buf(new char[kBufSize]); - auto &gpu_dev_ctx = - static_cast(dev_ctx); - platform::CPUPlace cpu; - uintptr_t data = reinterpret_cast(data_ptr); - while (size != 0) { - size_t size_to_write = std::min(kBufSize, static_cast(size)); - memory::Copy(cpu, buf.get(), - boost::get(tensor.place()), - reinterpret_cast(data), size_to_write, - gpu_dev_ctx.stream()); - gpu_dev_ctx.Wait(); - os.write(buf.get(), size_to_write); - data += size_to_write; - size -= size_to_write; - } -#else - PADDLE_THROW("Unexpected branch"); -#endif - } else { - os.write(static_cast(data_ptr), - static_cast(size)); - } - } - { // the 4th field, lod information - // uint64_t lod_level - // uint64_t lod_level_1 size in byte. - // int* lod_level_1 data - // ... + { + // the 2st field, LoD information + // uint64_t lod_level + // uint64_t lod_level_1 size in byte. + // int* lod_level_1 data + // ... auto lod = tensor.lod(); uint64_t size = lod.size(); os.write(reinterpret_cast(&size), sizeof(size)); @@ -256,49 +210,19 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor, static_cast(size)); } } + // the 3st field, Tensor + SerializeToStream(os, static_cast(tensor), dev_ctx); } void DeserializeFromStream(std::istream &is, LoDTensor *tensor) { - uint32_t version; - is.read(reinterpret_cast(&version), sizeof(version)); - PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); - proto::TensorDesc desc; - { // int32_t size - // proto buffer - int32_t size; - is.read(reinterpret_cast(&size), sizeof(size)); - std::unique_ptr buf(new char[size]); - is.read(reinterpret_cast(buf.get()), size); - PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size), - "Cannot parse tensor desc"); - } - { // read tensor - std::vector dims; - dims.reserve(static_cast(desc.dims().size())); - std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims)); - tensor->Resize(framework::make_ddim(dims)); - - void *buf; - platform::Place cpu = platform::CPUPlace(); - switch (desc.data_type()) { - case proto::FP32: - buf = tensor->mutable_data(cpu); - break; - case proto::FP64: - buf = tensor->mutable_data(cpu); - break; - case proto::INT32: - buf = tensor->mutable_data(cpu); - break; - case proto::INT64: - buf = tensor->mutable_data(cpu); - break; - default: - PADDLE_THROW("DataType %d not supported", desc.data_type()); - } - is.read(static_cast(buf), tensor->memory_size()); - } - { // read lod + { + // the 1st field, unit32_t version for SelectedRows + uint32_t version; + is.read(reinterpret_cast(&version), sizeof(version)); + PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); + } + { + // the 2st field, LoD information uint64_t lod_level; is.read(reinterpret_cast(&lod_level), sizeof(lod_level)); auto &lod = *tensor->mutable_lod(); @@ -312,6 +236,8 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor) { lod[i] = tmp; } } + // the 3st filed, Tensor + DeserializeFromStream(is, static_cast(tensor)); } } // namespace framework diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index 02d84b6823..0747c8db53 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -126,6 +126,20 @@ TEST_F(LoDTensorTester, ShrinkInLevel) { EXPECT_NE(t1.data(), lod_tensor_.data()); } +TEST_F(LoDTensorTester, SerializeAndDeserialize) { + LoDTensor dst_tensor; + platform::CPUDeviceContext cpu_ctx((platform::CPUPlace())); + std::ostringstream oss; + SerializeToStream(oss, lod_tensor_, cpu_ctx); + std::istringstream iss(oss.str()); + DeserializeFromStream(iss, &dst_tensor); + float* dst_ptr = dst_tensor.mutable_data(platform::CPUPlace()); + for (int i = 0; i < kLodTensorSize; ++i) { + EXPECT_EQ(dst_ptr[i], i); + } + EXPECT_EQ(dst_tensor.lod(), lod_tensor_.lod()); +} + TEST(LodExpand, test) { LoD lod{{0, 2}}; LoDTensor tensor; diff --git a/paddle/framework/selected_rows.cc b/paddle/framework/selected_rows.cc index c74459c9dd..82adfa7123 100644 --- a/paddle/framework/selected_rows.cc +++ b/paddle/framework/selected_rows.cc @@ -12,5 +12,58 @@ limitations under the License. */ #include "paddle/framework/selected_rows.h" namespace paddle { -namespace framework {} // namespace framework +namespace framework { +void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows, + const platform::DeviceContext& dev_ctx) { + { // the 1st field, uint32_t version + constexpr uint32_t version = 0; + os.write(reinterpret_cast(&version), sizeof(version)); + } + { + // the 2st field, rows information + auto& rows = selected_rows.rows(); + uint64_t size = rows.size(); + os.write(reinterpret_cast(&size), sizeof(size)); + for (uint64_t i = 0; i < size; ++i) { + os.write(reinterpret_cast(&rows[i]), sizeof(rows[i])); + } + } + { + // the 3st field, the height of SelectedRows + int64_t height = selected_rows.height(); + os.write(reinterpret_cast(&height), sizeof(height)); + } + // the 4st field, Tensor data + SerializeToStream(os, selected_rows.value(), dev_ctx); +} + +void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows) { + auto tensor = *selected_rows->mutable_value(); + { + // the 1st field, unit32_t version for SelectedRows + uint32_t version; + is.read(reinterpret_cast(&version), sizeof(version)); + PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); + } + { + // the 2st field, rows information + uint64_t size; + is.read(reinterpret_cast(&size), sizeof(size)); + auto& rows = *selected_rows->mutable_rows(); + rows.resize(size); + for (uint64_t i = 0; i < size; ++i) { + is.read(reinterpret_cast(&rows[i]), sizeof(int64_t)); + } + } + { + // the 3st field, the height of the SelectedRows + int64_t height; + is.read(reinterpret_cast(&height), sizeof(int64_t)); + selected_rows->set_height(height); + } + // the 4st field, tensor which contains the data + DeserializeFromStream(is, &tensor); +} + +} // namespace framework } // namespace paddle diff --git a/paddle/framework/selected_rows.h b/paddle/framework/selected_rows.h index 0332b91323..699e392688 100644 --- a/paddle/framework/selected_rows.h +++ b/paddle/framework/selected_rows.h @@ -59,5 +59,14 @@ class SelectedRows { int64_t height_; }; +/* + * Serialize/Desiralize SelectedRows to std::ostream + * You can pass ofstream or ostringstream to serilize to file + * or to a in memory string. GPU tensor will be copied to CPU. + */ +void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows, + const platform::DeviceContext& dev_ctx); +void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows); + } // namespace framework } // namespace paddle diff --git a/paddle/framework/selected_rows_test.cc b/paddle/framework/selected_rows_test.cc index 4ee13a65d7..75487c4010 100644 --- a/paddle/framework/selected_rows_test.cc +++ b/paddle/framework/selected_rows_test.cc @@ -43,5 +43,19 @@ TEST_F(SelectedRowsTester, complete_dims) { ASSERT_EQ(selected_rows_->GetCompleteDims(), make_ddim({10, 100})); } +TEST_F(SelectedRowsTester, SerializeAndDeseralize) { + SelectedRows dst_tensor; + platform::CPUDeviceContext cpu_ctx(place_); + std::ostringstream oss; + + SerializeToStream(oss, *selected_rows_, cpu_ctx); + + std::istringstream iss(oss.str()); + DeserializeFromStream(iss, &dst_tensor); + + ASSERT_EQ(selected_rows_->rows(), dst_tensor.rows()); + ASSERT_EQ(selected_rows_->height(), dst_tensor.height()); +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index ca76a9fcb9..a1b4a03289 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -15,12 +15,13 @@ #include #include +namespace framework = paddle::framework; +namespace platform = paddle::platform; + TEST(Tensor, Dims) { - using namespace paddle::framework; - using namespace paddle::platform; - Tensor tt; + framework::Tensor tt; tt.Resize({2, 3, 4}); - DDim dims = tt.dims(); + framework::DDim dims = tt.dims(); ASSERT_EQ(arity(dims), 3); for (int i = 0; i < 3; ++i) { EXPECT_EQ(i + 2, dims[i]); @@ -28,12 +29,12 @@ TEST(Tensor, Dims) { } TEST(Tensor, DataAssert) { - paddle::framework::Tensor src_tensor; + framework::Tensor src_tensor; bool caught = false; try { src_tensor.data(); - } catch (paddle::platform::EnforceNotMet err) { + } catch (platform::EnforceNotMet err) { caught = true; std::string msg = "holder_ should not be null\nTensor holds no memory. Call " @@ -50,61 +51,65 @@ TEST(Tensor, DataAssert) { because Memory::Alloc() and Memory::Free() have not been ready. */ TEST(Tensor, MutableData) { - using namespace paddle::framework; - using namespace paddle::platform; { - Tensor src_tensor; + framework::Tensor src_tensor; float* p1 = nullptr; float* p2 = nullptr; // initialization - p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), CPUPlace()); + p1 = src_tensor.mutable_data(framework::make_ddim({1, 2, 3}), + platform::CPUPlace()); EXPECT_NE(p1, nullptr); // set src_tensor a new dim with large size // momery is supposed to be re-allocated - p2 = src_tensor.mutable_data(make_ddim({3, 4}), CPUPlace()); + p2 = src_tensor.mutable_data(framework::make_ddim({3, 4}), + platform::CPUPlace()); EXPECT_NE(p2, nullptr); EXPECT_NE(p1, p2); // set src_tensor a new dim with same size // momery block is supposed to be unchanged - p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), CPUPlace()); + p1 = src_tensor.mutable_data(framework::make_ddim({2, 2, 3}), + platform::CPUPlace()); EXPECT_EQ(p1, p2); // set src_tensor a new dim with smaller size // momery block is supposed to be unchanged - p2 = src_tensor.mutable_data(make_ddim({2, 2}), CPUPlace()); + p2 = src_tensor.mutable_data(framework::make_ddim({2, 2}), + platform::CPUPlace()); EXPECT_EQ(p1, p2); } #ifdef PADDLE_WITH_CUDA { - Tensor src_tensor; + framework::Tensor src_tensor; float* p1 = nullptr; float* p2 = nullptr; // initialization - p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), CUDAPlace()); + p1 = src_tensor.mutable_data(framework::make_ddim({1, 2, 3}), + platform::CUDAPlace()); EXPECT_NE(p1, nullptr); // set src_tensor a new dim with large size // momery is supposed to be re-allocated - p2 = src_tensor.mutable_data(make_ddim({3, 4}), CUDAPlace()); + p2 = src_tensor.mutable_data(framework::make_ddim({3, 4}), + platform::CUDAPlace()); EXPECT_NE(p2, nullptr); EXPECT_NE(p1, p2); // set src_tensor a new dim with same size // momery block is supposed to be unchanged - p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), CUDAPlace()); + p1 = src_tensor.mutable_data(framework::make_ddim({2, 2, 3}), + platform::CUDAPlace()); EXPECT_EQ(p1, p2); // set src_tensor a new dim with smaller size // momery block is supposed to be unchanged - p2 = src_tensor.mutable_data(make_ddim({2, 2}), CUDAPlace()); + p2 = src_tensor.mutable_data(framework::make_ddim({2, 2}), + platform::CUDAPlace()); EXPECT_EQ(p1, p2); } #endif } TEST(Tensor, ShareDataWith) { - using namespace paddle::framework; - using namespace paddle::platform; { - Tensor src_tensor; - Tensor dst_tensor; + framework::Tensor src_tensor; + framework::Tensor dst_tensor; // Try to share data form uninitialized tensor bool caught = false; try { @@ -121,16 +126,18 @@ TEST(Tensor, ShareDataWith) { } ASSERT_TRUE(caught); - src_tensor.mutable_data(make_ddim({2, 3, 4}), CPUPlace()); + src_tensor.mutable_data(framework::make_ddim({2, 3, 4}), + platform::CPUPlace()); dst_tensor.ShareDataWith(src_tensor); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } #ifdef PADDLE_WITH_CUDA { - Tensor src_tensor; - Tensor dst_tensor; - src_tensor.mutable_data(make_ddim({2, 3, 4}), CUDAPlace()); + framework::Tensor src_tensor; + framework::Tensor dst_tensor; + src_tensor.mutable_data(framework::make_ddim({2, 3, 4}), + platform::CUDAPlace()); dst_tensor.ShareDataWith(src_tensor); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } @@ -138,13 +145,12 @@ TEST(Tensor, ShareDataWith) { } TEST(Tensor, Slice) { - using namespace paddle::framework; - using namespace paddle::platform; { - Tensor src_tensor; - src_tensor.mutable_data(make_ddim({5, 3, 4}), CPUPlace()); - Tensor slice_tensor = src_tensor.Slice(1, 3); - DDim slice_dims = slice_tensor.dims(); + framework::Tensor src_tensor; + src_tensor.mutable_data(framework::make_ddim({5, 3, 4}), + platform::CPUPlace()); + framework::Tensor slice_tensor = src_tensor.Slice(1, 3); + framework::DDim slice_dims = slice_tensor.dims(); ASSERT_EQ(arity(slice_dims), 3); EXPECT_EQ(slice_dims[0], 2); EXPECT_EQ(slice_dims[1], 3); @@ -153,11 +159,12 @@ TEST(Tensor, Slice) { uintptr_t src_data_address = reinterpret_cast(src_tensor.data()); uintptr_t src_mutable_data_address = reinterpret_cast( - src_tensor.mutable_data(src_tensor.dims(), CPUPlace())); + src_tensor.mutable_data(src_tensor.dims(), platform::CPUPlace())); uintptr_t slice_data_address = reinterpret_cast(slice_tensor.data()); - uintptr_t slice_mutable_data_address = reinterpret_cast( - slice_tensor.mutable_data(slice_tensor.dims(), CPUPlace())); + uintptr_t slice_mutable_data_address = + reinterpret_cast(slice_tensor.mutable_data( + slice_tensor.dims(), platform::CPUPlace())); EXPECT_EQ(src_data_address, src_mutable_data_address); EXPECT_EQ(slice_data_address, slice_mutable_data_address); EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address); @@ -165,22 +172,25 @@ TEST(Tensor, Slice) { #ifdef PADDLE_WITH_CUDA { - Tensor src_tensor; - src_tensor.mutable_data(make_ddim({6, 9}), CUDAPlace()); - Tensor slice_tensor = src_tensor.Slice(2, 6); - DDim slice_dims = slice_tensor.dims(); + framework::Tensor src_tensor; + src_tensor.mutable_data(framework::make_ddim({6, 9}), + platform::CUDAPlace()); + framework::Tensor slice_tensor = src_tensor.Slice(2, 6); + framework::DDim slice_dims = slice_tensor.dims(); ASSERT_EQ(arity(slice_dims), 2); EXPECT_EQ(slice_dims[0], 4); EXPECT_EQ(slice_dims[1], 9); uintptr_t src_data_address = reinterpret_cast(src_tensor.data()); - uintptr_t src_mutable_data_address = reinterpret_cast( - src_tensor.mutable_data(src_tensor.dims(), CUDAPlace())); + uintptr_t src_mutable_data_address = + reinterpret_cast(src_tensor.mutable_data( + src_tensor.dims(), platform::CUDAPlace())); uintptr_t slice_data_address = reinterpret_cast(slice_tensor.data()); - uintptr_t slice_mutable_data_address = reinterpret_cast( - slice_tensor.mutable_data(slice_tensor.dims(), CUDAPlace())); + uintptr_t slice_mutable_data_address = + reinterpret_cast(slice_tensor.mutable_data( + slice_tensor.dims(), platform::CUDAPlace())); EXPECT_EQ(src_data_address, src_mutable_data_address); EXPECT_EQ(slice_data_address, slice_mutable_data_address); EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address); @@ -189,23 +199,19 @@ TEST(Tensor, Slice) { } TEST(Tensor, ReshapeToMatrix) { - using namespace paddle::framework; - using namespace paddle::platform; - Tensor src; - int* src_ptr = src.mutable_data({2, 3, 4, 9}, CPUPlace()); + framework::Tensor src; + int* src_ptr = src.mutable_data({2, 3, 4, 9}, platform::CPUPlace()); for (int i = 0; i < 2 * 3 * 4 * 9; ++i) { src_ptr[i] = i; } - Tensor res = ReshapeToMatrix(src, 2); + framework::Tensor res = framework::ReshapeToMatrix(src, 2); ASSERT_EQ(res.dims()[0], 2 * 3); ASSERT_EQ(res.dims()[1], 4 * 9); } TEST(Tensor, Layout) { - using namespace paddle::framework; - using namespace paddle::platform; - Tensor src; - ASSERT_EQ(src.layout(), DataLayout::kNHWC); - src.set_layout(DataLayout::kAnyLayout); - ASSERT_EQ(src.layout(), DataLayout::kAnyLayout); + framework::Tensor src; + ASSERT_EQ(src.layout(), framework::DataLayout::kNHWC); + src.set_layout(framework::DataLayout::kAnyLayout); + ASSERT_EQ(src.layout(), framework::DataLayout::kAnyLayout); } diff --git a/paddle/framework/tensor_util.h b/paddle/framework/tensor_util.h index ea4e4f22ea..108006911a 100644 --- a/paddle/framework/tensor_util.h +++ b/paddle/framework/tensor_util.h @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include "paddle/framework/data_type.h" +#include "paddle/framework/framework.pb.h" #include "paddle/framework/tensor.h" namespace paddle { @@ -205,5 +207,103 @@ inline void CopyToVector(const Tensor& src, std::vector* dst) { src_ptr, size); } +inline void SerializeToStream(std::ostream& os, const Tensor& tensor, + const platform::DeviceContext& dev_ctx) { + // TODO(typhoonzero): serialize to ostream + { // the 1st field, uint32_t version + constexpr uint32_t version = 0; + os.write(reinterpret_cast(&version), sizeof(version)); + } + { // the 2nd field, tensor description + // int32_t size + // void* protobuf message + proto::TensorDesc desc; + desc.set_data_type(framework::ToDataType(tensor.type())); + auto dims = framework::vectorize(tensor.dims()); + auto* pb_dims = desc.mutable_dims(); + pb_dims->Resize(static_cast(dims.size()), 0); + std::copy(dims.begin(), dims.end(), pb_dims->begin()); + int32_t size = desc.ByteSize(); + os.write(reinterpret_cast(&size), sizeof(size)); + auto out = desc.SerializeAsString(); + os.write(out.data(), size); + } + { // the 3rd field, tensor data + uint64_t size = tensor.memory_size(); + auto* data_ptr = tensor.data(); + PADDLE_ENFORCE(size < std::numeric_limits::max(), + "Index overflow when writing tensor"); + if (platform::is_gpu_place(tensor.place())) { +#ifdef PADDLE_WITH_CUDA + constexpr size_t kBufSize = 1024 * 1024 * 64; // 64MB + std::unique_ptr buf(new char[kBufSize]); + auto& gpu_dev_ctx = + static_cast(dev_ctx); + platform::CPUPlace cpu; + uintptr_t data = reinterpret_cast(data_ptr); + while (size != 0) { + size_t size_to_write = std::min(kBufSize, static_cast(size)); + memory::Copy(cpu, buf.get(), + boost::get(tensor.place()), + reinterpret_cast(data), size_to_write, + gpu_dev_ctx.stream()); + gpu_dev_ctx.Wait(); + os.write(buf.get(), size_to_write); + data += size_to_write; + size -= size_to_write; + } +#else + PADDLE_THROW("Unexpected branch"); +#endif + } else { + os.write(static_cast(data_ptr), + static_cast(size)); + } + } +} + +inline void DeserializeFromStream(std::istream& is, Tensor* tensor) { + uint32_t version; + is.read(reinterpret_cast(&version), sizeof(version)); + PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); + proto::TensorDesc desc; + { // int32_t size + // proto buffer + int32_t size; + is.read(reinterpret_cast(&size), sizeof(size)); + std::unique_ptr buf(new char[size]); + is.read(reinterpret_cast(buf.get()), size); + PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size), + "Cannot parse tensor desc"); + } + { // read tensor + std::vector dims; + dims.reserve(static_cast(desc.dims().size())); + std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims)); + tensor->Resize(framework::make_ddim(dims)); + + void* buf; + platform::Place cpu = platform::CPUPlace(); + // TODO(Yancey1989): use VisiterDataType instead of DataType switch + switch (desc.data_type()) { + case proto::FP32: + buf = tensor->mutable_data(cpu); + break; + case proto::FP64: + buf = tensor->mutable_data(cpu); + break; + case proto::INT32: + buf = tensor->mutable_data(cpu); + break; + case proto::INT64: + buf = tensor->mutable_data(cpu); + break; + default: + PADDLE_THROW("DataType %d not supported", desc.data_type()); + } + is.read(static_cast(buf), tensor->memory_size()); + } +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/tensor_util_test.cc b/paddle/framework/tensor_util_test.cc index f388c19f28..1281e9c2a4 100644 --- a/paddle/framework/tensor_util_test.cc +++ b/paddle/framework/tensor_util_test.cc @@ -230,5 +230,55 @@ TEST(CopyToVector, Tensor) { #endif } +TEST(Tensor, SerializeAndDeserialize) { + framework::Tensor src_tensor; + int array[6] = {1, 2, 3, 4, 5, 6}; + src_tensor.Resize({2, 3}); + int* src_ptr = src_tensor.mutable_data(platform::CPUPlace()); + for (int i = 0; i < 6; ++i) { + src_ptr[i] = array[i]; + } + { + framework::Tensor dst_tensor; + auto place = new platform::CPUPlace(); + platform::CPUDeviceContext cpu_ctx(*place); + std::ostringstream oss; + SerializeToStream(oss, src_tensor, cpu_ctx); + + std::istringstream iss(oss.str()); + DeserializeFromStream(iss, &dst_tensor); + int* dst_ptr = dst_tensor.mutable_data(platform::CPUPlace()); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(dst_ptr[i], array[i]); + } + delete place; + } +#ifdef PADDLE_WITH_CUDA + { + Tensor gpu_tensor; + gpu_tensor.Resize({2, 3}); + Tensor dst_tensor; + + auto gpu_place = new platform::CUDAPlace(); + platform::CUDADeviceContext gpu_ctx(*gpu_place); + + CopyFrom(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor); + + std::ostringstream oss; + SerializeToStream(oss, gpu_tensor, gpu_ctx); + + std::istringstream iss(oss.str()); + DeserializeFromStream(iss, &dst_tensor); + + int* dst_ptr = dst_tensor.mutable_data(platform::CPUPlace()); + for (int i = 0; i < 6; ++i) { + ASSERT_EQ(dst_ptr[i], array[i]); + } + + delete gpu_place; + } +#endif +} + } // namespace framework } // namespace paddle diff --git a/paddle/operators/load_op.cc b/paddle/operators/load_op.cc index 65f021d919..08b972a233 100644 --- a/paddle/operators/load_op.cc +++ b/paddle/operators/load_op.cc @@ -38,7 +38,7 @@ class LoadOp : public framework::OperatorBase { out_var_name); auto *tensor = out_var->GetMutable(); - framework::DeserializeFromStream(fin, tensor); + DeserializeFromStream(fin, tensor); platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(place); diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt index bf47879f77..b97faec4ed 100644 --- a/paddle/operators/math/CMakeLists.txt +++ b/paddle/operators/math/CMakeLists.txt @@ -9,9 +9,9 @@ if(WITH_GPU) nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS device_context) nv_library(pooling SRCS pooling.cc pooling.cu DEPS device_context) nv_library(sequence_pooling SRCS sequence_pooling.cc sequence_pooling.cu DEPS device_context math_function) - nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context) + nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context tensor) nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context math_function) - nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context) + nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context tensor) nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions) nv_library(maxouting SRCS maxouting.cc maxouting.cu DEPS device_context) nv_library(unpooling SRCS unpooling.cc unpooling.cu DEPS device_context) @@ -23,9 +23,9 @@ else() cc_library(cross_entropy SRCS cross_entropy.cc DEPS device_context) cc_library(pooling SRCS pooling.cc DEPS device_context) cc_library(sequence_pooling SRCS sequence_pooling.cc DEPS device_context math_function) - cc_library(vol2col SRCS vol2col.cc DEPS device_context) + cc_library(vol2col SRCS vol2col.cc DEPS device_context tensor) cc_library(context_project SRCS context_project.cc DEPS device_context math_function) - cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context) + cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context tensor) cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions) cc_library(maxouting SRCS maxouting.cc DEPS device_context) cc_library(unpooling SRCS unpooling.cc DEPS device_context) -- GitLab