From efc2464f6cff14a5f771bb7e1e6ad8a0366ff110 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Wed, 25 Oct 2017 20:36:07 -0700
Subject: [PATCH] Feature/save op (#5090)

* Init

* Stash

* Polish SaveLoadOp

* Fix CI

* Polish code

* Save GPU Tensor

* Stash

* Fix CI
---
 doc/design/model_format.md                  |  28 ++-
 paddle/framework/CMakeLists.txt             |   3 +-
 paddle/framework/data_type.h                |   1 +
 paddle/framework/lod_tensor.cc              | 137 -------------
 paddle/framework/lod_tensor.h               |  25 +--
 paddle/framework/lod_tensor_test.cc         |  16 --
 paddle/framework/lod_tensor_test.cu         |  29 +--
 paddle/framework/saver.proto                |  39 ----
 paddle/framework/tensor.h                   |   2 +
 paddle/framework/tensor_impl.h              |   6 +-
 paddle/framework/variable.h                 |   2 +
 paddle/memory/memcpy.h                      |   1 -
 paddle/operators/CMakeLists.txt             |   3 +-
 paddle/operators/load_op.cc                 | 132 +++++++++++
 paddle/operators/save_load_op_test.cc       |  63 ++++++
 paddle/operators/save_op.cc                 | 184 ++++++++++++++++++
 paddle/operators/save_restore_op.cc         | 147 --------------
 python/paddle/v2/framework/framework.py     |   2 +-
 .../framework/tests/test_save_restore_op.py |  71 -------
 19 files changed, 410 insertions(+), 481 deletions(-)
 delete mode 100644 paddle/framework/saver.proto
 create mode 100644 paddle/operators/load_op.cc
 create mode 100644 paddle/operators/save_load_op_test.cc
 create mode 100644 paddle/operators/save_op.cc
 delete mode 100644 paddle/operators/save_restore_op.cc
 delete mode 100644 python/paddle/v2/framework/tests/test_save_restore_op.py

diff --git a/doc/design/model_format.md b/doc/design/model_format.md
index a1c086775..e29129fdd 100644
--- a/doc/design/model_format.md
+++ b/doc/design/model_format.md
@@ -12,24 +12,22 @@
 The topology is saved as plain text in a detailed, self-contained protobuf file.
 
 The parameters are saved as a binary file. As we all know, the protobuf message has a limit of [64M size](https://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.io.coded_stream#CodedInputStream.SetTotalBytesLimit.details). We have done a [benchmark experiment](https://github.com/PaddlePaddle/Paddle/pull/4610), which shows that protobuf is not fit for the task.
 
-As a result, we design a particular format for tensor serialization. By default, an arbitrary tensor in Paddle is a [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/lod_tensor.md), and has a description information proto of [LoDTensorDesc](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L99). We save the DescProto as the byte string header. It contains all the necessary information, such as the `dims`, the `name` of the tensor, and the `LoD` information in [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/paddle/framework/lod_tensor.md). A tensor stores values in a continuous memory buffer. For speed we dump the raw memory to disk and save it as the byte string content. So, the binary format of one tensor is,
-
-|HeaderLength|ContentLength|**LoDTensorDesc**|**TensorValue**|
+As a result, we design a particular format for tensor serialization. By default, an arbitrary tensor in Paddle is a [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/lod_tensor.md), and is described by a proto message, [LoDTensorDesc](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L99). We save the DescProto as the byte string header; it contains all the necessary information, such as the `dims` and the `LoD` information in [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/paddle/framework/lod_tensor.md). A tensor stores its values in a contiguous memory buffer; for speed, we dump the raw memory to disk and save it as the byte string content.
 
 The table below shows a tensor's byte view in detail. Note that all the signed values are written in the little-endian format.
 
+| field name | type | description |
+| --- | --- | --- |
+| version | uint32_t | Version of saved file. Always 0 now. |
+| tensor desc length | uint32_t | TensorDesc(Protobuf message) length in bytes. |
+| tensor desc | void* | TensorDesc protobuf binary message |
+| tensor data | void* | Tensor's data in binary format. The length of `tensor_data` is decided by `TensorDesc.dims()` and `TensorDesc.data_type()` |
+| lod_level | uint64_t | Level of LoD |
+| length of lod[0] | uint64_t | [Optional] length of lod[0] in bytes. |
+| data of lod[0] | uint64_t* | [Optional] lod[0].data() |
+| ... | ... | ... |
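+
+To make the layout concrete, here is a minimal sketch of the writer side for one float tensor. It is only an illustration, not part of the Paddle API: it assumes the `TensorDesc` proto message has already been serialized into `desc_bytes`, and that the host is little-endian.
+
+```c++
+#include <cstdint>
+#include <ostream>
+#include <string>
+#include <vector>
+
+// Write one plain-old-data value in the host's byte order.
+template <typename T>
+void WritePOD(std::ostream& out, const T& v) {
+  out.write(reinterpret_cast<const char*>(&v), sizeof(T));
+}
+
+void WriteTensor(std::ostream& out, const std::string& desc_bytes,
+                 const std::vector<float>& values,
+                 const std::vector<std::vector<uint64_t>>& lod) {
+  WritePOD<uint32_t>(out, 0);  // version, always 0 for now
+  WritePOD<uint32_t>(out, static_cast<uint32_t>(desc_bytes.size()));
+  out.write(desc_bytes.data(), desc_bytes.size());   // TensorDesc message
+  out.write(reinterpret_cast<const char*>(values.data()),
+            values.size() * sizeof(float));          // raw tensor data
+  WritePOD<uint64_t>(out, lod.size());               // lod_level
+  for (const auto& level : lod) {
+    WritePOD<uint64_t>(out, level.size() * sizeof(uint64_t));  // bytes in level
+    out.write(reinterpret_cast<const char*>(level.data()),
+              level.size() * sizeof(uint64_t));
+  }
+}
+```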
+
+
 ## Summary
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 85374a476..0a77859d6 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -1,6 +1,5 @@
 # ddim lib
 proto_library(framework_proto SRCS framework.proto)
-proto_library(saver_proto SRCS framework.proto saver.proto)
 
 cc_library(ddim SRCS ddim.cc DEPS eigen3)
 cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
@@ -10,7 +9,7 @@ cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context)
 cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
 cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
 
-cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor saver_proto framework_proto)
+cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto)
 cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor paddle_memory)
 nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
 
diff --git a/paddle/framework/data_type.h b/paddle/framework/data_type.h
index c25a62c2b..bafb4fbd4 100644
--- a/paddle/framework/data_type.h
+++ b/paddle/framework/data_type.h
@@ -15,6 +15,7 @@
 #pragma once
 #include <typeindex>
 #include "paddle/framework/framework.pb.h"
+#include "paddle/platform/enforce.h"
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc
index 731235cd9..584308a53 100644
--- a/paddle/framework/lod_tensor.cc
+++ b/paddle/framework/lod_tensor.cc
@@ -13,7 +13,6 @@ limitations under the License. */
 
 #include "paddle/framework/lod_tensor.h"
-#include "paddle/framework/saver.pb.h"
 #include "paddle/memory/memcpy.h"
 #include "paddle/memory/memory.h"
 
@@ -136,141 +135,5 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin,
   PADDLE_ENFORCE_LT(begin, end, "Cannot shrink, the result tensor is empty.");
   ShareDataWith(Slice(begin, end));
 }
-
-std::string LoDTensor::SerializeToString() const {
-  LoDTensorProto desc;
-
-  // set data_type
-  if (this->type() == typeid(int8_t)) desc.set_data_type(DataType::BOOL);
-  if (this->type() == typeid(int16_t)) desc.set_data_type(DataType::INT16);
-  if (this->type() == typeid(int32_t)) desc.set_data_type(DataType::INT32);
-  if (this->type() == typeid(int64_t)) desc.set_data_type(DataType::INT64);
-  // FIXME(dzh): there is no fp16 in standard c++
-
-  if (this->type() == typeid(float))  // NOLINT
-    desc.set_data_type(DataType::FP32);
-  if (this->type() == typeid(double))  // NOLINT
-    desc.set_data_type(DataType::FP64);
-
-  for (int i = 0; i < dims().size(); ++i) {
-    desc.add_dims(dims()[i]);
-  }
-
-  // set lod information
-  desc.set_lod_level(this->NumLevels());
-  for (size_t i = 0; i < this->NumLevels(); ++i) {
-    LoDInfo* lod = desc.add_levels();
-    for (size_t j = 0; j < lod_[i].size(); ++j) {
-      lod->add_level(lod_[i][j]);
-    }
-  }
-
-  desc.set_version(0);
-
-  std::string desc_bytes = desc.SerializeAsString();
-
-  // FIXME(dzh) : implement fix chunk size buffer.
-  size_t DESC_SIZE = desc_bytes.size();
-  size_t DATA_SIZE = holder_->size() - offset_;
-
-  const size_t BUFFER_SIZE = DESC_SIZE + DATA_SIZE + 2 * sizeof(size_t);
-  char* buffer =
-      static_cast<char*>(memory::Alloc(platform::CPUPlace(), BUFFER_SIZE));
-
-  // format: desc_size data_size, desc_bytes, data_bytes.
-  platform::CPUPlace src_place;
-  platform::CPUPlace dst_place;
-
-  memory::Copy(dst_place, buffer, src_place, &BUFFER_SIZE, sizeof(size_t));
-  memory::Copy(dst_place, buffer + sizeof(size_t), src_place, &DESC_SIZE,
-               sizeof(size_t));
-  memory::Copy(dst_place, buffer + sizeof(size_t) * 2, src_place,
-               desc_bytes.c_str(), desc_bytes.size());
-
-  PADDLE_ENFORCE(this->numel() != 0, "Serialize a empty Tensor!");
-
-  platform::Place place = holder_->place();
-  int element_width = holder_->size() / this->numel();
-
-  if (platform::is_cpu_place(place)) {
-    memory::Copy(dst_place, buffer + sizeof(size_t) * 2 + desc_bytes.size(),
-                 boost::get<platform::CPUPlace>(place),
-                 static_cast<char*>(holder_->ptr()) + offset_ / element_width,
-                 DATA_SIZE);
-  }
-#ifdef PADDLE_WITH_GPU
-  if (platform::is_gpu_place(place)) {
-    memory::Copy(dst_place, buffer + sizeof(size_t) * 2 + desc_bytes.size(),
-                 boost::get<platform::GPUPlace>(place),
-                 static_cast<char*>(holder_->ptr()) + offset_ / element_width,
-                 DATA_SIZE);
-  }
-#endif
-
-  std::string ret(buffer, BUFFER_SIZE);
-  memory::Free(platform::CPUPlace(), buffer);
-  return ret;
-}
-
-void LoDTensor::DeserializeFromString(const std::string& s,
-                                      const platform::Place& dst_place) {
-  size_t DESC_SIZE, BUFFER_SIZE;
-  platform::CPUPlace src_place;
-
-  memory::Copy(src_place, &BUFFER_SIZE, src_place, s.c_str(), sizeof(size_t));
-  memory::Copy(src_place, &DESC_SIZE, src_place, s.c_str() + sizeof(size_t),
-               sizeof(size_t));
-
-  const size_t DATA_SIZE = BUFFER_SIZE - DESC_SIZE - sizeof(size_t) * 2;
-
-  // parse LoDTensorDesc
-  LoDTensorProto desc;
-  desc.ParseFromArray(s.c_str() + sizeof(size_t) * 2, DESC_SIZE);
-
-  std::vector<int64_t> dims;
-  std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
-  this->Resize(make_ddim(dims));
-
-  // parse data type
-  void* ptr = nullptr;
-  if (desc.data_type() == DataType::BOOL)
-    ptr = this->mutable_data<bool>(dst_place);
-  if (desc.data_type() == DataType::INT16)
-    ptr = this->mutable_data<int16_t>(dst_place);
-  if (desc.data_type() == DataType::INT32)
-    ptr = this->mutable_data<int32_t>(dst_place);
-  if (desc.data_type() == DataType::INT64)
-    ptr = this->mutable_data<int64_t>(dst_place);
-  // FIXME(dzh): there is no fp16 in standard c++
-
-  if (desc.data_type() == DataType::FP32)
-    ptr = this->mutable_data<float>(dst_place);
-  if (desc.data_type() == DataType::FP64)
-    ptr = this->mutable_data<double>(dst_place);
-
-  LoD lod;
-  std::vector<size_t> levels;
-  for (int i = 0; i < desc.levels().size(); ++i) {
-    auto current_level = desc.levels()[i].level();
-    std::copy(current_level.begin(), current_level.end(),
-              std::back_inserter(levels));
-    lod.emplace_back(levels);
-    levels.clear();
-  }
-
-  this->set_lod(lod);
-
-  if (platform::is_cpu_place(dst_place)) {
-    memory::Copy(boost::get<platform::CPUPlace>(dst_place), ptr, src_place,
-                 s.c_str() + sizeof(size_t) * 2 + DESC_SIZE, DATA_SIZE);
-  }
-#ifdef PADDLE_WITH_GPU
-  if (platform::is_gpu_place(dst_place)) {
-    memory::Copy(boost::get<platform::GPUPlace>(dst_place), ptr, src_place,
-                 s.c_str() + sizeof(size_t) * 2 + DESC_SIZE, DATA_SIZE);
-  }
-#endif
-}
-
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h
index 735d85f75..f4fe4cdac 100644
--- a/paddle/framework/lod_tensor.h
+++ b/paddle/framework/lod_tensor.h
@@ -85,7 +85,9 @@ class LoDTensor : public Tensor {
 
   void set_lod(const LoD& lod) { lod_ = lod; }
 
-  LoD lod() const { return lod_; }
+  const LoD& lod() const { return lod_; }
+
+  LoD* mutable_lod() { return &lod_; }
 
   /*
    * Get the start offset and end offset of an element from LoD.
@@ -139,27 +141,6 @@ class LoDTensor : public Tensor {
    */
   void ShrinkInLevel(size_t level, size_t elem_begin, size_t elem_end);
 
-  /**
-   * @brief Serialize tensor to char bytes.
-   * Please check model_format.md for the format detail.
-   * NOTE: GPUTensor will copy data to cpu implicitly.
-   * @return return string
-   */
-
-  // FIXME(dzh) : Currently, this interface should only be used in
-  // save/restore model and checkpoint. ParameterServer do not use shape
-  // information to do the optimization, as a result, when we serialize
-  // parameter/gradient to string, we should serialize the tensor
-  // to string in the ps trainer instead of LoDTensor.
-  std::string SerializeToString() const;
-
-  /**
-   * @brief Deserialize char bytes to tensor.
-   * @return return string
-   */
-  void DeserializeFromString(const std::string& s,
-                             const platform::Place& dst_place);
-
  private:
   LoD lod_;
 };
diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc
index f309376c8..aa2f6c993 100644
--- a/paddle/framework/lod_tensor_test.cc
+++ b/paddle/framework/lod_tensor_test.cc
@@ -144,21 +144,5 @@ TEST(LodExpand, test) {
   }
 }
 
-TEST_F(LoDTensorTester, SerializeDeserialize) {
-  LoDTensor new_lod_tensor = lod_tensor_;
-  float* src_ptr = lod_tensor_.data<float>();
-  std::string s = lod_tensor_.SerializeToString();
-  LoDTensor dst;
-  dst.DeserializeFromString(s, platform::CPUPlace());
-  float* dst_ptr = dst.data<float>();
-  for (int i = 0; i < kLodTensorSize; ++i) {
-    EXPECT_EQ(dst_ptr[i], src_ptr[i]);
-  }
-
-  ASSERT_EQ(dst.NumElements(0), 2UL);
-  ASSERT_EQ(dst.NumElements(1), 3UL);
-  ASSERT_EQ(dst.NumElements(2), 8UL);
-}
-
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/lod_tensor_test.cu b/paddle/framework/lod_tensor_test.cu
index 11659be02..c79c4d0c7 100644
--- a/paddle/framework/lod_tensor_test.cu
+++ b/paddle/framework/lod_tensor_test.cu
@@ -47,31 +47,4 @@ TEST(LoDTensor, LoDInGPU) {
   for (size_t i = 0; i < src_lod[0].size(); ++i) {
     CHECK_EQ(lod[0].data()[i], src_lod[0].data()[i] * 2);
   }
-}
-
-TEST(LoDTensor, SerializeDeserialize) {
-  paddle::framework::LoDTensor lod_tensor;
-  paddle::platform::GPUPlace place(0);
-
-  paddle::framework::LoD src_lod;
-  src_lod.push_back(std::vector<size_t>{0, 2, 4, 6, 8, 10, 12, 14});
-
-  lod_tensor.Resize({14, 16});
-  lod_tensor.mutable_data<float>(place);
-
-  lod_tensor.set_lod(src_lod);
-  CHECK_EQ(lod_tensor.lod_element(0, 2).first, 4UL);
-  CHECK_EQ(lod_tensor.lod_element(0, 4).first, 8UL);
-
-  test<<<1, 8>>>(src_lod[0].data(), src_lod[0].size());
-  cudaDeviceSynchronize();
-
-  std::string s = lod_tensor.SerializeToString();
-  paddle::framework::LoDTensor dst;
-  dst.DeserializeFromString(s, place);
-  paddle::framework::LoD dst_lod = dst.lod();
-
-  for (size_t i = 0; i < dst_lod[0].size(); ++i) {
-    CHECK_EQ(src_lod[0].data()[i], dst_lod[0].data()[i] * 2);
-  }
-}
+}
\ No newline at end of file
diff --git a/paddle/framework/saver.proto b/paddle/framework/saver.proto
deleted file mode 100644
index 90a191a6a..000000000
--- a/paddle/framework/saver.proto
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-syntax = "proto2";
-option optimize_for = LITE_RUNTIME;
-package paddle.framework;
-
-import "framework.proto";
-
-/**
- * This file contains necessary information for model, checkpoint.
- * etc.
- */
-
-message LoDInfo { repeated int64 level = 1; }
-
-/**
- * Save the LoDTensorDesc information through LoDTensorProto, its data memory
- * is copyed to c buffer immediately. See model_format.md for details.
- */
-
-message LoDTensorProto {
-  optional DataType data_type = 1;
-  repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480]
-  repeated LoDInfo levels = 3;
-  optional int32 lod_level = 4 [ default = 0 ];
-  optional int32 version = 5;
-}
diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index e31472327..9d2dc6a32 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -132,6 +132,8 @@ class Tensor {
 
   std::type_index type() const { return holder_->type(); }
 
+  size_t memory_size() const;
+
  private:
   inline void check_memory_size() const;
 
diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h
index f6e801bbb..29ac683f4 100644
--- a/paddle/framework/tensor_impl.h
+++ b/paddle/framework/tensor_impl.h
@@ -62,12 +62,16 @@ inline void Tensor::check_memory_size() const {
   PADDLE_ENFORCE_NOT_NULL(
       holder_, "Tensor holds no memory. Call Tensor::mutable_data first.");
   PADDLE_ENFORCE_GE(
-      holder_->size(), numel() * SizeOfType(type()) + offset_,
+      holder_->size(), memory_size() + offset_,
       "Tensor's dims_ is out of bound. Call Tensor::mutable_data "
       "first to re-allocate memory.\n"
       "or maybe the required data-type mismatches the data already stored.");
 }
 
+inline size_t Tensor::memory_size() const {
+  return holder_ == nullptr ? 0UL : numel() * SizeOfType(type());
+}
+
 template <typename T>
 inline const T* Tensor::data() const {
   check_memory_size();
diff --git a/paddle/framework/variable.h b/paddle/framework/variable.h
index a80f0e66b..cde5ec241 100644
--- a/paddle/framework/variable.h
+++ b/paddle/framework/variable.h
@@ -46,6 +46,8 @@ class Variable {
            std::type_index(typeid(T)) == std::type_index(holder_->Type());
   }
 
+  void Clear() { holder_.reset(); }
+
  private:
   struct Placeholder {
     virtual ~Placeholder() {}
diff --git a/paddle/memory/memcpy.h b/paddle/memory/memcpy.h
index 9b36182c2..29c20e186 100644
--- a/paddle/memory/memcpy.h
+++ b/paddle/memory/memcpy.h
@@ -54,6 +54,5 @@ void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num,
           cudaStream_t stream);
 #endif
 
-
 }  // namespace memory
 }  // namespace paddle
diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index d2d70d8be..1ca4ba29d 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -82,7 +82,7 @@ function(op_library TARGET)
     # It's enough to just adding one operator to pybind
     file(APPEND ${pybind_file} "USE_OP(sigmoid);\n")
   endif()
-  
+
   # reduce_op contains several operators
   if ("${TARGET}" STREQUAL "reduce_op")
     set(pybind_flag 1)
@@ -148,3 +148,4 @@ cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
 cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
 cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory)
 cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc DEPS dynamic_recurrent_op recurrent_op tensor_array)
+cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op)
diff --git a/paddle/operators/load_op.cc b/paddle/operators/load_op.cc
new file mode 100644
index 000000000..2d4eff0c3
--- /dev/null
+++ b/paddle/operators/load_op.cc
@@ -0,0 +1,132 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/framework/op_registry.h"
+
+#include <fstream>
+
+namespace paddle {
+namespace operators {
+
+class LoadOp : public framework::OperatorBase {
+ public:
+  LoadOp(const std::string &type, const framework::VariableNameMap &inputs,
+         const framework::VariableNameMap &outputs,
+         const framework::AttributeMap &attrs)
+      : OperatorBase(type, inputs, outputs, attrs) {}
+  void Run(const framework::Scope &scope,
+           const platform::DeviceContext &dev_ctx) const override {
+    auto filename = Attr<std::string>("file_path");
+    std::ifstream fin(filename);
+    PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s for load op",
+                   filename);
+
+    auto out_var_name = Output("Out");
+    auto *out_var = scope.FindVar(out_var_name);
+    PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found",
+                   out_var_name);
+
+    auto *tensor = out_var->GetMutable<framework::LoDTensor>();
+
+    uint32_t version;
+    fin.read(reinterpret_cast<char *>(&version), sizeof(version));
+    PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
+    framework::TensorDesc desc;
+    {  // int32_t size
+       // proto buffer
+      int32_t size;
+      fin.read(reinterpret_cast<char *>(&size), sizeof(size));
+      std::unique_ptr<char[]> buf(new char[size]);
+      fin.read(reinterpret_cast<char *>(buf.get()), size);
+      PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size),
+                     "Cannot parse tensor desc");
+    }
+    {  // read tensor
+      std::vector<int64_t> dims;
+      dims.reserve(static_cast<size_t>(desc.dims().size()));
+      std::copy(desc.dims().begin(), desc.dims().end(),
+                std::back_inserter(dims));
+      tensor->Resize(framework::make_ddim(dims));
+
+      void *buf;
+      platform::Place cpu = platform::CPUPlace();
+      switch (desc.data_type()) {
+        case framework::FP32:
+          buf = tensor->mutable_data<float>(cpu);
+          break;
+        case framework::FP64:
+          buf = tensor->mutable_data<double>(cpu);
+          break;
+        case framework::INT32:
+          buf = tensor->mutable_data<int>(cpu);
+          break;
+        case framework::INT64:
+          buf = tensor->mutable_data<int64_t>(cpu);
+          break;
+        default:
+          PADDLE_THROW("DataType %d not supported", desc.data_type());
+      }
+      fin.read(static_cast<char *>(buf), tensor->memory_size());
+    }
+    {  // read lod
+      uint64_t lod_level;
+      fin.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
+      auto &lod = *tensor->mutable_lod();
+      lod.resize(lod_level);
+      for (uint64_t i = 0; i < lod_level; ++i) {
+        uint64_t size;
+        fin.read(reinterpret_cast<char *>(&size), sizeof(size));
+        std::vector<size_t> tmp(size / sizeof(size_t));
+        fin.read(reinterpret_cast<char *>(tmp.data()),
+                 static_cast<std::streamsize>(size));
+        lod[i] = tmp;
+      }
+    }
+
+    auto place = dev_ctx.GetPlace();
+    if (platform::is_gpu_place(place)) {
+      // copy CPU to GPU
+      framework::LoDTensor cpu_tensor;
+      cpu_tensor.ShareDataWith(*tensor);
+      cpu_tensor.set_lod(tensor->lod());
+
+      // reset tensor
+      out_var->Clear();
+      tensor = out_var->GetMutable<framework::LoDTensor>();
+      tensor->set_lod(cpu_tensor.lod());
+      tensor->CopyFrom(cpu_tensor, place, dev_ctx);
+    }
+  }
+};
+
+class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  LoadOpProtoMaker(framework::OpProto *proto,
+                   framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddOutput("Out", "The tensor to be loaded");
+    AddComment(R"DOC(Load Operator
+Load operator will load a tensor variable from a disk file.
+)DOC");
+    AddAttr<std::string>("file_path",
+                         "Variable will be loaded from \"file_path\".")
+        .AddCustomChecker(
+            [](const std::string &path) { return !path.empty(); });
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+namespace ops = paddle::operators;
+
+REGISTER_OPERATOR(load, ops::LoadOp, ops::LoadOpProtoMaker);
diff --git a/paddle/operators/save_load_op_test.cc b/paddle/operators/save_load_op_test.cc
new file mode 100644
index 000000000..fe2b15ec0
--- /dev/null
+++ b/paddle/operators/save_load_op_test.cc
@@ -0,0 +1,63 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "gtest/gtest.h"
+#include "paddle/framework/op_registry.h"
+
+USE_NO_KERNEL_OP(save);
+USE_NO_KERNEL_OP(load);
+
+TEST(SaveLoadOp, CPU) {
+  paddle::framework::Scope scope;
+  paddle::platform::CPUPlace place;
+  paddle::platform::CPUDeviceContext ctx(place);
+  auto var = scope.Var("test_var");
+  auto tensor = var->GetMutable<paddle::framework::LoDTensor>();
+  tensor->Resize({10, 10});
+  paddle::framework::LoD expect_lod;
+  expect_lod.resize(1);
+  expect_lod[0].push_back(0);
+  expect_lod[0].push_back(1);
+  expect_lod[0].push_back(2);
+  expect_lod[0].push_back(3);
+
+  tensor->set_lod(expect_lod);
+  int* expect = tensor->mutable_data<int>(place);
+  for (size_t i = 0; i < paddle::framework::product(tensor->dims()); ++i) {
+    expect[i] = static_cast<int>(i);
+  }
+  paddle::framework::AttributeMap attrs;
+  attrs.insert({"file_path", std::string("tensor.save")});
+
+  auto save_op = paddle::framework::OpRegistry::CreateOp(
+      "save", {{"X", {"test_var"}}}, {}, attrs);
+  save_op->Run(scope, ctx);
+
+  auto load_var = scope.Var("out_var");
+  auto target = load_var->GetMutable<paddle::framework::LoDTensor>();
+  auto load_op = paddle::framework::OpRegistry::CreateOp(
+      "load", {}, {{"Out", {"out_var"}}}, attrs);
+  load_op->Run(scope, ctx);
+  int* actual = target->data<int>();
+  for (size_t i = 0; i < paddle::framework::product(tensor->dims()); ++i) {
+    EXPECT_EQ(expect[i], actual[i]);
+  }
+  auto& actual_lod = target->lod();
+  EXPECT_EQ(expect_lod.size(), actual_lod.size());
+  for (size_t i = 0; i < expect_lod.size(); ++i) {
+    for (size_t j = 0; j < expect_lod[i].size(); ++j) {
+      EXPECT_EQ(expect_lod[i][j], actual_lod[i][j]);
+    }
+  }
+}
\ No newline at end of file
diff --git a/paddle/operators/save_op.cc b/paddle/operators/save_op.cc
new file mode 100644
index 000000000..490256dfa
--- /dev/null
+++ b/paddle/operators/save_op.cc
@@ -0,0 +1,184 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include <stdint.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fstream>
+
+#include "paddle/framework/data_type.h"
+#include "paddle/framework/framework.pb.h"
+#include "paddle/framework/lod_tensor.h"
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+// TODO(yuyang18): If the functions below are needed by other files, move them
+// to paddle::filesystem namespace.
+constexpr char kSEP = '/';
+static bool FileExists(const std::string &filepath) {
+  struct stat buffer;
+  return (stat(filepath.c_str(), &buffer) == 0);
+}
+
+static std::string DirName(const std::string &filepath) {
+  auto pos = filepath.rfind(kSEP);
+  if (pos == std::string::npos) {
+    return "";
+  }
+  return filepath.substr(0, pos);
+}
+
+static void MkDir(const char *path) {
+  if (mkdir(path, 0755)) {
+    PADDLE_ENFORCE_EQ(errno, EEXIST, "%s mkdir failed!", path);
+  }
+}
+
+static void MkDirRecursively(const char *fullpath) {
+  if (*fullpath == '\0') return;  // empty string
+  if (FileExists(fullpath)) return;
+
+  MkDirRecursively(DirName(fullpath).c_str());
+  MkDir(fullpath);
+}
+
+class SaveOp : public framework::OperatorBase {
+ public:
+  SaveOp(const std::string &type, const framework::VariableNameMap &inputs,
+         const framework::VariableNameMap &outputs,
+         const framework::AttributeMap &attrs)
+      : OperatorBase(type, inputs, outputs, attrs) {}
+  void Run(const framework::Scope &scope,
+           const platform::DeviceContext &dev_ctx) const override {
+    auto filename = Attr<std::string>("file_path");
+    auto overwrite = Attr<bool>("overwrite");
+
+    if (FileExists(filename) && !overwrite) {
+      PADDLE_THROW("%s already exists; cannot save to it when overwrite=false",
+                   filename);
+    }
+
+    MkDirRecursively(DirName(filename).c_str());
+
+    // FIXME(yuyang18): We save variable to local file now, but we should
+    // change it to save an output stream.
+    std::ofstream fout(filename);
+    PADDLE_ENFORCE(static_cast<bool>(fout), "Cannot open %s to write",
+                   filename);
+
+    auto iname = Input("X");
+    auto *var = scope.FindVar(iname);
+    PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s for save_op",
+                   iname);
+
+    PADDLE_ENFORCE(var->IsType<framework::LoDTensor>(),
+                   "SaveOp only supports LoDTensor, %s has wrong type", iname);
+
+    auto &tensor = var->Get<framework::LoDTensor>();
+
+    {  // the 1st field, uint32_t version
+      constexpr uint32_t version = 0;
+      fout.write(reinterpret_cast<const char *>(&version), sizeof(version));
+    }
+    {  // the 2nd field, tensor description
+       // int32_t  size
+       // void*    protobuf message
+      framework::TensorDesc desc;
+      desc.set_data_type(framework::ToDataType(tensor.type()));
+      auto dims = framework::vectorize(tensor.dims());
+      auto *pb_dims = desc.mutable_dims();
+      pb_dims->Resize(static_cast<int>(dims.size()), 0);
+      std::copy(dims.begin(), dims.end(), pb_dims->begin());
+      int32_t size = desc.ByteSize();
+      fout.write(reinterpret_cast<const char *>(&size), sizeof(size));
+      auto out = desc.SerializeAsString();
+      fout.write(out.data(), size);
+    }
+    {  // the 3rd field, tensor data
+      uint64_t size = tensor.memory_size();
+      auto *data_ptr = tensor.data<void>();
+      PADDLE_ENFORCE(size < std::numeric_limits<std::streamsize>::max(),
+                     "Index overflow when writing tensor");
+      if (platform::is_gpu_place(tensor.place())) {
+#ifdef PADDLE_WITH_CUDA
+        constexpr size_t kBufSize = 1024 * 1024 * 64;  // 64MB
+        std::unique_ptr<char[]> buf(new char[kBufSize]);
+        auto &gpu_dev_ctx =
+            static_cast<const platform::CUDADeviceContext &>(dev_ctx);
+        platform::CPUPlace cpu;
+        uintptr_t data = reinterpret_cast<uintptr_t>(data_ptr);
+        while (size != 0) {
+          size_t size_to_write = std::min(kBufSize, static_cast<size_t>(size));
+          memory::Copy(cpu, buf.get(),
+                       boost::get<platform::GPUPlace>(tensor.place()),
+                       reinterpret_cast<const void *>(data), size_to_write,
+                       gpu_dev_ctx.stream());
+          gpu_dev_ctx.Wait();
+          fout.write(buf.get(), size_to_write);
+          data += size_to_write;
+          size -= size_to_write;
+        }
+#else
+        PADDLE_THROW("Unexpected branch");
+#endif
+      } else {
+        fout.write(static_cast<const char *>(data_ptr),
+                   static_cast<std::streamsize>(size));
+      }
+    }
+    {  // the 4th field, lod information
+       // uint64_t  lod_level
+       // uint64_t  lod_level_1 size in byte.
+       // int*      lod_level_1 data
+       // ...
+      auto lod = tensor.lod();
+      uint64_t size = lod.size();
+      fout.write(reinterpret_cast<const char *>(&size), sizeof(size));
+
+      for (auto &each : lod) {
+        size = each.size() * sizeof(framework::LoD::value_type::value_type);
+        fout.write(reinterpret_cast<const char *>(&size), sizeof(size));
+        fout.write(reinterpret_cast<const char *>(each.data()),
+                   static_cast<std::streamsize>(size));
+      }
+    }
+  }
+};
+
+class SaveOpProtoMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  SaveOpProtoMaker(framework::OpProto *proto,
+                   framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "The tensor to be saved");
+    AddComment(R"DOC(Save operator
+Save operator will serialize and write a tensor variable to a disk file.
+)DOC");
+    AddAttr<bool>("overwrite", "Overwrite the output file if it exists")
+        .SetDefault(true);
+    AddAttr<std::string>("file_path",
+                         "Variable will be saved to \"file_path\".")
+        .AddCustomChecker(
+            [](const std::string &path) { return !path.empty(); });
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OPERATOR(save, ops::SaveOp, ops::SaveOpProtoMaker);
diff --git a/paddle/operators/save_restore_op.cc b/paddle/operators/save_restore_op.cc
deleted file mode 100644
index 314e4e927..000000000
--- a/paddle/operators/save_restore_op.cc
+++ /dev/null
@@ -1,147 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-     http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
-
-#include "paddle/framework/eigen.h"
-#include "paddle/framework/op_registry.h"
-
-#include <fstream>
-
-namespace paddle {
-namespace operators {
-
-using framework::Tensor;
-using framework::LoDTensor;
-
-inline static std::string VarToFileName(const std::string& folder_path,
-                                        const std::string& var_name) {
-  return folder_path + "/__" + var_name + "__";
-}
-
-class SaveOp : public framework::OperatorBase {
- public:
-  SaveOp(const std::string& type, const framework::VariableNameMap& inputs,
-         const framework::VariableNameMap& outputs,
-         const framework::AttributeMap& attrs)
-      : OperatorBase(type, inputs, outputs, attrs) {}
-
-  void Run(const framework::Scope& scope,
-           const platform::DeviceContext& dev_ctx) const override {
-    const auto& var_names = this->Inputs("X");
-    for (const auto& name : var_names) {
-      PADDLE_ENFORCE_NOT_NULL(scope.FindVar(name),
-                              "Can not find variable '%s' in the scope.", name);
-    }
-    std::string folder_path = this->Attr<std::string>("folderPath");
-    PADDLE_ENFORCE(!folder_path.empty(),
-                   "'folderPath' of SaveOp shouldn't be empty.");
-
-    VLOG(1) << "Save variables to folder: " << folder_path;
-    for (const auto& name : var_names) {
-      std::string file_name = VarToFileName(folder_path, name);
-      std::ofstream fout(file_name, std::ofstream::out);
-      PADDLE_ENFORCE(fout.is_open(), "Fail to create file %s.", file_name);
-      const LoDTensor& tensor = scope.FindVar(name)->Get<LoDTensor>();
-      std::string bytes = tensor.SerializeToString();
-      fout << bytes;
-      fout.close();
-    }
-    VLOG(1) << "Compelete saving variables. Items count: " << var_names.size();
-  }
-};
-
-class SaveOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  SaveOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X",
-             "(tensor), the tensor count can be 1~INT_MAX, tensors names which "
-             "values will be saved.")
-        .AsDuplicable();
-    AddAttr<std::string>("folderPath", "the folderPath for save model.");
-    AddComment(R"DOC(
-Save the input tensors to a binary file based on input tensor names and absolute path.
-
-All the inputs can carry the LoD (Level of Details) information,
-or not.
-)DOC");
-  }
-};
-
-class RestoreOp : public framework::OperatorBase {
- public:
-  RestoreOp(const std::string& type, const framework::VariableNameMap& inputs,
-            const framework::VariableNameMap& outputs,
-            const framework::AttributeMap& attrs)
-      : OperatorBase(type, inputs, outputs, attrs) {}
-
-  void Run(const framework::Scope& scope,
-           const platform::DeviceContext& dev_ctx) const override {
-    const auto& var_names = this->Outputs("Out");
-    for (const auto& name : var_names) {
-      PADDLE_ENFORCE_NOT_NULL(scope.FindVar(name),
-                              "Can not find variable '%s' in the scope.", name);
-    }
-    std::string folder_path = this->Attr<std::string>("folderPath");
-    PADDLE_ENFORCE(!folder_path.empty(),
-                   "'folderPath' of RestoreOp shouldn't be empty.");
-
-    VLOG(1) << "Try loading variables from folder: " << folder_path;
-
-    for (const auto& name : var_names) {
-      std::string file_name = VarToFileName(folder_path, name);
-      std::ifstream fin(file_name, std::ifstream::in);
-      PADDLE_ENFORCE(fin.is_open(), "Fail to open file %s.", file_name);
-      const size_t kBufferSize = 4096;  // equal to linux page size
-      char buffer[kBufferSize];
-      std::string cache;
-      while (!fin.eof()) {
-        fin.read(buffer, kBufferSize);
-        cache.append(buffer, fin.gcount());
-      }
-      LoDTensor* tensor = scope.FindVar(name)->GetMutable<LoDTensor>();
-      tensor->DeserializeFromString(cache, dev_ctx.GetPlace());
-      fin.close();
-    }
-    VLOG(1) << "Complete loading variables.";
-  }
-};
-
-class RestoreOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  RestoreOpMaker(framework::OpProto* proto,
-                 framework::OpAttrChecker* op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddOutput("Out",
-              "(tensor), the tensor count can be 1~INT_MAX, tensors which "
-              "values will be restores.")
-        .AsDuplicable();
-    AddAttr<std::string>("folderPath", "the folderPath for model file.");
-    AddAttr<int>("data_type", "output tensor data type")
-        .SetDefault(framework::DataType::FP32);
-    AddComment(R"DOC(
-Restore the tensors from model file based on absolute path.
-
-All the tensors outputs may carry the LoD (Level of Details) information,
-or not.
-)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OPERATOR(save, paddle::operators::SaveOp, - paddle::framework::EmptyGradOpMaker, - paddle::operators::SaveOpMaker); - -REGISTER_OPERATOR(restore, paddle::operators::RestoreOp, - paddle::framework::EmptyGradOpMaker, - paddle::operators::RestoreOpMaker); diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py index b3f8be8be..8f28d3e76 100644 --- a/python/paddle/v2/framework/framework.py +++ b/python/paddle/v2/framework/framework.py @@ -261,7 +261,7 @@ class Operator(object): self.desc.set_attr(attr_name, attrs[attr_name]) self.desc.check_attrs() - no_kernel_op_set = {'feed', 'fetch', 'save', 'restore'} + no_kernel_op_set = {'feed', 'fetch', 'save', 'load'} if type not in no_kernel_op_set: self.desc.infer_var_type(self.block.desc) self.desc.infer_shape(self.block.desc) diff --git a/python/paddle/v2/framework/tests/test_save_restore_op.py b/python/paddle/v2/framework/tests/test_save_restore_op.py deleted file mode 100644 index 3a36d03f6..000000000 --- a/python/paddle/v2/framework/tests/test_save_restore_op.py +++ /dev/null @@ -1,71 +0,0 @@ -import paddle.v2.framework.core as core -import paddle.v2.framework.framework as framework -import paddle.v2.framework.executor as executor - -import numpy as np -import unittest -import os -import sys -import shutil - -FOLDER_PATH = "./tmp_test_dir" - - -class TestSaveRestoreOp(unittest.TestCase): - def test_save_restore_op(self): - tensor_1_val = np.random.rand(3, 9).astype("float32") - tensor_2_val = np.random.randint(0, 20, size=(4, 2)).astype("int32") - place = core.CPUPlace() - - program = framework.Program() - block = program.global_block() - v_a = block.create_var( - dtype="float32", shape=[3, 9], lod_level=0, name="tensor_1") - v_b = block.create_var( - dtype="int32", shape=[4, 2], lod_level=0, name="tensor_2") - - t_1 = core.LoDTensor() - t_1.set(tensor_1_val, place) - t_2 = core.LoDTensor() - t_2.set(tensor_2_val, place) - block.append_op( - type="save", - inputs={"X": [v_a, v_b]}, - attrs={"folderPath": FOLDER_PATH}) - block.append_op( - type="fill_constant", - outputs={"Out": [v_a]}, - attrs={"shape": [2, 2], - "value": 0.0}) - block.append_op( - type="fill_constant", - outputs={"Out": [v_b]}, - attrs={"shape": [2, 2], - "value": 0.0}) - block.append_op( - type="restore", - outputs={"Out": [v_a, v_b]}, - attrs={"folderPath": FOLDER_PATH}) - - if os.path.exists(FOLDER_PATH): - shutil.rmtree(FOLDER_PATH) - os.makedirs(FOLDER_PATH) - - exe = executor.Executor(place) - out = exe.run(program, - feed={"tensor_1": t_1, - "tensor_2": t_2}, - fetch_list=[v_a, v_b]) - - self.assertTrue(os.path.isdir(FOLDER_PATH)) - self.assertTrue(os.path.isfile(FOLDER_PATH + "/__tensor_1__")) - self.assertTrue(os.path.isfile(FOLDER_PATH + "/__tensor_2__")) - - self.assertTrue(np.array_equal(np.array(out[0]), tensor_1_val)) - self.assertTrue(np.array_equal(np.array(out[1]), tensor_2_val)) - - shutil.rmtree(FOLDER_PATH) - - -if __name__ == "__main__": - unittest.main() -- GitLab