/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/framework/lod_tensor.h" #include "paddle/framework/saver.pb.h" #include "paddle/memory/memcpy.h" #include "paddle/memory/memory.h" #include #include #include #include #include namespace paddle { namespace framework { LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) { LoD new_lod; new_lod.reserve(level_end - level_begin); for (size_t i = level_begin; i < level_end; i++) { new_lod.emplace_back(in.at(i)); } // transform the lowest level to absolute offset. LoD abs_offset_lod = ToAbsOffset(in); new_lod.back() = abs_offset_lod[level_end - 1]; return new_lod; } LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin, size_t elem_end) { PADDLE_ENFORCE_LT(level, in.size()); PADDLE_ENFORCE_LT(elem_end, in[level].size()); LoD res; res.resize(in.size() - level); // copy the first level res[0].assign(in[level].begin() + elem_begin, in[level].begin() + elem_end + 1); for (size_t lvl = 1; lvl < res.size(); lvl++) { const auto& in_level = in[level + lvl]; const auto& above_level = res[lvl - 1]; auto& out_level = res[lvl]; out_level.assign(in_level.begin() + above_level.front(), in_level.begin() + above_level.back() + 1); } for (size_t lvl = 0; lvl < res.size(); lvl++) { // to make the first offset equals 0, all the elements minus the first // element size_t front = res[lvl].front(); for (auto& ele : res[lvl]) { ele -= front; } } return res; } LoD ToAbsOffset(const LoD& in) { // the lowest level stores relative offsets if (in.empty() || in.size() == 1) return in; LoD result = in; for (int level = result.size() - 2; level >= 0; level--) { for (auto& ele : result[level]) { ele = result[level + 1][ele]; } } return result; } bool operator==(const LoD& a, const LoD& b) { if (a.size() != b.size()) { return false; } for (size_t i = 0; i < a.size(); i++) { const auto& a_level = a[i]; const auto& b_level = b[i]; if (a_level.size() != b_level.size()) { return false; } for (size_t j = 0; j < a_level.size(); j++) { if (a_level[j] != b_level[j]) { return false; } } } return true; } size_t LoDTensor::NumElements(size_t level, size_t idx) const { PADDLE_ENFORCE_LT(level, NumLevels()); PADDLE_ENFORCE_LT(idx, NumElements(level)); return lod_[level][idx + 1] - lod_[level][idx]; } size_t LoDTensor::NumInstancesInElement(size_t level, size_t idx) const { PADDLE_ENFORCE_LT(level, NumLevels()); PADDLE_ENFORCE_LT(idx, NumElements(level)); auto abs_lod = ToAbsOffset(lod()); size_t begin = abs_lod[level][idx]; size_t end = abs_lod[level][idx + 1]; return end - begin; } void LoDTensor::ShrinkLevels(size_t level_begin, size_t level_end) { auto new_lod = framework::SliceLevels(lod_, level_begin, level_end); lod_ = new_lod; } void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin, size_t elem_end) { PADDLE_ENFORCE_LT(level, NumLevels()); PADDLE_ENFORCE_LT(elem_begin, NumElements(level)); PADDLE_ENFORCE_LT(elem_end, NumElements(level) + 1); auto abs_lod = framework::ToAbsOffset(lod()); auto new_lod = framework::SliceInLevel(lod_, level, elem_begin, elem_end); lod_ = new_lod; // slice the underlying tensor size_t begin = abs_lod[level][elem_begin]; size_t end = abs_lod[level][elem_end]; PADDLE_ENFORCE_LT(begin, end, "Cannot shrink, the result tensor is empty."); ShareDataWith(Slice(begin, end)); } std::string LoDTensor::SerializeToString() const { LoDTensorProto desc; // set data_type if (this->type() == typeid(int8_t)) desc.set_data_type(DataType::BOOL); if (this->type() == typeid(int16_t)) desc.set_data_type(DataType::INT16); if (this->type() == typeid(int32_t)) desc.set_data_type(DataType::INT32); if (this->type() == typeid(int64_t)) desc.set_data_type(DataType::INT64); // FIXME(dzh): there is no fp16 in standard c++ if (this->type() == typeid(float)) // NOLINT desc.set_data_type(DataType::FP32); if (this->type() == typeid(double)) // NOLINT desc.set_data_type(DataType::FP64); for (int i = 0; i < dims().size(); ++i) { desc.add_dims(dims()[i]); } // set lod information desc.set_lod_level(this->NumLevels()); for (size_t i = 0; i < this->NumLevels(); ++i) { LoDInfo* lod = desc.add_levels(); for (size_t j = 0; j < lod_[i].size(); ++j) { lod->add_level(lod_[i][j]); } } desc.set_version(0); std::string desc_bytes = desc.SerializeAsString(); // FIXME(dzh) : implement fix chunk size buffer. size_t DESC_SIZE = desc_bytes.size(); size_t DATA_SIZE = holder_->size() - offset_; const size_t BUFFER_SIZE = DESC_SIZE + DATA_SIZE + 2 * sizeof(size_t); char* buffer = static_cast(memory::Alloc(platform::CPUPlace(), BUFFER_SIZE)); // format: desc_size data_size, desc_bytes, data_bytes. platform::CPUPlace src_place; platform::CPUPlace dst_place; memory::Copy(dst_place, buffer, src_place, &BUFFER_SIZE, sizeof(size_t)); memory::Copy(dst_place, buffer + sizeof(size_t), src_place, &DESC_SIZE, sizeof(size_t)); memory::Copy(dst_place, buffer + sizeof(size_t) * 2, src_place, desc_bytes.c_str(), desc_bytes.size()); PADDLE_ENFORCE(this->numel() != 0, "Serialize a empty Tensor!"); platform::Place place = holder_->place(); int element_width = holder_->size() / this->numel(); if (platform::is_cpu_place(place)) { memory::Copy(dst_place, buffer + sizeof(size_t) * 2 + desc_bytes.size(), boost::get(place), static_cast(holder_->ptr()) + offset_ / element_width, DATA_SIZE); } #ifdef PADDLE_WITH_GPU if (platform::is_gpu_place(place)) { memory::Copy(dst_place, buffer + sizeof(size_t) * 2 + desc_bytes.size(), boost::get(place), static_cast(holder_->ptr()) + offset_ / element_width, DATA_SIZE); } #endif std::string ret(buffer, BUFFER_SIZE); memory::Free(platform::CPUPlace(), buffer); return ret; } void LoDTensor::DeserializeFromString(const std::string& s, const platform::Place& dst_place) { size_t DESC_SIZE, BUFFER_SIZE; platform::CPUPlace src_place; memory::Copy(src_place, &BUFFER_SIZE, src_place, s.c_str(), sizeof(size_t)); memory::Copy(src_place, &DESC_SIZE, src_place, s.c_str() + sizeof(size_t), sizeof(size_t)); const size_t DATA_SIZE = BUFFER_SIZE - DESC_SIZE - sizeof(size_t) * 2; // parse LoDTensorDesc LoDTensorProto desc; desc.ParseFromArray(s.c_str() + sizeof(size_t) * 2, DESC_SIZE); std::vector dims; std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims)); this->Resize(make_ddim(dims)); // parse data type void* ptr = nullptr; if (desc.data_type() == DataType::BOOL) ptr = this->mutable_data(dst_place); if (desc.data_type() == DataType::INT16) ptr = this->mutable_data(dst_place); if (desc.data_type() == DataType::INT32) ptr = this->mutable_data(dst_place); if (desc.data_type() == DataType::INT64) ptr = this->mutable_data(dst_place); // FIXME(dzh): there is no fp16 in standard c++ if (desc.data_type() == DataType::FP32) ptr = this->mutable_data(dst_place); if (desc.data_type() == DataType::FP64) ptr = this->mutable_data(dst_place); LoD lod; std::vector levels; for (int i = 0; i < desc.levels().size(); ++i) { auto current_level = desc.levels()[i].level(); std::copy(current_level.begin(), current_level.end(), std::back_inserter(levels)); lod.emplace_back(levels); levels.clear(); } this->set_lod(lod); if (platform::is_cpu_place(dst_place)) { memory::Copy(boost::get(dst_place), ptr, src_place, s.c_str() + sizeof(size_t) * 2 + DESC_SIZE, DATA_SIZE); } #ifdef PADDLE_WITH_GPU if (platform::is_gpu_place(dst_place)) { memory::Copy(boost::get(dst_place), ptr, src_place, s.c_str() + sizeof(size_t) * 2 + DESC_SIZE, DATA_SIZE); } #endif } } // namespace framework } // namespace paddle