diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc index 69a2a6eefaf8ca51d62842e62a6a731c6cbd3231..88375380fee6f68a1b5290ff1bf30763998f6514 100644 --- a/paddle/fluid/framework/lod_tensor.cc +++ b/paddle/fluid/framework/lod_tensor.cc @@ -151,7 +151,8 @@ bool CheckLoD(const LoD &in, int tensor_height) { } // check: the lowest level's last offset should equals `tensor_height` if // tensor_height>0. - if (tensor_height > 0 && (size_t)tensor_height != in.back().back()) + if (tensor_height > 0 && + static_cast(tensor_height) != in.back().back()) return false; // check: the higher level's last offset should equals the lower level's @@ -184,42 +185,13 @@ bool CheckAbsLoD(const LoD &in, int tensor_height) { if (level.front() != 0) return false; if (tensor_height < 0) { tensor_height = level.back(); - } else if ((size_t)tensor_height != level.back()) { + } else if (static_cast(tensor_height) != level.back()) { return false; } } return true; } -using LoDAndOffset = std::pair>; -LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx, - size_t end_idx, size_t start_level) { - LoD sub_lod; - - for (size_t level_idx = start_level; level_idx < lod.size(); ++level_idx) { - PADDLE_ENFORCE_LE(start_idx, end_idx, - platform::errors::InvalidArgument( - "The start index should be less than the end index, " - "but received start index is %d, end index is %d.", - start_idx, end_idx)); - PADDLE_ENFORCE_LT( - end_idx, lod[level_idx].size(), - platform::errors::InvalidArgument( - "The end index should be less than the LoD level size, but " - "received end index is %d, LoD level size is %d.", - end_idx, lod[level_idx].size())); - std::vector level_lens; - for (size_t i = start_idx; i < end_idx; ++i) { - level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]); - } - sub_lod.emplace_back(level_lens); - start_idx = lod[level_idx][start_idx]; - end_idx = lod[level_idx][end_idx]; - } - - return LoDAndOffset{sub_lod, {start_idx, end_idx}}; -} - void AppendLoD(LoD *lod, const LoD &lod_length) { PADDLE_ENFORCE( lod->empty() || lod->size() == lod_length.size(), @@ -347,153 +319,6 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor, TensorFromStream(is, static_cast(tensor), dev_ctx); } -std::vector LoDTensor::SplitLoDTensor( - const std::vector places) const { - PADDLE_ENFORCE_GT(places.size(), 0, - platform::errors::InvalidArgument( - "Place number cannot be empty when splitting.")); - check_memory_size(); - size_t batch_size = - lod().empty() ? static_cast(dims()[0]) : lod()[0].size() - 1; - - // if batch_size is 0, just return #places.size() copys of empty - // tensors. - if (batch_size == 0) { - std::vector empty_results; - empty_results.reserve(places.size()); - for (size_t i = 0; i < places.size(); ++i) { - LoDTensor dst; - dst.Resize(dims()); - dst.mutable_data(places[i], type()); - if (!lod().empty()) { - dst.set_lod(lod()); - } - empty_results.emplace_back(std::move(dst)); - } - return empty_results; - } - - auto step_width = (batch_size + places.size() - 1) / places.size(); - auto result_size = (batch_size + step_width - 1) / step_width; - std::vector results; - results.reserve(result_size); - - for (size_t i = 0; i < result_size; ++i) { - auto begin = i * step_width; - auto end = std::min((i + 1) * step_width, batch_size); - PADDLE_ENFORCE_LT(begin, end, - platform::errors::InvalidArgument( - "The begin index must be less than the end index, " - "but received begin index is %d, end index is %d.", - begin, end)); - - LoDTensor dst; - if (lod().empty()) { - auto src = Slice(begin, end); - auto &dst_place = places[i]; - framework::TensorCopy(src, dst_place, &dst); - } else { - auto lod_and_offset = GetSubLoDAndAbsoluteOffset(lod(), begin, end, 0); - - auto &offset = lod_and_offset.second; - auto src = Slice(offset.first, offset.second); - auto &dst_place = places[i]; - framework::TensorCopy(src, dst_place, &dst); - - LoD my_lod; - for (auto &l : lod_and_offset.first) { - std::vector v{0}; - for (auto &ll : l) { - v.push_back(ll + v.back()); - } - my_lod.emplace_back(v); - } - dst.set_lod(my_lod); - } - results.emplace_back(std::move(dst)); - } - - return results; -} - -void LoDTensor::MergeLoDTensor( - const std::vector &lod_tensors, - platform::Place dst_place) { - PADDLE_ENFORCE_EQ(lod_tensors.empty(), false, - platform::errors::InvalidArgument( - "The LoDTensors to be merged are empty.")); - - framework::DDim new_dim = lod_tensors[0]->dims(); - proto::VarType::Type new_type = proto::VarType::FP32; - framework::DataLayout new_layout = lod_tensors[0]->layout(); - for (auto *t : lod_tensors) { - if (t->numel() && t->IsInitialized()) { - new_dim = t->dims(); - new_type = t->type(); - new_layout = t->layout(); - break; - } - } - - LoD new_lod = lod_tensors[0]->lod(); - - for (size_t i = 1; i < lod_tensors.size(); ++i) { - auto *t = lod_tensors[i]; - if (t->numel() && t->IsInitialized()) { - PADDLE_ENFORCE_EQ( - new_type, t->type(), - platform::errors::InvalidArgument( - "LoDTensor data type does not match, expected type is %s, actual " - "type is %s.", - DataTypeToString(new_type), DataTypeToString(t->type()))); - PADDLE_ENFORCE_EQ( - new_layout, t->layout(), - platform::errors::InvalidArgument( - "LoDTensor layout does not match, expected layout is %s, " - "actual layout is %s.", - DataLayoutToString(new_layout), DataLayoutToString(t->layout()))); - PADDLE_ENFORCE_EQ( - framework::product(new_dim) / new_dim[0], - framework::product(t->dims()) / t->dims()[0], - platform::errors::InvalidArgument( - "LoDTensor dimension does not match, all dimensions except the " - "first dimension need to be equal," - "but expected dimension is %s, actual dimension is %s.", - new_dim, t->dims())); - new_dim[0] += t->dims()[0]; - } - - auto &lod = t->lod(); - PADDLE_ENFORCE_EQ(new_lod.size(), lod.size(), - platform::errors::InvalidArgument( - "The LoD information of LoDTensor does not match, " - "expected LoD is %s, actual LoD is %s.", - new_lod, lod)); - for (size_t j = 0; j < lod.size(); ++j) { - auto &sub_lod = new_lod[j]; - size_t offset = sub_lod.back(); - for (size_t k = 1; k < lod[j].size(); ++k) { - sub_lod.push_back(lod[j][k] + offset); - } - } - } - Resize(new_dim); - set_layout(new_layout); - set_lod(new_lod); - mutable_data(dst_place, new_type); - - int begin = 0; - for (auto *src : lod_tensors) { - int end = begin + src->dims()[0]; - if (end == begin) { - continue; - } - auto dst = Slice(begin, end); - framework::TensorCopy(*src, dst_place, &dst); - begin = end; - } -} - LoD ConvertToLengthBasedLoD(const LoD &offset_lod) { LoD length_lod; length_lod.reserve(offset_lod.size()); diff --git a/paddle/fluid/framework/lod_tensor.h b/paddle/fluid/framework/lod_tensor.h index 7dee0f44e384d4eda9ccb9507f62527a7795b221..6888b582e0c3629d93447a6b7fcbf657a07d630c 100644 --- a/paddle/fluid/framework/lod_tensor.h +++ b/paddle/fluid/framework/lod_tensor.h @@ -39,6 +39,8 @@ class DeviceContext; namespace paddle { namespace framework { +using LoD = std::vector>; + /* * LoD is short for Level of Details. * @@ -54,7 +56,6 @@ namespace framework { * 0 2 4 7 * 0 2 5 7 10 12 15 20 */ -using LoD = std::vector>; std::ostream& operator<<(std::ostream& os, const LoD& lod); std::ostream& operator<<(std::ostream& os, const LoDTensor& t); @@ -108,64 +109,14 @@ bool CheckAbsLoD(const LoD& in, int tensor_height = -1); */ class LoDTensor : public Tensor { public: - LoDTensor() : Tensor() {} - - explicit LoDTensor(const LoD& lod) : lod_(lod) {} - - void set_lod(const LoD& lod) { lod_ = lod; } - - const LoD& lod() const { return lod_; } - - LoD* mutable_lod() { return &lod_; } - - /* - * Get the start offset and end offset of an element from LoD. - */ - std::pair lod_element(size_t level, size_t elem) const { - PADDLE_ENFORCE_LT( - level, NumLevels(), - platform::errors::InvalidArgument( - "The input level of LoD is invalid, it should be less than LoD " - "size. The input level is %zu, the LoD size is %zu.", - level, NumLevels())); - PADDLE_ENFORCE_LT(elem, NumElements(level), - platform::errors::InvalidArgument( - "The input element of LoD is invalid, it should be " - "less than the number of elements in its level." - "The input element is %zu, the number of elements in " - "its level is %zu.", - elem, NumElements(level))); - return std::make_pair((lod_)[level][elem], (lod_)[level][elem + 1]); - } - - /* - * Number of LoDTensor's levels, each level has units of data, for example, - * in the sentence's view, article, paragraph, sentence are 3 levels. - */ - size_t NumLevels() const { return lod_.size(); } - /* - * Number of elements in a level. - */ - size_t NumElements(size_t level = 0) const { - PADDLE_ENFORCE_LT( - level, NumLevels(), - platform::errors::InvalidArgument( - "The input level of LoD is invalid, it should be less than LoD " - "size. The input level is %zu, the LoD size is %zu.", - level, NumLevels())); - // the last offset is the end of last element - return (lod_)[level].size() - 1; - } - - // Split LoDTensor and copy to each place specified in places. - std::vector SplitLoDTensor( - const std::vector places) const; - void MergeLoDTensor(const std::vector& lod_tensors, - platform::Place place); - - private: - LoD lod_; + platform::Place place) { + std::vector tmp; + for (const LoDTensor* lod_tensor : lod_tensors) { + tmp.push_back(lod_tensor); + } + Tensor::MergeLoDTensor(tmp, place); + } }; /* @@ -210,21 +161,6 @@ LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level, return tensor; } -// Get the absolute offset of a lod[start_level][start_idx:end_idx] and -// relative length of details for every levels(i.e., [start_level: ]). -// -// For example, -// lod = [[0, 3, 4, 8], [0, 9, 10, 11, 13, 17, 19, 22, 24]] -// start_level = 0 -// start_idx = 1 -// end_idx = 3 -// -// Returns: -// LoD = [[1, 4], [2, 4, 2, 3, 2]] -// pair = {11, 24} -std::pair> GetSubLoDAndAbsoluteOffset( - const LoD& lod, size_t start_idx, size_t end_idx, size_t start_level); - void AppendLoD(LoD* lod, const LoD& lod_length); /* diff --git a/paddle/fluid/framework/mixed_vector.h b/paddle/fluid/framework/mixed_vector.h index cf71cdfc6d651019fce8989ecf30ed12952d6ce7..25837067b0daaa362ee12eabe1058595803d9217 100644 --- a/paddle/fluid/framework/mixed_vector.h +++ b/paddle/fluid/framework/mixed_vector.h @@ -23,10 +23,9 @@ limitations under the License. */ #include "glog/logging.h" #include "paddle/fluid/framework/details/cow_ptr.h" -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/memory/memcpy.h" +#include "paddle/fluid/platform/device_context.h" #include "paddle/utils/none.h" #include "paddle/utils/optional.h" diff --git a/paddle/fluid/framework/tensor.cc b/paddle/fluid/framework/tensor.cc index cbbc020989d1e97c779ffca0b2048b973e554289..bd2535851a3f29ad1a7ab82a921f01d69a25071d 100644 --- a/paddle/fluid/framework/tensor.cc +++ b/paddle/fluid/framework/tensor.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/tensor_util.h" DECLARE_bool(use_stream_safe_cuda_allocator); @@ -235,5 +236,181 @@ void Tensor::ResetHolderWithType(std::shared_ptr holder, void Tensor::set_type(const proto::VarType::Type& type) { type_ = type; } +/* ---------------------------------------- */ +/* -------------- LoDTensor --------------- */ +/* ---------------------------------------- */ +using LoDAndOffset = std::pair>; +LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD& lod, size_t start_idx, + size_t end_idx, size_t start_level) { + LoD sub_lod; + + for (size_t level_idx = start_level; level_idx < lod.size(); ++level_idx) { + PADDLE_ENFORCE_LE(start_idx, end_idx, + platform::errors::InvalidArgument( + "The start index should be less than the end index, " + "but received start index is %d, end index is %d.", + start_idx, end_idx)); + PADDLE_ENFORCE_LT( + end_idx, lod[level_idx].size(), + platform::errors::InvalidArgument( + "The end index should be less than the LoD level size, but " + "received end index is %d, LoD level size is %d.", + end_idx, lod[level_idx].size())); + std::vector level_lens; + for (size_t i = start_idx; i < end_idx; ++i) { + level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]); + } + sub_lod.emplace_back(level_lens); + start_idx = lod[level_idx][start_idx]; + end_idx = lod[level_idx][end_idx]; + } + + return LoDAndOffset{sub_lod, {start_idx, end_idx}}; +} + +std::vector Tensor::SplitLoDTensor( + const std::vector places) const { + PADDLE_ENFORCE_GT(places.size(), 0, + platform::errors::InvalidArgument( + "Place number cannot be empty when splitting.")); + check_memory_size(); + size_t batch_size = + lod().empty() ? static_cast(dims()[0]) : lod()[0].size() - 1; + + // if batch_size is 0, just return #places.size() copys of empty + // tensors. + if (batch_size == 0) { + std::vector empty_results; + empty_results.reserve(places.size()); + for (size_t i = 0; i < places.size(); ++i) { + Tensor dst; + dst.Resize(dims()); + dst.mutable_data(places[i], type()); + if (!lod().empty()) { + dst.set_lod(lod()); + } + empty_results.emplace_back(std::move(dst)); + } + return empty_results; + } + + auto step_width = (batch_size + places.size() - 1) / places.size(); + auto result_size = (batch_size + step_width - 1) / step_width; + std::vector results; + results.reserve(result_size); + + for (size_t i = 0; i < result_size; ++i) { + auto begin = i * step_width; + auto end = std::min((i + 1) * step_width, batch_size); + PADDLE_ENFORCE_LT(begin, end, + platform::errors::InvalidArgument( + "The begin index must be less than the end index, " + "but received begin index is %d, end index is %d.", + begin, end)); + + Tensor dst; + if (lod().empty()) { + auto src = Slice(begin, end); + auto& dst_place = places[i]; + framework::TensorCopy(src, dst_place, &dst); + } else { + auto lod_and_offset = GetSubLoDAndAbsoluteOffset(lod(), begin, end, 0); + + auto& offset = lod_and_offset.second; + auto src = Slice(offset.first, offset.second); + auto& dst_place = places[i]; + framework::TensorCopy(src, dst_place, &dst); + + LoD my_lod; + for (auto& l : lod_and_offset.first) { + std::vector v{0}; + for (auto& ll : l) { + v.push_back(ll + v.back()); + } + my_lod.emplace_back(v); + } + dst.set_lod(my_lod); + } + results.emplace_back(std::move(dst)); + } + + return results; +} + +void Tensor::MergeLoDTensor(const std::vector& lod_tensors, + platform::Place dst_place) { + PADDLE_ENFORCE_EQ(lod_tensors.empty(), false, + platform::errors::InvalidArgument( + "The LoDTensors to be merged are empty.")); + framework::DDim new_dim = lod_tensors[0]->dims(); + proto::VarType::Type new_type = proto::VarType::FP32; + framework::DataLayout new_layout = lod_tensors[0]->layout(); + for (auto* t : lod_tensors) { + if (t->numel() && t->IsInitialized()) { + new_dim = t->dims(); + new_type = t->type(); + new_layout = t->layout(); + break; + } + } + + LoD new_lod = lod_tensors[0]->lod(); + + for (size_t i = 1; i < lod_tensors.size(); ++i) { + auto* t = lod_tensors[i]; + if (t->numel() && t->IsInitialized()) { + PADDLE_ENFORCE_EQ( + new_type, t->type(), + platform::errors::InvalidArgument( + "LoDTensor data type does not match, expected type is %s, actual " + "type is %s.", + DataTypeToString(new_type), DataTypeToString(t->type()))); + PADDLE_ENFORCE_EQ( + new_layout, t->layout(), + platform::errors::InvalidArgument( + "LoDTensor layout does not match, expected layout is %s, " + "actual layout is %s.", + DataLayoutToString(new_layout), DataLayoutToString(t->layout()))); + PADDLE_ENFORCE_EQ( + framework::product(new_dim) / new_dim[0], + framework::product(t->dims()) / t->dims()[0], + platform::errors::InvalidArgument( + "LoDTensor dimension does not match, all dimensions except the " + "first dimension need to be equal," + "but expected dimension is %s, actual dimension is %s.", + new_dim, t->dims())); + new_dim[0] += t->dims()[0]; + } + + auto& lod = t->lod(); + PADDLE_ENFORCE_EQ(new_lod.size(), lod.size(), + platform::errors::InvalidArgument( + "The LoD information of LoDTensor does not match")); + + for (size_t j = 0; j < lod.size(); ++j) { + auto& sub_lod = new_lod[j]; + size_t offset = sub_lod.back(); + for (size_t k = 1; k < lod[j].size(); ++k) { + sub_lod.push_back(lod[j][k] + offset); + } + } + } + Resize(new_dim); + set_layout(new_layout); + set_lod(new_lod); + mutable_data(dst_place, new_type); + + int begin = 0; + for (auto* src : lod_tensors) { + int end = begin + src->dims()[0]; + if (end == begin) { + continue; + } + auto dst = Slice(begin, end); + framework::TensorCopy(*src, dst_place, &dst); + begin = end; + } +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index 4b1ae041fc4cada165e792a37250a0c1a3de27b0..0ac2944ea3f93d4736c9ddb23f5e7e189a79c3ba 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -24,6 +24,7 @@ limitations under the License. */ #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/framework.pb.h" +#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" @@ -41,7 +42,7 @@ namespace paddle { namespace framework { -class LoDTensor; +using LoD = std::vector>; /* NOTE(liym27): [ What is TensorInplaceVersion used for? ] @@ -326,8 +327,75 @@ class Tensor { */ size_t offset_; std::shared_ptr inplace_version_counter_; + + /* ---------------------------------------------------------- */ + /* --------------- Reserved for LoDTensor ------------------- */ + /* ---------------------------------------------------------- */ + public: + explicit Tensor(const LoD& lod) : lod_(lod) {} + + void set_lod(const LoD& lod) { lod_ = lod; } + + const LoD& lod() const { return lod_; } + + LoD* mutable_lod() { return &lod_; } + + std::pair lod_element(size_t level, size_t elem) const { + PADDLE_ENFORCE_LT( + level, NumLevels(), + platform::errors::InvalidArgument( + "The input level of LoD is invalid, it should be less than LoD " + "size. The input level is %zu, the LoD size is %zu.", + level, NumLevels())); + PADDLE_ENFORCE_LT(elem, NumElements(level), + platform::errors::InvalidArgument( + "The input element of LoD is invalid, it should be " + "less than the number of elements in its level." + "The input element is %zu, the number of elements in " + "its level is %zu.", + elem, NumElements(level))); + return std::make_pair((lod_)[level][elem], (lod_)[level][elem + 1]); + } + + size_t NumLevels() const { return lod_.size(); } + + size_t NumElements(size_t level = 0) const { + PADDLE_ENFORCE_LT( + level, NumLevels(), + platform::errors::InvalidArgument( + "The input level of LoD is invalid, it should be less than LoD " + "size. The input level is %zu, the LoD size is %zu.", + level, NumLevels())); + // the last offset is the end of last element + return (lod_)[level].size() - 1; + } + + // Split LoDTensor and copy to each place specified in places. + std::vector SplitLoDTensor( + const std::vector places) const; + + void MergeLoDTensor(const std::vector& lod_tensors, + platform::Place place); + + private: + LoD lod_; }; +// Get the absolute offset of a lod[start_level][start_idx:end_idx] and +// relative length of details for every levels(i.e., [start_level: ]). +// +// For example, +// lod = [[0, 3, 4, 8], [0, 9, 10, 11, 13, 17, 19, 22, 24]] +// start_level = 0 +// start_idx = 1 +// end_idx = 3 +// +// Returns: +// LoD = [[1, 4], [2, 4, 2, 3, 2]] +// pair = {11, 24} +std::pair> GetSubLoDAndAbsoluteOffset( + const LoD& lod, size_t start_idx, size_t end_idx, size_t start_level); + } // namespace framework } // namespace paddle