diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc
index 3e239e9911d03a43987825ffa7824298a748ebda..b29f528f3f749efa3463125c774c2f4d4ebcbc7c 100644
--- a/paddle/framework/lod_tensor.cc
+++ b/paddle/framework/lod_tensor.cc
@@ -286,18 +286,18 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
   DeserializeFromStream(is, static_cast<Tensor *>(tensor), dev_ctx);
 }
 
-// TODO(tonyyang-svail): make this function support LoD
 std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
     const std::vector<platform::Place> places) const {
   check_memory_size();
-  PADDLE_ENFORCE(lod().empty(), "Disable parallel lod for now");
-  size_t result_size = std::min(static_cast<size_t>(dims()[0]), places.size());
-  size_t remainder = dims()[0] % places.size();
+  int batch_size =
+      lod().empty() ? dims()[0] : static_cast<int>(lod()[0].size()) - 1;
+  size_t result_size = std::min(static_cast<size_t>(batch_size), places.size());
+  size_t remainder = batch_size % places.size();
 
   std::vector<LoDTensor> results;
   results.reserve(result_size);
 
-  int step_width = static_cast<int>(dims()[0] / result_size);
+  int step_width = static_cast<int>(batch_size / result_size);
   for (size_t i = 0; i < result_size; ++i) {
     int begin = static_cast<int>(i * step_width);
     int end = static_cast<int>((i + 1) * step_width);
@@ -305,13 +305,28 @@ std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
       end += remainder;
     }
 
-    auto src = Slice(begin, end);
-    auto &dst_place = places[i];
     LoDTensor dst;
-    if (!(dst_place == place())) {
+    if (lod().empty()) {
+      auto src = Slice(begin, end);
+      auto &dst_place = places[i];
       framework::Copy(src, dst_place, &dst);
-    } else {  // It is no need to copy if src_place and dst_place are same.
-      dst.ShareDataWith(src);
+    } else {
+      auto lod_and_offset = GetSubLoDAndAbsoluteOffset(lod(), begin, end, 0);
+
+      auto &offset = lod_and_offset.second;
+      auto src = Slice(offset.first, offset.second);
+      auto &dst_place = places[i];
+      framework::Copy(src, dst_place, &dst);
+
+      LoD my_lod;
+      for (auto &l : lod_and_offset.first) {
+        std::vector<size_t> v{0};
+        for (auto &ll : l) {
+          v.push_back(ll + v.back());
+        }
+        my_lod.emplace_back(v);
+      }
+      dst.set_lod(my_lod);
     }
     results.emplace_back(dst);
   }
@@ -319,29 +334,38 @@ std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
   return results;
 }
 
-// TODO(tonyyang-svail): make this function support LoD
 void LoDTensor::MergeLoDTensor(
     const std::vector<const LoDTensor *> &lod_tensors,
     platform::Place dst_place) {
   PADDLE_ENFORCE(!lod_tensors.empty());
+
   framework::DDim new_dim = lod_tensors[0]->dims();
   std::type_index new_type = lod_tensors[0]->type();
-  auto new_layout = lod_tensors[0]->layout();
-  int64_t new_height = 0;
-  for (auto *lod : lod_tensors) {
-    new_height += lod->dims()[0];
-    for (int i = 1; i < new_dim.size(); ++i) {
-      PADDLE_ENFORCE_EQ(new_dim[i], lod->dims()[i]);
+  framework::DataLayout new_layout = lod_tensors[0]->layout();
+  LoD new_lod = lod_tensors[0]->lod();
+  for (size_t i = 1; i < lod_tensors.size(); ++i) {
+    auto *t = lod_tensors[i];
+    PADDLE_ENFORCE_EQ(new_type.hash_code(), t->type().hash_code());
+    PADDLE_ENFORCE_EQ(new_layout, t->layout());
+
+    PADDLE_ENFORCE_EQ(framework::product(new_dim) / new_dim[0],
+                      framework::product(t->dims()) / t->dims()[0]);
+    new_dim[0] += t->dims()[0];
+
+    auto &lod = t->lod();
+    for (size_t j = 0; j < lod.size(); ++j) {
+      auto &sub_lod = new_lod[j];
+      size_t offset = sub_lod.back();
+      for (size_t k = 1; k < lod[j].size(); ++k) {
+        sub_lod.push_back(lod[j][k] + offset);
+      }
     }
-
-    PADDLE_ENFORCE_EQ(new_type, lod->type());
-    PADDLE_ENFORCE_EQ(new_layout, lod->layout());
   }
-  new_dim[0] = new_height;
   Resize(new_dim);
   set_layout(new_layout);
-
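The two rewrites are inverses at the LoD level: SplitLoDTensor turns the per-level values returned by GetSubLoDAndAbsoluteOffset (which the prefix-sum loop treats as segment lengths) back into offset vectors, and MergeLoDTensor concatenates offset vectors by shifting each appended level by the accumulated endpoint. The standalone sketch below (not part of the patch; Level, LengthsToOffsets, and AppendLevel are illustrative names) reproduces just that arithmetic against the LoDs used in the new tests:

#include <cstddef>
#include <iostream>
#include <vector>

// Mirrors paddle::framework::LoD: a list of levels, each an offset vector.
using Level = std::vector<std::size_t>;
using LoD = std::vector<Level>;

// Rebuild an offset vector from per-level segment lengths -- the same
// prefix-sum loop SplitLoDTensor runs over lod_and_offset.first.
Level LengthsToOffsets(const Level &lengths) {
  Level offsets{0};
  for (std::size_t len : lengths) offsets.push_back(offsets.back() + len);
  return offsets;
}

// Append one tensor's offset vector onto an accumulated level, shifting by
// the accumulated endpoint -- the inner loop of MergeLoDTensor. The endpoint
// is copied into a plain size_t first; a reference to acc->back() would
// dangle as soon as push_back reallocates.
void AppendLevel(Level *acc, const Level &next) {
  std::size_t offset = acc->back();
  for (std::size_t k = 1; k < next.size(); ++k) acc->push_back(next[k] + offset);
}

int main() {
  // The per-place LoDs expected by TEST(LoD, SplitLoDTensor).
  LoD lod0{{0, 2, 4}, {0, 1, 6, 8, 13}};
  LoD lod1{{0, 1, 2}, {0, 2, 7}};

  // Splitting produced lod0's top level from the segment lengths {2, 2}.
  Level rebuilt = LengthsToOffsets({2, 2});
  std::cout << (rebuilt == lod0[0]) << '\n';  // prints 1

  // Merging level by level must reproduce the original LoD
  // {0, 2, 4, 5, 6} / {0, 1, 6, 8, 13, 15, 20} from the tests.
  LoD merged = lod0;
  for (std::size_t j = 0; j < merged.size(); ++j) AppendLevel(&merged[j], lod1[j]);
  for (const Level &level : merged) {
    for (std::size_t v : level) std::cout << v << ' ';
    std::cout << '\n';
  }
}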
+  set_lod(new_lod);
   mutable_data(dst_place, new_type);
+
   int begin = 0;
   for (auto *src : lod_tensors) {
     int end = begin + src->dims()[0];
diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc
index 9c7ad6c7b47952bd137eeedf302e2b9182fe8279..2c4d2e10e016da8253410dd241cf8790bd453d72 100644
--- a/paddle/framework/lod_tensor_test.cc
+++ b/paddle/framework/lod_tensor_test.cc
@@ -100,6 +100,71 @@ TEST(LoD, ToAbsOffset) {
   EXPECT_EQ(abs_lod, expected);
 }
 
+TEST(LoD, SplitLoDTensor) {
+  LoD lod;
+  lod.push_back(std::vector<size_t>({0, 2, 4, 5, 6}));
+  lod.push_back(std::vector<size_t>({0, 1, 6, 8, 13, 15, 20}));
+
+  platform::CPUPlace place;
+  LoDTensor lod_tensor;
+  lod_tensor.Resize({20, 1});
+  float* dst_ptr = lod_tensor.mutable_data<float>(place);
+  for (int i = 0; i < lod_tensor.numel(); ++i) {
+    dst_ptr[i] = i;
+  }
+  lod_tensor.set_lod(lod);
+
+  std::vector<platform::Place> places{platform::CPUPlace(),
+                                      platform::CPUPlace()};
+  LoD lod0;
+  lod0.push_back(std::vector<size_t>({0, 2, 4}));
+  lod0.push_back(std::vector<size_t>({0, 1, 6, 8, 13}));
+  LoD lod1;
+  lod1.push_back(std::vector<size_t>({0, 1, 2}));
+  lod1.push_back(std::vector<size_t>({0, 2, 7}));
+
+  auto lods = lod_tensor.SplitLoDTensor(places);
+  EXPECT_EQ(lods[0].lod(), lod0);
+  EXPECT_EQ(lods[1].lod(), lod1);
+}
+
+TEST(LoD, MergeLoDTensor) {
+  LoD lod;
+  lod.push_back(std::vector<size_t>({0, 2, 4, 5, 6}));
+  lod.push_back(std::vector<size_t>({0, 1, 6, 8, 13, 15, 20}));
+
+  platform::CPUPlace place;
+
+  LoDTensor lod_tensor0;
+  LoD lod0;
+  lod0.push_back(std::vector<size_t>({0, 2, 4}));
+  lod0.push_back(std::vector<size_t>({0, 1, 6, 8, 13}));
+  lod_tensor0.set_lod(lod0);
+
+  lod_tensor0.Resize({13, 1});
+  float* dst_ptr = lod_tensor0.mutable_data<float>(place);
+  for (int i = 0; i < lod_tensor0.numel(); ++i) {
+    dst_ptr[i] = i;
+  }
+
+  LoDTensor lod_tensor1;
+  LoD lod1;
+  lod1.push_back(std::vector<size_t>({0, 1, 2}));
+  lod1.push_back(std::vector<size_t>({0, 2, 7}));
+  lod_tensor1.set_lod(lod1);
+  lod_tensor1.Resize({7, 1});
+  dst_ptr = lod_tensor1.mutable_data<float>(place);
+  for (int i = 0; i < lod_tensor1.numel(); ++i) {
+    dst_ptr[i] = i;
+  }
+
+  std::vector<const LoDTensor*> lods{&lod_tensor0, &lod_tensor1};
+
+  LoDTensor lod_tensor;
+  lod_tensor.MergeLoDTensor(lods, place);
+  EXPECT_EQ(lod_tensor.lod(), lod);
+}
+
 TEST(LoD, CheckLoD) {
   LoD relative_lod;
   relative_lod.push_back(std::vector<size_t>({0, 2}));
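For concreteness, the numbers in TEST(LoD, SplitLoDTensor) fall out as follows: batch_size is lod()[0].size() - 1 = 4 top-level sequences, so each of the two places receives two of them. Sequences [0, 2) cover level-1 segments [0, 4) and therefore rows [0, 13); sequences [2, 4) cover segments [4, 6) and rows [13, 20), which after re-basing each level's offsets to zero yields exactly lod0 and lod1. A minimal sketch of that descent, assuming the two-level offset layout used in the tests (SubLoD is a hypothetical stand-in for GetSubLoDAndAbsoluteOffset, not the real helper's signature):

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

using Level = std::vector<std::size_t>;
using LoD = std::vector<Level>;

// For a top-level sequence range [begin, end), compute the re-based
// sub-LoD plus the absolute row span to Slice(). Assumes each level's
// offsets index into the next level, and the last level indexes rows.
std::pair<LoD, std::pair<std::size_t, std::size_t>> SubLoD(
    const LoD &lod, std::size_t begin, std::size_t end) {
  LoD sub;
  std::size_t lo = begin, hi = end;
  for (const Level &level : lod) {
    Level v;
    for (std::size_t idx = lo; idx <= hi; ++idx) {
      v.push_back(level[idx] - level[lo]);  // re-base offsets to zero
    }
    sub.push_back(v);
    lo = level[lo];  // descend into the next level
    hi = level[hi];
  }
  return {sub, {lo, hi}};
}

int main() {
  LoD lod{{0, 2, 4, 5, 6}, {0, 1, 6, 8, 13, 15, 20}};

  auto first = SubLoD(lod, 0, 2);   // -> {{0,2,4}, {0,1,6,8,13}}, rows [0,13)
  auto second = SubLoD(lod, 2, 4);  // -> {{0,1,2}, {0,2,7}},      rows [13,20)

  std::cout << first.second.first << ' ' << first.second.second << '\n';
  std::cout << second.second.first << ' ' << second.second.second << '\n';
}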