From 9a0233de9fd9a25ccd37e996d741534de86ccb29 Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Wed, 25 Oct 2017 16:02:14 -0400 Subject: [PATCH] Feature/tensor array lod pack (#5007) --- paddle/framework/lod_tensor.cc | 16 +++ paddle/framework/lod_tensor.h | 43 +++++++ paddle/framework/lod_tensor_test.cc | 49 +++++++- paddle/framework/tensor_array.cc | 159 +++++++++++++++++++++++++- paddle/framework/tensor_array.h | 13 +++ paddle/framework/tensor_array_test.cc | 52 +++++++++ 6 files changed, 323 insertions(+), 9 deletions(-) diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index f53dd1c185..731235cd98 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -106,6 +106,15 @@ size_t LoDTensor::NumElements(size_t level, size_t idx) const { return lod_[level][idx + 1] - lod_[level][idx]; } +size_t LoDTensor::NumInstancesInElement(size_t level, size_t idx) const { + PADDLE_ENFORCE_LT(level, NumLevels()); + PADDLE_ENFORCE_LT(idx, NumElements(level)); + auto abs_lod = ToAbsOffset(lod()); + size_t begin = abs_lod[level][idx]; + size_t end = abs_lod[level][idx + 1]; + return end - begin; +} + void LoDTensor::ShrinkLevels(size_t level_begin, size_t level_end) { auto new_lod = framework::SliceLevels(lod_, level_begin, level_end); lod_ = new_lod; @@ -117,8 +126,15 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin, PADDLE_ENFORCE_LT(elem_begin, NumElements(level)); PADDLE_ENFORCE_LT(elem_end, NumElements(level) + 1); + auto abs_lod = framework::ToAbsOffset(lod()); auto new_lod = framework::SliceInLevel(lod_, level, elem_begin, elem_end); lod_ = new_lod; + + // slice the underlying tensor + size_t begin = abs_lod[level][elem_begin]; + size_t end = abs_lod[level][elem_end]; + PADDLE_ENFORCE_LT(begin, end, "Cannot shrink, the result tensor is empty."); + ShareDataWith(Slice(begin, end)); } std::string LoDTensor::SerializeToString() const { diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index f78a751c53..735d85f750 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -122,6 +122,12 @@ class LoDTensor : public Tensor { */ size_t NumElements(size_t level, size_t idx) const; + /* + * Get the number of instances in the underlying tensor in the `idx`-th + * element. + */ + size_t NumInstancesInElement(size_t level, size_t idx) const; + /* * Shrink levels[level_begin:level_end] */ @@ -157,5 +163,42 @@ class LoDTensor : public Tensor { private: LoD lod_; }; + +/* + * Expand the `source` to fit the LoD of `lod`. For example, a `source` + * LoDTensor is + * - LoD: [0, 2] + * - tensor: [a0, a1] + * a `lod` is + * - LoD: [0 3 5] + * returns a new LoDTensor + * - [a0 a0 a0 a1 a1] + */ +template +LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level, + const platform::Place& place) { + LoD abs_lod = ToAbsOffset(lod); + const auto& lod_level = lod[level]; + size_t num_instances = source.dims()[0]; + + // new tensor + LoDTensor tensor; + tensor.set_lod(lod); + auto dims = source.dims(); + dims[0] = lod_level.back(); + tensor.Resize(dims); + tensor.mutable_data(place); + + PADDLE_ENFORCE_EQ(num_instances, lod_level.size() - 1); + for (size_t ins = 0; ins < num_instances; ins++) { + for (size_t elem = lod_level[ins]; elem < lod_level[ins + 1]; elem++) { + tensor.Slice(elem, elem + 1) + .CopyFrom(source.Slice(ins, ins + 1), platform::CPUPlace(), + platform::CPUDeviceContext()); + } + } + return tensor; +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index b984d62071..f309376c8b 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -92,11 +92,14 @@ TEST_F(LoDTensorTester, ShrinkInLevel) { size_t level = 0; LoDTensor new_lod_tensor = lod_tensor_; new_lod_tensor.ShrinkInLevel(level, 0, 1); - EXPECT_EQ(new_lod_tensor.NumLevels(), 3UL); - EXPECT_EQ(new_lod_tensor.NumElements(0), 1UL); - EXPECT_EQ(new_lod_tensor.NumElements(1), 2UL); - EXPECT_EQ(new_lod_tensor.NumElements(2), 5UL); - ASSERT_EQ(new_lod_tensor.data(), lod_tensor_.data()); + ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL); + ASSERT_EQ(new_lod_tensor.NumElements(0), 1UL); + ASSERT_EQ(new_lod_tensor.NumElements(1), 2UL); + ASSERT_EQ(new_lod_tensor.NumElements(2), 5UL); + ASSERT_EQ(new_lod_tensor.dims()[0], 12); + for (int i = 0; i < 12 * 128; i++) { + ASSERT_EQ(new_lod_tensor.data()[i], i); + } level = 1; new_lod_tensor = lod_tensor_; @@ -104,7 +107,41 @@ TEST_F(LoDTensorTester, ShrinkInLevel) { ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); ASSERT_EQ(new_lod_tensor.NumElements(0), 1UL); ASSERT_EQ(new_lod_tensor.NumElements(1), 3UL); - ASSERT_EQ(new_lod_tensor.data(), lod_tensor_.data()); + ASSERT_EQ(new_lod_tensor.dims()[0], 7); + for (int i = 5 * 128; i < 12 * 128; i++) { + ASSERT_EQ(new_lod_tensor.data()[i - 5 * 128], i); + } + + LoDTensor t1; + t1.set_lod(lod_tensor_.lod()); + t1.ShareDataWith(lod_tensor_); + + LoDTensor t2; + t2.set_lod(lod_tensor_.lod()); + t2.ShareDataWith(lod_tensor_); + + t1.ShrinkInLevel(0, 1, 2); + t2.ShrinkInLevel(0, 0, 1); + EXPECT_NE(t1.data(), t2.data()); + EXPECT_NE(t1.data(), lod_tensor_.data()); +} + +TEST(LodExpand, test) { + LoD lod{{0, 2}}; + LoDTensor tensor; + tensor.set_lod(lod); + tensor.Resize({2, 1}); + tensor.mutable_data(platform::CPUPlace()); + tensor.data()[0] = 0; + tensor.data()[1] = 1; + + LoD target; + target.emplace_back(std::vector{0, 3, 5}); + auto new_tensor = LodExpand(tensor, target, 0UL, platform::CPUPlace()); + std::vector result{{0, 0, 0, 1, 1}}; + for (size_t i = 0; i < 5; i++) { + ASSERT_EQ(new_tensor.data()[i], result[i]); + } } TEST_F(LoDTensorTester, SerializeDeserialize) { diff --git a/paddle/framework/tensor_array.cc b/paddle/framework/tensor_array.cc index 4c82c36383..6f0b84dd1a 100644 --- a/paddle/framework/tensor_array.cc +++ b/paddle/framework/tensor_array.cc @@ -20,6 +20,8 @@ #include #include +#include "paddle/framework/eigen.h" + namespace paddle { namespace framework { @@ -104,10 +106,10 @@ void TensorArray::Write(size_t index, const LoDTensor& value) { values_.resize(index + 1); } + values_[index].set_lod(value.lod()); values_[index].Resize(value.dims()); - values_[index].mutable_data(platform::CPUPlace()); - values_[index].CopyFrom(value, platform::CPUPlace(), - platform::CPUDeviceContext()); + values_[index].mutable_data(value.place()); + values_[index].CopyFrom(value, value.place(), platform::CPUDeviceContext()); } void TensorArray::WriteShared(size_t index, const LoDTensor& value) { @@ -116,6 +118,7 @@ void TensorArray::WriteShared(size_t index, const LoDTensor& value) { values_.resize(index + 1); } + values_[index].set_lod(value.lod()); values_[index].ShareDataWith(value); } @@ -144,6 +147,156 @@ DySeqMetaBatch TensorArray::Unpack(const LoDTensor& source, int level, return unpacker.meta; } +LoDTensor TensorArray::LodPack(size_t level) const { + PADDLE_ENFORCE_GT(size(), 0UL, "no time step exists"); + // the levels should be no less than 2 + LoDTensor merged; + const LoDTensor *pre, *cur; + pre = &Read(0); + + for (size_t step = 1; step < size(); step++) { + cur = &Read(step); + PADDLE_ENFORCE_GT(cur->NumLevels(), 0); + PADDLE_ENFORCE_GT(pre->NumLevels(), 0); + PADDLE_ENFORCE_EQ(pre->NumLevels(), cur->NumLevels()); + PADDLE_ENFORCE_EQ(pre->NumElements(level), cur->NumElements(level)); + + merged = LodPackTwo(*pre, *cur, level); + pre = &merged; + } + return merged; +} + +/* + * NOTE currently, only the lowest level supports packing. + * The lowest LoD will be changed, while the relative offsets in levels above + * stay unchanged. + * + * previous step : [0] [1] [3] + * current step: [0 1 2] [2 3] [] + * packed to + * [0 0] [0 1] [0 2] [1 2] [1 3] [3] + */ +LoDTensor TensorArray::LodPackTwo(const LoDTensor& pre, const LoDTensor& cur, + size_t level) const { + PADDLE_ENFORCE_EQ(pre.NumLevels(), cur.NumLevels()); + PADDLE_ENFORCE_EQ(pre.NumLevels(), level + 1, + "Only the lowest LoD level supports pack temporarily."); + // calculate the result tensor's shape first + size_t num_instances = 0; + for (size_t elem = 0; elem < pre.NumElements(level); elem++) { + size_t prefix_size = pre.NumElements(level, elem); + size_t num_candidates = cur.NumElements(level, elem); + if (num_candidates > 0) { + num_instances += num_candidates * (prefix_size + 1); + } else { + num_instances += prefix_size; + } + } + + auto res_dims = pre.dims(); + res_dims[0] = num_instances; + LoDTensor result; + result.Resize(res_dims); + result.mutable_data(cur.place()); + + Vector last_lod_level; + // copy data + size_t index = 0; + last_lod_level.push_back(index); + for (size_t elem = 0; elem < pre.NumElements(level); elem++) { + size_t prefix_size = pre.NumElements(level, elem); + size_t num_candidates = cur.NumElements(level, elem); + + // slice the prefix Tensor + LoDTensor prefix = pre; + prefix.ShrinkInLevel(level, elem, elem + 1); + LoDTensor candidate = cur; + if (num_candidates > 0) { + candidate.ShrinkInLevel(level, elem, elem + 1); + } else { // just push prefix + result.Slice(index, index + prefix_size) + .CopyFrom(prefix, result.place(), platform::CPUDeviceContext()); + index += prefix_size; + last_lod_level.push_back(index); + } + for (size_t candi = 0; candi < num_candidates; candi++) { + // TODO(superjom) support GPU + result.Slice(index, index + prefix_size) + .CopyFrom(prefix, result.place(), platform::CPUDeviceContext()); + index += prefix_size; + // copy candidate record + result.Slice(index, index + 1) + .CopyFrom(candidate.Slice(candi, candi + 1), result.place(), + platform::CPUDeviceContext()); + index++; + last_lod_level.push_back(index); + } + } + + // update lod + auto lod = cur.lod(); + lod.back() = last_lod_level; + result.set_lod(lod); + return result; +} + +/* + * source [0 1 2] [3 4] [5 6 7] will be transformd to a list of LoDTensors such + * as + * [0 3 5] [1 4 6] [2 7] with 1-level LoDs: + * - [0 1 2 3] + * - [0 1 2 3] + * - [0 1 1 2], the [1,1) here means the second sequence is empty + * + * NOTE Unpack a LoDTensor in this approach may result in a big LoD. + */ +void TensorArray::LodUnpack(const LoDTensor& source, size_t level) { + PADDLE_ENFORCE_EQ(level, source.NumLevels() - 1, + "only the lowest LoD level supports unpack."); + int non_empty_instances = -1; + size_t index = 0; + Vector lowest_lod_level; + lowest_lod_level.push_back(index); + + for (size_t step = 0; non_empty_instances > 0 || non_empty_instances == -1; + step++) { + size_t num_instances = 0; + for (size_t id = 0; id < source.NumElements(level); id++) { + auto instance = source; + instance.ShrinkInLevel(level, id, id + 1); + if (static_cast(instance.dims()[0]) > step) { + num_instances++; + index++; + } + lowest_lod_level.push_back(index); + } + + // create tensor for this time step + LoDTensor tensor; + auto dims = source.dims(); + dims[0] = num_instances; + // set lod + auto lod = source.lod(); + lod.back() = lowest_lod_level; + tensor.set_lod(lod); + + index = 0; + for (size_t id = 0; id < source.NumElements(level); id++) { + auto instance = source; + instance.ShrinkInLevel(level, id, id + 1); + if (static_cast(instance.dims()[0]) > step) { + // copy this instance + tensor.Slice(index, index + 1) + .CopyFrom(instance.Slice(step, step + 1), tensor.place(), + platform::CPUDeviceContext()); + index++; + } + } + Write(step, tensor); + } +} + LoDTensor TensorArray::Stack() const { LoDTensor result; if (size() == 0) return result; diff --git a/paddle/framework/tensor_array.h b/paddle/framework/tensor_array.h index 046ecb5221..78fad8cab7 100644 --- a/paddle/framework/tensor_array.h +++ b/paddle/framework/tensor_array.h @@ -86,6 +86,16 @@ class TensorArray { */ DySeqMetaBatch Unpack(const LoDTensor &source, int level, bool length_desend); + /* + * Pack an array of LoDTensors to a LoDTensor. + */ + LoDTensor LodPack(size_t level) const; + + /* + * Unpack a LoDTensor to an array of LoDTensors. + */ + void LodUnpack(const LoDTensor &source, size_t level); + /* * Pack the values into a tensor with rank one higher than each tensor in * values. @@ -111,6 +121,9 @@ class TensorArray { protected: void Unstack(const LoDTensor &source, bool data_shared) const; + LoDTensor LodPackTwo(const LoDTensor &pre, const LoDTensor &cur, + size_t level) const; + private: mutable std::vector values_; }; // class TensorArray diff --git a/paddle/framework/tensor_array_test.cc b/paddle/framework/tensor_array_test.cc index 9470ac5e6e..83b52b442d 100644 --- a/paddle/framework/tensor_array_test.cc +++ b/paddle/framework/tensor_array_test.cc @@ -126,5 +126,57 @@ TEST_F(TensorArrayTester, size) { ASSERT_EQ(ta.size(), static_cast(batch_size)); } +TEST(TensorArray, LodPack) { + // three time steps, each step stores a LoDTensors + // - [0] [1] + // - [2 3], [4 5] + // - [6 7] [] [8], [9, 10] + // try to get a LoDTensor with content: + // - [0 2 6] + // - [0 2 7] + // - [0 3] + // - [1 4 8] + // - [1 5 9] + // - [1 5 10] + std::array tensors; + tensors[0].Resize(make_ddim({2, 1})); + tensors[1].Resize(make_ddim({4, 1})); + tensors[2].Resize(make_ddim({5, 1})); + int index = 0; + for (auto& t : tensors) { + t.mutable_data(platform::CPUPlace()); + for (int i = 0; i < t.dims()[0]; i++) { + t.data()[i] = index; + index++; + } + } + + std::array lods; + std::vector> levels{ + {0, 1, 2}, {0, 2, 4}, {0, 2, 2, 3, 5}}; + for (int i = 0; i < 3; i++) { + lods[i].emplace_back(levels[i].begin(), levels[i].end()); + } + + TensorArray ta; + for (int i = 0; i < 3; i++) { + tensors[i].set_lod(lods[i]); + ta.Write(i, tensors[i]); + } + + auto merged = ta.LodPack(0); + + std::vector target_tensor_data{{0, 2, 6, // 0 + 0, 2, 7, // 1 + 0, 3, // 2 + 1, 4, 8, // 3 + 1, 5, 9, // 5 + 1, 5, 10}}; + EXPECT_EQ(merged.dims()[0], (int)target_tensor_data.size()); + for (size_t i = 0; i < target_tensor_data.size(); i++) { + EXPECT_EQ(target_tensor_data[i], merged.data()[i]); + } +} + } // namespace framework } // namespace paddle -- GitLab