From e43073e898ab816ee06a1666f2bb740ced8ed113 Mon Sep 17 00:00:00 2001
From: Yan Chunwei
Date: Wed, 27 Sep 2017 15:59:13 -0400
Subject: [PATCH] design of TensorArray to simplify variant/dynamic RNNs
 (#4447)

* some enforce change
* add LoDTensor::NumElements(id,id)
* update
* init
* finish
* refactor interfaces
---
 doc/design/tensor_array.md          | 73 +++++++++++++++++++++++++++++
 paddle/framework/lod_tensor.cc      | 16 +++++++
 paddle/framework/lod_tensor.h       | 34 +++++++++++---
 paddle/framework/lod_tensor_test.cc |  8 +++-
 4 files changed, 123 insertions(+), 8 deletions(-)
 create mode 100644 doc/design/tensor_array.md

diff --git a/doc/design/tensor_array.md b/doc/design/tensor_array.md
new file mode 100644
index 00000000000..a0419ec0021
--- /dev/null
+++ b/doc/design/tensor_array.md
@@ -0,0 +1,73 @@
# Design for TensorArray
TensorArray is a new concept borrowed from TensorFlow;
it is meant to be used with dynamic iteration primitives such as `while_loop` and `map_fn`.

This concept can support our new design of dynamic operations, and help refactor some existing layers that handle variable-length sentences,
such as `RecurrentGradientMachine`.

In [our design for dynamic RNN](https://github.com/PaddlePaddle/Paddle/pull/4401),
`TensorArray` is used to segment inputs and store states in all time steps.
By providing methods similar to those of a C++ array,
the definition of state-based dynamic models such as RNNs becomes more natural and flexible.

## Dynamic-Related Methods
Some basic methods are proposed as follows:

### stack()
Pack the values in a `TensorArray` into a tensor with rank one higher than each of the stored tensors.
### unstack(axis=0)
Unpack the given dimension of a rank-`R` tensor into rank-`(R-1)` tensors.
### concat()
Return the values in the `TensorArray` as a concatenated tensor.
### write(index, value, data_shared=true)
Write `value` at position `index` of the `TensorArray`.
### read(index)
Read the value at position `index` in the `TensorArray`.
### size()
Return the number of values.
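Putting the methods above together, the interface could look roughly like the sketch below. This is illustrative only: the method names follow the list above, but the exact signatures, the storage layout, and the `data_shared` semantics are assumptions rather than the final design.

```c++
#include <cstddef>
#include <vector>

// Stand-in for paddle::framework::Tensor, only to keep this sketch
// self-contained.
class Tensor {};

// Hypothetical TensorArray interface; all signatures are assumptions.
class TensorArray {
 public:
  // Pack all values into one tensor whose rank is one higher than
  // the rank of each stored value.
  Tensor Stack() const;

  // Split `source` along `axis` into rank-(R-1) tensors and store them.
  void Unstack(const Tensor& source, int axis = 0);

  // Return all values concatenated into a single tensor.
  Tensor Concat() const;

  // Write `value` at `index`; when data_shared is true, only metadata is
  // copied and the underlying buffer is shared, avoiding a deep copy.
  void Write(size_t index, const Tensor& value, bool data_shared = true);

  // Read the value stored at `index`.
  const Tensor& Read(size_t index) const;

  // Number of values currently stored.
  size_t size() const { return values_.size(); }

 private:
  std::vector<Tensor> values_;
};
```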
## LoDTensor-related Support
The `RecurrentGradientMachine` in Paddle serves as a flexible RNN layer; it takes variable-length sequences as input.
Because each step of an RNN can only take a tensor-represented batch of data as input,
the inputs need some preprocessing, such as sorting the sentences by length in descending order, cutting out the words of each time step, and packing them into new batches.

Such cut-like operations can be embedded into `TensorArray` as general methods called `unpack` and `pack`.

With these two methods, an RNN over variable-length sentences can be implemented like this:

```c++
// input is the variable-length data
LoDTensor sentence_input(xxx);
TensorArray ta;
Tensor indices_map;
Tensor boot_state = xxx;  // to initialize the RNN's first state
TensorArray::unpack(sentence_input, 1 /*level*/, true /*sort_by_length*/, &ta, &indices_map);
TensorArray step_outputs;
TensorArray states;

// initialize the first state
states.write(0, boot_state);

for (size_t step = 0; step < ta.size(); step++) {
  auto state = states.read(step);
  auto step_input = ta.read(step);
  // rnnstep is a function which acts as one step of an RNN
  auto step_output = rnnstep(step_input, state);
  // the output of this step also serves as the state for the next step
  states.write(step + 1, step_output);
  step_outputs.write(step, step_output, true /*data_shared*/);
}

// rnn_output is the final output of the RNN
LoDTensor rnn_output = step_outputs.pack(1 /*level*/, indices_map);
```

The code above shows that, by embedding the LoDTensor-related preprocessing operations into `TensorArray`,
the implementation of an RNN that supports variable-length sentences is far more concise than `RecurrentGradientMachine`,
because the latter mixes all the code together, which makes it hard to read and extend.

Some details are as follows.

### unpack(level, sort_by_length)
Split a LoDTensor at some `level` and generate batches; if `sort_by_length` is set, sort the sequences by length in descending order.

Returns:

- a new `TensorArray` whose values are LoDTensors representing batches of data;
- an int32 Tensor that stores the mapping from the new batches' indices to those of the original LoDTensor.
### pack(level, indices_map)
Recover the original LoD-arranged LoDTensor from the values in a `TensorArray`, using `level` and `indices_map`.
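The C++ changes below introduce a two-argument `LoDTensor::NumElements(level, idx)` that this kind of segmentation relies on. Before reading the diff, here is a standalone sketch of its intended semantics on the 3-level LoD example documented in `lod_tensor.h`; this is a mock over plain `std::vector`, not the Paddle implementation:

```c++
#include <cassert>
#include <cstddef>
#include <vector>

// Mock of the LoD offsets; in this design every level stores offsets
// into the underlying tensor's records.
using LoD = std::vector<std::vector<size_t>>;

size_t NumElements(const LoD& lod, size_t level, size_t idx) {
  // Last level: return the number of underlying records in sequence `idx`.
  if (level + 1 == lod.size()) return lod[level][idx + 1] - lod[level][idx];
  // Higher level: count the next-level sequences whose start offsets fall
  // inside [lod[level][idx], lod[level][idx + 1]).
  size_t begin = lod[level][idx], end = lod[level][idx + 1];
  size_t count = 0;
  for (size_t i = 0; i + 1 < lod[level + 1].size(); ++i) {
    size_t start = lod[level + 1][i];
    if (start >= begin && start < end) ++count;
  }
  return count;
}

int main() {
  // The 3-level example from the lod_tensor.h comment below.
  LoD lod = {{0, 10, 20}, {0, 5, 10, 15, 20}, {0, 2, 5, 7, 10, 12, 15, 20}};
  assert(NumElements(lod, 0, 0) == 2);  // two level-1 sequences in [0, 10)
  assert(NumElements(lod, 1, 1) == 2);  // level-2 sequences [5, 7) and [7, 10)
  assert(NumElements(lod, 2, 3) == 3);  // records [7, 10) on the last level
  return 0;
}
```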
diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc
index 3c349637cdb..5b7badf89c1 100644
--- a/paddle/framework/lod_tensor.cc
+++ b/paddle/framework/lod_tensor.cc
@@ -72,6 +72,22 @@ bool operator==(const LoD& a, const LoD& b) {
   return true;
 }
 
+size_t LoDTensor::NumElements(size_t level, size_t idx) const {
+  PADDLE_ENFORCE_LT(level, NumLevels());
+  PADDLE_ENFORCE_LT(idx, NumElements(level));
+  // the last level of LoD: just return the number of records in the Tensor
+  if (level == NumLevels() - 1) {
+    return lod_[level][idx + 1] - lod_[level][idx];
+  }
+  // a higher level of LoD with another level below it: return the number of
+  // lower-level elements
+  auto tmp = SliceInLevel(lod_, level, idx, idx + 1);
+  PADDLE_ENFORCE_GE(tmp.size(), 2);
+  // a 0 is stored in LoD as a placeholder, so the number of elements
+  // equals lod.size() - 1
+  return tmp[1].size() - 1;
+}
+
 void LoDTensor::ShrinkLevels(size_t level_begin, size_t level_end) {
   auto new_lod = framework::SliceLevels(lod_, level_begin, level_end);
   lod_ = new_lod;
diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h
index 82f58464264..49786a4a663 100644
--- a/paddle/framework/lod_tensor.h
+++ b/paddle/framework/lod_tensor.h
@@ -38,6 +38,18 @@ using Vector = thrust::host_vector<
     T, thrust::system::cuda::experimental::pinned_allocator<T>>;
 #endif
 
+/*
+ * A 3-level LoD stores
+ *
+ * 0 10 20
+ * 0 5 10 15 20
+ * 0 2 5 7 10 12 15 20
+ *
+ * - in each level, every element is an offset into the underlying Tensor
+ * - the first element should be 0, indicating that the first sequence starts
+ *   at offset 0
+ * - each sequence's begin and end (exclusive) are level[id] and level[id+1]
+ */
 using LoD = std::vector<Vector<size_t>>;
 
 LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end);
@@ -65,11 +77,8 @@ class LoDTensor : public Tensor {
    * Get an element from LoD.
    */
   size_t lod_element(size_t level, size_t elem) const {
-    PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
-                   NumLevels());
-    PADDLE_ENFORCE(elem < NumElements(level),
-                   "element begin [%d] out of range [%d]", elem,
-                   NumElements(level));
+    PADDLE_ENFORCE_LT(level, NumLevels());
+    PADDLE_ENFORCE_LT(elem, NumElements(level));
     return (lod_)[level][elem];
   }
 
@@ -82,12 +91,23 @@ class LoDTensor : public Tensor {
    * Number of elements in a level.
    */
   size_t NumElements(size_t level = 0) const {
-    PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
-                   NumLevels());
+    PADDLE_ENFORCE_LT(level, NumLevels());
     // the last offset is the end of the last element
     return (lod_)[level].size() - 1;
   }
 
+  /*
+   * Number of lower-level elements.
+   * For example, a 2-level LoDTensor
+   *
+   * level 0   |   |
+   * level 1  ||  |||
+   *
+   * NumElements(0, 0) returns 2
+   * NumElements(0, 1) returns 3
+   */
+  size_t NumElements(size_t level, size_t idx) const;
+
   /*
    * Shrink levels[level_begin:level_end]
    */
diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc
index 486b839738e..44f09f584fb 100644
--- a/paddle/framework/lod_tensor_test.cc
+++ b/paddle/framework/lod_tensor_test.cc
@@ -56,6 +56,12 @@ TEST_F(LoDTensorTester, NumElements) {
   ASSERT_EQ(lod_tensor_.NumElements(2), 8UL);
 }
 
+TEST_F(LoDTensorTester, NumElements2) {
+  ASSERT_EQ(lod_tensor_.NumElements(0, 0), 2UL);
+  ASSERT_EQ(lod_tensor_.NumElements(0, 1), 2UL);
+  ASSERT_EQ(lod_tensor_.NumElements(1, 1), 2UL);
+}
+
 TEST_F(LoDTensorTester, ShrinkLevels) {
   // slice 1 level
   for (size_t level = 0; level < 3UL; ++level) {
@@ -65,7 +71,7 @@ TEST_F(LoDTensorTester, ShrinkLevels) {
     LoDTensor new_lod_tensor = lod_tensor_;
     new_lod_tensor.ShrinkLevels(level, level + 1);
     ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL);
     ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level));
     ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
   }
-  // slice 2 level
+  // shrink 2 level
   for (size_t level = 0; level < 2UL; ++level) {
     LoDTensor new_lod_tensor = lod_tensor_;
     new_lod_tensor.ShrinkLevels(level, level + 2);
--
GitLab
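As a closing note, the last test exercises `ShrinkLevels`, which keeps only the LoD levels in `[level_begin, level_end)`. Below is a minimal standalone sketch of that behavior, again mocking the LoD with plain `std::vector` rather than using the Paddle types:

```c++
#include <cassert>
#include <vector>

using LoD = std::vector<std::vector<size_t>>;

// Keep only levels [level_begin, level_end), mirroring what
// LoDTensor::ShrinkLevels does through framework::SliceLevels.
LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) {
  return LoD(in.begin() + level_begin, in.begin() + level_end);
}

int main() {
  // The 3-level example from the lod_tensor.h comment.
  LoD lod = {{0, 10, 20}, {0, 5, 10, 15, 20}, {0, 2, 5, 7, 10, 12, 15, 20}};
  LoD shrunk = SliceLevels(lod, 1, 3);  // keep the two lowest levels
  assert(shrunk.size() == 2UL);
  assert(shrunk[0] == lod[1]);  // offsets are unchanged; only levels drop
  return 0;
}
```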