From 296167446c35228c7e259677d82a3c85b896a7b5 Mon Sep 17 00:00:00 2001
From: wanghaoshuang
Date: Tue, 24 Oct 2017 14:10:02 +0800
Subject: [PATCH] Rewrite sequence expand op

---
 paddle/framework/lod_tensor.cc               |  23 ----
 paddle/framework/lod_tensor.h                |   3 -
 paddle/operators/seq_expand_op.cc            | 109 +++++++--------
 paddle/operators/seq_expand_op.h             | 128 +++++-------------
 python/paddle/v2/framework/tests/op_test.py  |   2 -
 .../v2/framework/tests/test_seq_expand.py    |  96 +++----------
 6 files changed, 97 insertions(+), 264 deletions(-)

diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc
index a7b2b5b1ec..7c0ea0df78 100644
--- a/paddle/framework/lod_tensor.cc
+++ b/paddle/framework/lod_tensor.cc
@@ -112,28 +112,5 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin,
   lod_ = new_lod;
 }
 
-Vector<size_t> expand_lod(Vector<size_t> level, Vector<size_t> indexes,
-                          Vector<size_t> scales, bool repeat) {
-  Vector<size_t> result;
-  result.push_back(level[0]);
-  size_t start = 0, end = 0;
-  if (!repeat) {
-    for (size_t i = 0; i < scales.size(); ++i) {
-      result.push_back(result.back() + scales[i] * (level[i + 1] - level[i]));
-    }
-  } else {
-    for (size_t i = 0; i < scales.size(); ++i) {
-      start = indexes[i];
-      end = indexes[i + 1];
-      for (size_t j = 0; j < scales[i]; ++j) {
-        for (size_t index = start; index < end - 1; ++index) {
-          result.push_back(result.back() + level[index + 1] - level[index]);
-        }
-      }
-    }
-  }
-  return result;
-}
-
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h
index ec0b34878b..3895d3cb83 100644
--- a/paddle/framework/lod_tensor.h
+++ b/paddle/framework/lod_tensor.h
@@ -136,8 +136,5 @@ class LoDTensor : public Tensor {
   LoD lod_;
 };
 
-Vector<size_t> expand_lod(Vector<size_t> level, Vector<size_t> indexes,
-                          Vector<size_t> scales, bool repeat);
-
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/operators/seq_expand_op.cc b/paddle/operators/seq_expand_op.cc
index d02a94d164..660e86e9cc 100644
--- a/paddle/operators/seq_expand_op.cc
+++ b/paddle/operators/seq_expand_op.cc
@@ -27,20 +27,14 @@ class SeqExpandOp : public framework::OperatorWithKernel {
   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("X"),
                    "Input(X) of SeqExpandOp should not be null.");
-    int repeat = ctx->Attrs().Get<int>("repeat");
-    framework::DDim out_dim;
-    if (repeat == 0) {
-      PADDLE_ENFORCE(
-          ctx->HasInput("Y"),
-          "Input(Y) of SeqExpandOp should not be null while repeat == 0.");
-      out_dim = ctx->GetInputDim("Y");
-      ctx->ShareLoD("Y", "Out");
-    } else {
-      out_dim = ctx->GetInputDim("X");
-      out_dim[0] = out_dim[0] * repeat;
-    }
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
                    "Output(Out) of SeqExpandOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Y"),
+                   "Input(Y) of SeqExpandOp should not be null.");
+    framework::DDim out_dim = ctx->GetInputDim("Y");
+    ctx->ShareLoD("Y", "Out");
     ctx->SetOutputDim("Out", out_dim);
   }
 };
@@ -50,68 +44,63 @@ class SeqExpandOpMaker : public framework::OpProtoAndCheckerMaker {
   SeqExpandOpMaker(framework::OpProto* proto,
                    framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput(
-        "X",
-        "The input('X') of seq_expand op. It can be LoDTensor or base Tensor.");
-    AddInput(
-        "Y",
-        "The reference input('Y') of seq_expand op."
-        "It must be a LoDTensor with k-level(k>0)."
-        "This reference input is essential if 'repeat' attribute is not "
-        "configured."
-        "Input(X) will be expanded by LoD of input(Y) while repeat == 0.");
+    AddInput("X",
+             "(Tensor or LoDTensor) The input('X') of this operator can be a "
+             "LoDTensor or a base Tensor.");
+    AddInput("Y",
+             "(LoDTensor) The reference input('Y') of the seq_expand op. "
+             "It must be a LoDTensor with k levels (k > 0). "
+             "Input(X) will be expanded according to the LoD of input(Y). "
+             "The number of elements in the last LoD level of input('Y') "
+             "must be equal to dims[0] of input('X').");
     AddOutput("Out",
               "The output of seq_expand op."
-              "The output is a (k+1)-level LoDTensor"
-              "while input(X) being k-level LoDTensor."
-              "(Given base tensor is 0-level LoDTensor.)");
-    AddAttr<int>("repeat",
-                 "(type:int; default value: 0)"
-                 "Repeatting times of each element while expanding input(X)."
-                 "It works while input(Y) is not configured.")
-        .SetDefault(0);
+              "The LoD of the output is the same as input(Y)'s LoD.");
     AddComment(R"DOC(
-Expand k-level LoDTensor to (k+1)-level LoDTensor
-by lod of input(Y) or 'repeat' attribute.
+Expand input(X) according to the LoD of input(Y).
 
 Case 1:
 
-Given a 2-level LoDTensor X:
-    X.data = [a, b , c, d]
-    X.lod = [[0, 3, 4], [0, 1, 3, 4]]
-and
-    repeat = 2
-then we get 3-level LoDTensor
-    Out.lod = [[0, 6, 8],
-               [0, 3, 6, 7, 8],
-               [0, 1, 3, 4, 6, 7, 8]]
-    Out.data = [a, b, c, a, b, c, d, d]
+Given a 2-level LoDTensor input(X)
+    X.lod = [[0, 2, 3],
+             [0, 1, 3, 4]]
+    X.data = [a, b, c, d]
+    X.dims = [4, 1]
+and input(Y)
+    Y.lod = [[0, 2, 4],
+             [0, 3, 6, 7, 8]]
+then we get a 2-level LoDTensor
+    Out.lod = [[0, 2, 4],
+               [0, 3, 6, 7, 8]]
+    Out.data = [a, a, a, b, b, b, c, d]
+    Out.dims = [8, 1]
 
 Case 2:
 
-Given 2-level a LoDTensor X
-    X.data = [1, 2, 3, 4]
-    X.lod = [[0, 3, 4], [0, 1, 3, 4]]
-and
-    Y.lod = [[0, 6, 8],
-             [0, 3, 6, 7, 8],
-             [0,1,3,4,6,7,8]]
-then we get 3-level LoDTensor
-    Out.data = [1, 2, 3, 1, 2, 3, 4, 4]
-    Out.lod = [[0, 6, 8],
-               [0, 3, 6, 7, 8],
-               [0, 1, 3, 4, 6, 7, 8]]
+Given a 0-level LoDTensor input(X)
+    X.data = [a, b, c]
+    X.lod = NULL
+    X.dims = [3, 1]
+and input(Y)
+    Y.lod = [[0, 2, 3, 6]]
+then we get a 1-level LoDTensor
+    Out.lod = [[0, 2, 3, 6]]
+    Out.data = [a, a, b, c, c, c]
+    Out.dims = [6, 1]
 
 Case 3:
 
-Given a 0-level LoDTensor X
-    X.data = [1, 2, 3, 4]
+Given a 0-level LoDTensor input(X)
+    X.data = [[a, b], [c, d], [e, f]]
     X.lod = NULL
-and
-    repeat = 2
+    X.dims = [3, 2]
+and input(Y)
+    Y.lod = [[0, 2, 3, 6]]
 then we get a 1-level LoDTensor
-    Out.data = [1, 1, 2, 2, 3, 3, 4, 4]
-    Out.lod = [[0, 2, 4, 6, 8]]
+    Out.lod = [[0, 2, 3, 6]]
+    Out.data = [[a, b], [a, b], [c, d], [e, f], [e, f], [e, f]]
+    Out.dims = [6, 2]
 
 )DOC");
   }
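All three cases in the DOC comment above reduce to the same rule: row i of X is repeated as many times as the width of the i-th interval in the last LoD level of Y. A minimal NumPy sketch for checking the examples by hand (the helper name seq_expand_ref and the sample data are ours, not part of the patch):

    import numpy as np


    def seq_expand_ref(x, y_lod):
        # y_lod is in offset form, e.g. [[0, 2, 3, 6]]; the interval widths
        # of the last level give the per-row repeat counts [2, 1, 3].
        last = y_lod[-1]
        repeats = [last[i + 1] - last[i] for i in range(len(last) - 1)]
        assert len(repeats) == x.shape[0], "dims[0] of X must match Y's last LoD level"
        return np.repeat(x, repeats, axis=0)


    # Case 3 above: the three rows stand in for [[a, b], [c, d], [e, f]]
    # and are repeated 2, 1 and 3 times, giving a (6, 2) output.
    x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    print(seq_expand_ref(x, [[0, 2, 3, 6]]))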
- "Input(X) will be expanded by LoD of input(Y) while repeat == 0."); + AddInput("X", + "(Tensor or LoDTensor) The input('X') of this operator can be a " + "LoDTensor or a base Tensor."); + AddInput("Y", + "(LoDTensor)The reference input('Y') of seq_expand op." + "It must be a LoDTensor with k-level(k>0)." + "Input(X) will be expanded according to LOD of input(Y)." + "The element numbers of last level in input('Y') " + "must be equal to dims[0] of input('X')."); AddOutput("Out", "The output of seq_expand op." - "The output is a (k+1)-level LoDTensor" - "while input(X) being k-level LoDTensor." - "(Given base tensor is 0-level LoDTensor.)"); - AddAttr("repeat", - "(type:int; default value: 0)" - "Repeatting times of each element while expanding input(X)." - "It works while input(Y) is not configured.") - .SetDefault(0); + "The lod of output will be as same as input(Y)'s lod."); AddComment(R"DOC( -Expand k-level LoDTensor to (k+1)-level LoDTensor -by lod of input(Y) or 'repeat' attribute. +Expand input(X) according to LOD of input(Y). Case 1: -Given a 2-level LoDTensor X: - X.data = [a, b , c, d] - X.lod = [[0, 3, 4], [0, 1, 3, 4]] -and - repeat = 2 -then we get 3-level LoDTensor - Out.lod = [[0, 6, 8], - [0, 3, 6, 7, 8], - [0, 1, 3, 4, 6, 7, 8]] - Out.data = [a, b, c, a, b, c, d, d] +Given 2-level a LoDTensor input(X) + X.lod = [[0, 2, 3], + [0, 1, 3, 4]] + X.data = [a, b, c, d] + X.dims = [4, 1] +and input(Y) + Y.lod = [[0, 2, 4], + [0, 3, 6, 7, 8]] +then we get 2-level LoDTensor + Out.lod = [[0, 2, 4], + [0, 3, 6, 7, 8]] + Out.data = [a, a, a, b, b, b, c, d] + Out.dims = [8, 1] Case 2: -Given 2-level a LoDTensor X - X.data = [1, 2, 3, 4] - X.lod = [[0, 3, 4], [0, 1, 3, 4]] -and - Y.lod = [[0, 6, 8], - [0, 3, 6, 7, 8], - [0,1,3,4,6,7,8]] -then we get 3-level LoDTensor - Out.data = [1, 2, 3, 1, 2, 3, 4, 4] - Out.lod = [[0, 6, 8], - [0, 3, 6, 7, 8], - [0, 1, 3, 4, 6, 7, 8]] +Given a 0-level LoDTensor input(X) + X.data = [a, b, c] + X.lod = NULL + X.dims = [3, 1] +and input(Y) + Y.lod = [[0, 2, 3, 6]] +then we get 1-level LoDTensor + Out.lod = [[0, 2, 3, 6]] + Out.data = [a, a, b, c, c, c] + Out.dims = [6, 1] Case 3: -Given a 0-level LoDTensor X - X.data = [1, 2, 3, 4] +Given a 0-level LoDTensor input(X) + X.data = [[a, b], [c, d], [e, f]] X.lod = NULL -and - repeat = 2 + X.dims = [3, 2] +and input(Y) + Y.lod = [[0, 2, 3, 6]] then we get 1-level LoDTensor - Out.data = [1, 1, 2, 2, 3, 3, 4, 4] - Out.lod = [[0, 2, 4, 6, 8]] + Out.lod = [[0, 2, 3, 6]] + Out.data = [[a,b], [a,b] [c,d], [e, f], [e, f], [e, f]] + Out.dims = [6, 2] + )DOC"); } diff --git a/paddle/operators/seq_expand_op.h b/paddle/operators/seq_expand_op.h index e31f60db49..ad3f42116d 100644 --- a/paddle/operators/seq_expand_op.h +++ b/paddle/operators/seq_expand_op.h @@ -31,93 +31,28 @@ class SeqExpandKernel : public framework::OpKernel { auto* out = context.Output("Out"); const T* x_data = x->data(); auto x_dims = x->dims(); - auto x_lod = x->lod(); - - framework::Vector level; - size_t num = (x_lod.size() == 0) ? 
diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py
index f3108d5108..a88e9f0bb8 100644
--- a/python/paddle/v2/framework/tests/op_test.py
+++ b/python/paddle/v2/framework/tests/op_test.py
@@ -246,8 +246,6 @@ class OpTest(unittest.TestCase):
             else:
                 actual = np.array(self.scope.find_var(out_name).get_tensor())
             expect = self.outputs[out_name]
-            print "actual= %s" % actual
-            print "expect = %s" % expect
             self.assertTrue(
                 np.allclose(
                     actual, expect, atol=atol),
diff --git a/python/paddle/v2/framework/tests/test_seq_expand.py b/python/paddle/v2/framework/tests/test_seq_expand.py
index 2910af6b78..901102802b 100644
--- a/python/paddle/v2/framework/tests/test_seq_expand.py
+++ b/python/paddle/v2/framework/tests/test_seq_expand.py
@@ -3,66 +3,21 @@ import numpy as np
 from op_test import OpTest
 
 
-def repeat(list, starts, times, is_first):
-    newlist = [list[0]]
-    if is_first:
-        for i, time in enumerate(times):
-            size = list[i + 1] - list[i]
-            newlist.append(newlist[-1] + size * time)
-    else:
-        for i, time in enumerate(times):
-            start = list.index(starts[i])
-            end = list.index(starts[i + 1]) + 1
-            for t in range(time):
-                for index in range(start, end - 1):
-                    newlist.append(newlist[-1] + list[index + 1] - list[index])
-    return newlist
-
-
-def repeat_array(array, starts, times):
-    newlist = []
-    for i, time in enumerate(times):
-        for t in range(time):
-            newlist.extend(array[starts[i]:starts[i + 1]])
-    return newlist
-
-
-def toAbsOffset(lod):
-    for i in range(len(lod) - 2, -1, -1):
-        for j in range(len(lod[i])):
-            lod[i][j] = lod[i + 1][lod[i][j]]
-    return lod
-
-
 class TestSeqExpand(OpTest):
-    #class TestSeqExpand():
     def set_data(self):
-        x_data = np.random.uniform(0.1, 1, [4, 1]).astype('float32')
-        self.inputs = {'X': x_data}
-        self.repeat = 2
+        x_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32')
+        y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32')
+        y_lod = [[0, 1, 4, 8]]
+        self.inputs = {'X': x_data, 'Y': (y_data, y_lod)}
 
     def compute(self):
         x = self.inputs['X']
-        print "x= %s" % x
         x_data, x_lod = x if type(x) == tuple else (x, None)
         n = 1 + x_data.shape[0] if not x_lod else len(x_lod[0])
-        x_lod = [[i for i in range(n)]] + x_lod
-        x_abs_lod = toAbsOffset(x_lod)
-        if self.repeat:
-            print "repeat= %s" % self.repeat
-            self.attrs = {'repeat': self.repeat}
-            repeats = (len(x_lod[0]) - 1) * [self.repeat]
-        else:
-            y_data, y_lod = self.inputs['Y']
-            print "y_lod: %s" % y_lod
-            y_abs_lod = toAbsOffset(y_lod)
-            repeats = [((y_abs_lod[0][i + 1] - y_abs_lod[0][i]) /
-                        (x_abs_lod[0][i + 1] - x_abs_lod[0][i]))
-                       for i in range(len(y_abs_lod[0]) - 1)]
-        #out_lod = [repeat(x_lod[0], x_lod[0], repeats, True)] + [
-        #    repeat(lod, x_lod[0], repeats, False) for lod in x_lod[1:]
-        #]
-        out = repeat_array(x_data.tolist(), x_abs_lod[0], repeats)
+        y_data, y_lod = self.inputs['Y']
+        repeats = [y_lod[-1][i + 1] - y_lod[-1][i]
+                   for i in range(len(y_lod[-1]) - 1)]
+        out = x_data.repeat(repeats, axis=0)
         self.outputs = {'Out': out}
 
     def setUp(self):
@@ -78,39 +33,22 @@ class TestSeqExpand(OpTest):
 
 
 class TestSeqExpandCase1(TestSeqExpand):
-    def set_data(self):
-        x_data = np.random.uniform(0.1, 1, [7, 1]).astype('float32')
-        x_lod = [[0, 2, 3], [0, 2, 5, 7]]
-        self.inputs = {'X': (x_data, x_lod)}
-        self.repeat = 2
-
-
-class TestSeqExpandCase2(TestSeqExpand):
-    def set_data(self):
-        x_data = np.random.uniform(0.1, 1, [4, 1]).astype('float32')
-        self.inputs = {'X': x_data}
-        self.repeat = 2
-
-
-class TestSeqExpandCase3(TestSeqExpand):
-    def set_data(self):
-        x_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32')
-        y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32')
-        y_lod = [[0, 1, 4, 8]]
-        self.inputs = {'X': x_data, 'Y': (y_data, y_lod)}
-        self.repeat = None
-
-
-class TestSeqExpandCase4(TestSeqExpand):
     def set_data(self):
         x_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32')
         x_lod = [[0, 2, 5]]
         y_data = np.random.uniform(0.1, 1, [13, 1]).astype('float32')
         y_lod = [[0, 2, 5], [0, 2, 4, 7, 10, 13]]
         self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
-        self.repeat = None
+
+
+class TestSeqExpandCase2(TestSeqExpand):
+    def set_data(self):
+        x_data = np.random.uniform(0.1, 1, [1, 2, 2]).astype('float32')
+        x_lod = [[0, 1]]
+        y_data = np.random.uniform(0.1, 1, [2, 2, 2]).astype('float32')
+        y_lod = [[0, 2]]
+        self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
 
 
 if __name__ == '__main__':
     unittest.main()
-# TestSeqExpandCase4().setUp()
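If a regression test with a multi-interval LoD on X itself were wanted, it could follow the same pattern as the classes in test_seq_expand.py. This sketch is hypothetical and not part of the patch; since the kernel reads only the last level of Y's LoD, the repeats here would be [2, 2, 2, 3]:

    class TestSeqExpandCase3(TestSeqExpand):
        # Hypothetical extra case, not in the patch: X carries its own LoD,
        # but only Y's last LoD level drives the expansion.
        def set_data(self):
            x_data = np.random.uniform(0.1, 1, [4, 1]).astype('float32')
            x_lod = [[0, 2, 4]]
            y_data = np.random.uniform(0.1, 1, [9, 1]).astype('float32')
            y_lod = [[0, 2, 4, 6, 9]]
            self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}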
--
GitLab