diff --git a/paddle/operators/seq_expand_op.h b/paddle/operators/seq_expand_op.h index 0c399fe196ef51c355ae241a8aff3e2695d8804a..cd1182c4f087f2de531671d93e439548b74ae709 100644 --- a/paddle/operators/seq_expand_op.h +++ b/paddle/operators/seq_expand_op.h @@ -14,14 +14,62 @@ #pragma once -#include "hl_cuda.h" #include "paddle/framework/op_registry.h" +#include "paddle/memory/memcpy.h" namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; +template +using vector = framework::Vector; + +vector repeat_lod(vector data, vector starts, + vector times, bool is_first) { + vector result; + result.push_back(data[0]); + size_t p = 0, start = 0, end = 0; + if (is_first == true) { + for (size_t i = 0; i < times.size(); ++i) { + result.push_back(data.back() + times[i] * (data[i + 1] - data[i])); + } + } else { + for (size_t i = 0; i < times.size(); ++i) { + while (starts[i] != data[p] && p < data.size()) { + ++p; + } + start = p; + while (starts[i + 1] != data[p] && p < data.size()) { + ++p; + } + end = p + 1; + for (size_t j = 0; j < times[i]; ++j) { + for (size_t index = start; index < end - 1; ++index) { + result.push_back(result.back() + data[index + 1] - data[index]); + } + } + } + } + return result; +} + +template +void repeat_data(const T* src, T* dst, size_t size, vector starts, + vector times, Place place) { + const T* src_p = src; + T* dst_p = dst; + size_t count = 0; + for (size_t i = 0; i < times.size(); ++i) { + count = size * (starts[i + 1] - starts[i]); + for (size_t j = 0; j < times[i]; ++j) { + memory::Copy(place, dst_p, place, src_p, sizeof(T) * count); + dst_p += count; + } + src_p += count; + } +} + template class SeqExpandKernel : public framework::OpKernel { public: @@ -29,43 +77,52 @@ class SeqExpandKernel : public framework::OpKernel { auto* x = context.Input("X"); auto* out = context.Output("Out"); const T* x_data = x->data(); - T* out_data = out->mutable_data(context.GetPlace()); - size_t repeat = static_cast(context.Attr("repeat")); + auto x_dims = x->dims(); + auto x_lod = x->lod(); - if (repeat != 0) { - if (x->lod().size() == 0) { - std::vector level0; - for (size_t i = 0; i <= x->dims()[0]; i++) { - level0.push_back(i * repeat); - } - framework::LoD out_lod; - out_lod.push_back(level0); - out->set_lod(out_lod); - } - } - auto out_dim = out->dims(); - size_t element_len = framework::product(out_dim) / out_dim[0]; - std::vector cpy_map(out_dim[0]); - if (x->lod().size() == 0) { - auto lod = out->lod(); - for (int i = 0; i < lod.size() - 1; ++i) { - for (int j = lod[0][i]; i < lod[0][i + 1]; ++j) { - cpy_map[j] = i; - } + if (x_lod.size() == 0) { + vector level; + for (int i = 0; i < x->dims()[0] + 1; ++i) { + level.push_back(i); } + x_lod.push_back(level); + } else { + x_lod.insert(x_lod.begin(), x_lod[0]); } - if (platform::is_cpu_place(context.GetPlace())) { - for (int i = 0; i < out_dim[0]; ++i) { - memcpy(out_data + element_len * i, x_data + element_len * cpy_map[i], - sizeof(T) * element_len); + + size_t repeat = static_cast(context.Attr("repeat")); + vector repeats; + if (repeat != 0) { + for (int i = 0; i < x_lod[0].size() - 1; ++i) { + repeats.push_back(repeat); } + std::vector dims = framework::vectorize(x->dims()); + dims[0] = dims[0] * repeat; + auto out_dims = framework::make_ddim(dims); + out->Resize(out_dims); } else { - for (int i = 0; i < out_dim[0]; ++i) { - hl_memcpy(out_data + element_len * i, - const_cast(x_data) + element_len * cpy_map[i], - sizeof(T) * element_len); + auto* y = context.Input("Y"); + auto y_lod = y->lod(); + for (int i = 0; i < y_lod[0].size() - 1; ++i) { + repeats.push_back((y_lod[0][i + 1] - y_lod[0][i]) / + (x_lod[0][i + 1] - x_lod[0][i])); } + out->Resize(x_dims); } + + framework::LoD out_lod; + auto level0 = repeat_lod(x_lod[0], x_lod[0], repeats, true); + out_lod.push_back(level0); + for (int i = 1; i < x_lod.size(); ++i) { + out_lod.push_back(repeat_lod(x_lod[i], x_lod[0], repeats, false)); + } + + size_t element_len = framework::product(x_dims) / x_dims[0]; + T* out_data = out->mutable_data(context.GetPlace()); + Place place = boost::get(context.GetPlace()); + repeat_data(x_data, out_data, element_len, x_lod[0], repeats, + place); + out->set_lod(out_lod); } }; diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py index 81067f38bbf64ac1ab4ccf02aa43b0a38b7d48ad..0b0de78caf9a075effa1739514cc23076b238d27 100644 --- a/python/paddle/v2/framework/tests/op_test.py +++ b/python/paddle/v2/framework/tests/op_test.py @@ -246,7 +246,9 @@ class OpTest(unittest.TestCase): else: actual = np.array(self.scope.find_var(out_name).get_tensor()) expect = self.outputs[out_name] - + print "out_name: %s" % out_name + print "actual: %s" % actual + print "expcept: %s" % expect self.assertTrue( np.allclose( actual, expect, atol=atol), diff --git a/python/paddle/v2/framework/tests/test_seq_expand.py b/python/paddle/v2/framework/tests/test_seq_expand.py index 4608d3c3bd6f3bdd4b61c5227884a1085afdb5a2..854148a8f1fe9a97423bb28771d9c9272227f4a0 100644 --- a/python/paddle/v2/framework/tests/test_seq_expand.py +++ b/python/paddle/v2/framework/tests/test_seq_expand.py @@ -3,59 +3,119 @@ import numpy as np from op_test import OpTest +def repeat(list, starts, times, is_first): + newlist = [list[0]] + if is_first: + for i, time in enumerate(times): + size = list[i + 1] - list[i] + newlist.append(newlist[-1] + size * time) + else: + for i, time in enumerate(times): + start = list.index(starts[i]) + end = list.index(starts[i + 1]) + 1 + for t in range(time): + for index in range(start, end - 1): + newlist.append(newlist[-1] + list[index + 1] - list[index]) + return newlist + + +def repeat_array(array, starts, times): + newlist = [] + for i, time in enumerate(times): + for t in range(time): + newlist.extend(array[starts[i]:starts[i + 1]]) + return newlist + + class TestSeqExpand(OpTest): - #class TestSeqExpand(): def set_data(self): self.op_type = 'seq_expand' x = np.random.uniform(0.1, 1, [3, 2, 2]).astype('float32') y = np.zeros((6, 2, 2)).astype('float32') - lod = [[0, 2, 3, 6]] - print "x = %s" % x - self.inputs = {'X': x, 'Y': (y, lod)} - self.repeat = None + y_lod = [[0, 2, 3, 6]] + self.inputs = {'X': (x, None), 'Y': (y, y_lod)} + self.repeat = 2 def compute(self): - x = self.inputs['X'] - cpy_map = {} - lod = [] - out_shape = [] + x_data, x_lod = self.inputs['X'] + print "x_data: %s" % x_data + print "x_lod: %s" % x_lod + if not x_lod: + x_lod = [[i for i in range(1 + x_data.shape[0])]] + else: + x_lod = [x_lod[0]] + x_lod if self.repeat: - level0 = [] - for i in range(x.shape[0] + 1): - level0.append(i * self.repeat) - lod.append(level0) - - for i in x.shape: - out_shape.append(i) - out_shape[0] = out_shape[0] * self.repeat + self.attrs = {'repeat': self.repeat} + repeats = (len(x_lod[0]) - 1) * [self.repeat] + # get out shape + # out_shape = np.copy(x_data.shape) + # out_shape[0] = out_shape[0] * self.repeat else: - y, lod = self.inputs['Y'] - out_shape = y.shape - out = np.zeros(out_shape).astype('float32') + y_data, y_lod = self.inputs['Y'] + print "y_lod: %s" % y_lod + #print "y_lod: %s" % y_lod + # get repeats + repeats = [((y_lod[0][i + 1] - y_lod[0][i]) / + (x_lod[0][i + 1] - x_lod[0][i])) + for i in range(len(y_lod[0]) - 1)] + # get out shape + # out_shape = y_data.shape + # get out lod - start = 0 - - for i in range(len(lod[0]) - 1): - for j in range(lod[0][i], lod[0][i + 1]): - cpy_map[j] = i - print "cpy_map = %s" % cpy_map - for i in range(len(out)): - out[i] = x[cpy_map[i]] - - print "out = %s" % out - self.outputs = {'Out': (out, lod)} + out_lod = [repeat(x_lod[0], x_lod[0], repeats, True)] + [ + repeat(lod, x_lod[0], repeats, False) for lod in x_lod[1:] + ] + # copy data + out = repeat_array(x_data.tolist(), x_lod[0], repeats) + self.outputs = {'Out': (out, out_lod)} + print "outputs: %s" % self.outputs def setUp(self): + self.op_type = 'seq_expand' self.set_data() self.compute() def test_check_output(self): self.check_output() - def test_check_grad(self): - self.check_grad(["X"], "Out") + +# def test_check_grad(self): +# self.check_grad(["X"], "Out") + + +class TestSeqExpandCase1(TestSeqExpand): + def set_data(self): + x_data = np.random.uniform(0.1, 1, [7, 1]).astype('float32') + x_lod = [[0, 5, 7], [0, 2, 5, 7]] + self.inputs = {'X': (x_data, x_lod)} + self.repeat = 2 + + +class TestSeqExpandCase2(TestSeqExpand): + def set_data(self): + x_data = np.random.uniform(0.1, 1, [4, 1]).astype('float32') + self.inputs = {'X': (x_data, None)} + self.repeat = 2 + + +class TestSeqExpandCase3(TestSeqExpand): + def set_data(self): + x_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32') + y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32') + y_lod = [[0, 1, 4, 8]] + self.inputs = {'X': (x_data, None), 'Y': (y_data, y_lod)} + self.repeat = None + + +class TestSeqExpandCase4(TestSeqExpand): + def set_data(self): + x_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32') + x_lod = [[0, 2, 5]] + y_data = np.random.uniform(0.1, 1, [13, 1]).astype('float32') + y_lod = [[0, 4, 13], [0, 2, 4, 7, 10, 13]] + self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)} + self.repeat = None if __name__ == '__main__': unittest.main() -# TestSeqExpand().setUp()