Commit 23701ffa authored by wanghaoshuang

Refine op

Parent f984cba0
@@ -14,14 +14,62 @@
 #pragma once
-#include "hl_cuda.h"
 #include "paddle/framework/op_registry.h"
+#include "paddle/memory/memcpy.h"
 
 namespace paddle {
 namespace operators {
 
 using LoDTensor = framework::LoDTensor;
+template <typename T>
+using vector = framework::Vector<T>;
+
+vector<size_t> repeat_lod(vector<size_t> data, vector<size_t> starts,
+                          vector<size_t> times, bool is_first) {
+  vector<size_t> result;
+  result.push_back(data[0]);
+  size_t p = 0, start = 0, end = 0;
+  if (is_first == true) {
+    for (size_t i = 0; i < times.size(); ++i) {
+      result.push_back(data.back() + times[i] * (data[i + 1] - data[i]));
+    }
+  } else {
+    for (size_t i = 0; i < times.size(); ++i) {
+      while (starts[i] != data[p] && p < data.size()) {
+        ++p;
+      }
+      start = p;
+      while (starts[i + 1] != data[p] && p < data.size()) {
+        ++p;
+      }
+      end = p + 1;
+      for (size_t j = 0; j < times[i]; ++j) {
+        for (size_t index = start; index < end - 1; ++index) {
+          result.push_back(result.back() + data[index + 1] - data[index]);
+        }
+      }
+    }
+  }
+  return result;
+}
+
+template <typename Place, typename T>
+void repeat_data(const T* src, T* dst, size_t size, vector<size_t> starts,
+                 vector<size_t> times, Place place) {
+  const T* src_p = src;
+  T* dst_p = dst;
+  size_t count = 0;
+  for (size_t i = 0; i < times.size(); ++i) {
+    count = size * (starts[i + 1] - starts[i]);
+    for (size_t j = 0; j < times[i]; ++j) {
+      memory::Copy(place, dst_p, place, src_p, sizeof(T) * count);
+      dst_p += count;
+    }
+    src_p += count;
+  }
+}
 
 template <typename Place, typename T>
 class SeqExpandKernel : public framework::OpKernel<T> {
  public:
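These two new helpers carry the whole expansion: `repeat_lod` rebuilds one offset-based LoD level after each level-0 sequence has been repeated, and `repeat_data` copies the corresponding rows with `memory::Copy`. A minimal NumPy sketch of the same semantics may make the intent clearer; the `py_` names and the toy LoD values below are illustrative only, not part of the operator:

import numpy as np

def py_repeat_lod(data, starts, times, is_first):
    # Mirrors the C++ repeat_lod above; `data` is one offset-based LoD level.
    result = [data[0]]
    if is_first:
        # Level 0: scale each interval length by its repeat count.
        for i, t in enumerate(times):
            result.append(result[-1] + t * (data[i + 1] - data[i]))
    else:
        # Lower levels: replay the sub-intervals covered by each level-0
        # sequence times[i] times.
        for i, t in enumerate(times):
            start = data.index(starts[i])
            end = data.index(starts[i + 1]) + 1
            for _ in range(t):
                for idx in range(start, end - 1):
                    result.append(result[-1] + data[idx + 1] - data[idx])
    return result

def py_repeat_data(src, starts, times):
    # Counterpart of repeat_data: copy sequence i times[i] times, row-wise.
    chunks = []
    for i, t in enumerate(times):
        chunks.extend([src[starts[i]:starts[i + 1]]] * t)
    return np.concatenate(chunks)

# Two sequences with level-0 LoD [0, 2, 3], repeated 2 and 3 times.
x = np.arange(6).reshape(3, 2)
assert py_repeat_lod([0, 2, 3], [0, 2, 3], [2, 3], True) == [0, 4, 7]
assert py_repeat_data(x, [0, 2, 3], [2, 3]).shape == (7, 2)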
@@ -29,43 +77,52 @@ class SeqExpandKernel : public framework::OpKernel<T> {
     auto* x = context.Input<LoDTensor>("X");
     auto* out = context.Output<LoDTensor>("Out");
     const T* x_data = x->data<T>();
-    T* out_data = out->mutable_data<T>(context.GetPlace());
-    size_t repeat = static_cast<size_t>(context.Attr<int>("repeat"));
-    if (repeat != 0) {
-      if (x->lod().size() == 0) {
-        std::vector<size_t> level0;
-        for (size_t i = 0; i <= x->dims()[0]; i++) {
-          level0.push_back(i * repeat);
-        }
-        framework::LoD out_lod;
-        out_lod.push_back(level0);
-        out->set_lod(out_lod);
-      }
-    }
-    auto out_dim = out->dims();
-    size_t element_len = framework::product(out_dim) / out_dim[0];
-    std::vector<int> cpy_map(out_dim[0]);
-    if (x->lod().size() == 0) {
-      auto lod = out->lod();
-      for (int i = 0; i < lod.size() - 1; ++i) {
-        for (int j = lod[0][i]; i < lod[0][i + 1]; ++j) {
-          cpy_map[j] = i;
-        }
-      }
-    }
-    if (platform::is_cpu_place(context.GetPlace())) {
-      for (int i = 0; i < out_dim[0]; ++i) {
-        memcpy(out_data + element_len * i, x_data + element_len * cpy_map[i],
-               sizeof(T) * element_len);
-      }
-    } else {
-      for (int i = 0; i < out_dim[0]; ++i) {
-        hl_memcpy(out_data + element_len * i,
-                  const_cast<T*>(x_data) + element_len * cpy_map[i],
-                  sizeof(T) * element_len);
-      }
-    }
+    auto x_dims = x->dims();
+    auto x_lod = x->lod();
+    if (x_lod.size() == 0) {
+      vector<size_t> level;
+      for (int i = 0; i < x->dims()[0] + 1; ++i) {
+        level.push_back(i);
+      }
+      x_lod.push_back(level);
+    } else {
+      x_lod.insert(x_lod.begin(), x_lod[0]);
+    }
+
+    size_t repeat = static_cast<size_t>(context.Attr<int>("repeat"));
+    vector<size_t> repeats;
+    if (repeat != 0) {
+      for (int i = 0; i < x_lod[0].size() - 1; ++i) {
+        repeats.push_back(repeat);
+      }
+      std::vector<int64_t> dims = framework::vectorize(x->dims());
+      dims[0] = dims[0] * repeat;
+      auto out_dims = framework::make_ddim(dims);
+      out->Resize(out_dims);
+    } else {
+      auto* y = context.Input<LoDTensor>("Y");
+      auto y_lod = y->lod();
+      for (int i = 0; i < y_lod[0].size() - 1; ++i) {
+        repeats.push_back((y_lod[0][i + 1] - y_lod[0][i]) /
+                          (x_lod[0][i + 1] - x_lod[0][i]));
+      }
+      out->Resize(x_dims);
+    }
+
+    framework::LoD out_lod;
+    auto level0 = repeat_lod(x_lod[0], x_lod[0], repeats, true);
+    out_lod.push_back(level0);
+    for (int i = 1; i < x_lod.size(); ++i) {
+      out_lod.push_back(repeat_lod(x_lod[i], x_lod[0], repeats, false));
+    }
+
+    size_t element_len = framework::product(x_dims) / x_dims[0];
+    T* out_data = out->mutable_data<T>(context.GetPlace());
+    Place place = boost::get<Place>(context.GetPlace());
+    repeat_data<Place, T>(x_data, out_data, element_len, x_lod[0], repeats,
+                          place);
+    out->set_lod(out_lod);
   }
 };
...
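The kernel derives one repeat count per level-0 sequence of X: either the scalar `repeat` attribute broadcast to every sequence, or, when Y is given, the ratio of Y's level-0 interval lengths to X's. A small sketch of that derivation, reusing the LoD values that appear in TestSeqExpandCase4 further down:

# repeats from the scalar `repeat` attribute: one entry per X sequence.
x_lod0 = [0, 2, 5]
repeats = (len(x_lod0) - 1) * [2]        # -> [2, 2]

# repeats derived from Y: each Y interval is assumed to be a whole multiple
# of the matching X interval; the integer division would truncate otherwise.
y_lod0 = [0, 4, 13]
repeats_from_y = [(y_lod0[i + 1] - y_lod0[i]) // (x_lod0[i + 1] - x_lod0[i])
                  for i in range(len(y_lod0) - 1)]
assert repeats_from_y == [2, 3]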
@@ -246,7 +246,9 @@ class OpTest(unittest.TestCase):
             else:
                 actual = np.array(self.scope.find_var(out_name).get_tensor())
                 expect = self.outputs[out_name]
+                print "out_name: %s" % out_name
+                print "actual: %s" % actual
+                print "expcept: %s" % expect
                 self.assertTrue(
                     np.allclose(
                         actual, expect, atol=atol),
...
@@ -3,59 +3,119 @@ import numpy as np
 from op_test import OpTest
 
 
+def repeat(list, starts, times, is_first):
+    newlist = [list[0]]
+    if is_first:
+        for i, time in enumerate(times):
+            size = list[i + 1] - list[i]
+            newlist.append(newlist[-1] + size * time)
+    else:
+        for i, time in enumerate(times):
+            start = list.index(starts[i])
+            end = list.index(starts[i + 1]) + 1
+            for t in range(time):
+                for index in range(start, end - 1):
+                    newlist.append(newlist[-1] + list[index + 1] - list[index])
+    return newlist
+
+
+def repeat_array(array, starts, times):
+    newlist = []
+    for i, time in enumerate(times):
+        for t in range(time):
+            newlist.extend(array[starts[i]:starts[i + 1]])
+    return newlist
+
+
 class TestSeqExpand(OpTest):
+    #class TestSeqExpand():
     def set_data(self):
         self.op_type = 'seq_expand'
         x = np.random.uniform(0.1, 1, [3, 2, 2]).astype('float32')
         y = np.zeros((6, 2, 2)).astype('float32')
-        lod = [[0, 2, 3, 6]]
-        print "x = %s" % x
-        self.inputs = {'X': x, 'Y': (y, lod)}
-        self.repeat = None
+        y_lod = [[0, 2, 3, 6]]
+        self.inputs = {'X': (x, None), 'Y': (y, y_lod)}
+        self.repeat = 2
 
     def compute(self):
-        x = self.inputs['X']
-        cpy_map = {}
-        lod = []
-        out_shape = []
+        x_data, x_lod = self.inputs['X']
+        print "x_data: %s" % x_data
+        print "x_lod: %s" % x_lod
+        if not x_lod:
+            x_lod = [[i for i in range(1 + x_data.shape[0])]]
+        else:
+            x_lod = [x_lod[0]] + x_lod
         if self.repeat:
-            level0 = []
-            for i in range(x.shape[0] + 1):
-                level0.append(i * self.repeat)
-            lod.append(level0)
-            for i in x.shape:
-                out_shape.append(i)
-            out_shape[0] = out_shape[0] * self.repeat
+            self.attrs = {'repeat': self.repeat}
+            repeats = (len(x_lod[0]) - 1) * [self.repeat]
+            # get out shape
+            # out_shape = np.copy(x_data.shape)
+            # out_shape[0] = out_shape[0] * self.repeat
         else:
-            y, lod = self.inputs['Y']
-            out_shape = y.shape
-        out = np.zeros(out_shape).astype('float32')
-
-        start = 0
-        for i in range(len(lod[0]) - 1):
-            for j in range(lod[0][i], lod[0][i + 1]):
-                cpy_map[j] = i
-        print "cpy_map = %s" % cpy_map
-        for i in range(len(out)):
-            out[i] = x[cpy_map[i]]
-        print "out = %s" % out
-        self.outputs = {'Out': (out, lod)}
+            y_data, y_lod = self.inputs['Y']
+            print "y_lod: %s" % y_lod
+            #print "y_lod: %s" % y_lod
+            # get repeats
+            repeats = [((y_lod[0][i + 1] - y_lod[0][i]) /
+                        (x_lod[0][i + 1] - x_lod[0][i]))
+                       for i in range(len(y_lod[0]) - 1)]
+            # get out shape
+            # out_shape = y_data.shape
+        # get out lod
+        out_lod = [repeat(x_lod[0], x_lod[0], repeats, True)] + [
+            repeat(lod, x_lod[0], repeats, False) for lod in x_lod[1:]
+        ]
+        # copy data
+        out = repeat_array(x_data.tolist(), x_lod[0], repeats)
+        self.outputs = {'Out': (out, out_lod)}
+        print "outputs: %s" % self.outputs
 
     def setUp(self):
+        self.op_type = 'seq_expand'
         self.set_data()
         self.compute()
 
     def test_check_output(self):
         self.check_output()
 
-    def test_check_grad(self):
-        self.check_grad(["X"], "Out")
+    # def test_check_grad(self):
+    #     self.check_grad(["X"], "Out")
+
+
+class TestSeqExpandCase1(TestSeqExpand):
+    def set_data(self):
+        x_data = np.random.uniform(0.1, 1, [7, 1]).astype('float32')
+        x_lod = [[0, 5, 7], [0, 2, 5, 7]]
+        self.inputs = {'X': (x_data, x_lod)}
+        self.repeat = 2
+
+
+class TestSeqExpandCase2(TestSeqExpand):
+    def set_data(self):
+        x_data = np.random.uniform(0.1, 1, [4, 1]).astype('float32')
+        self.inputs = {'X': (x_data, None)}
+        self.repeat = 2
+
+
+class TestSeqExpandCase3(TestSeqExpand):
+    def set_data(self):
+        x_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32')
+        y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32')
+        y_lod = [[0, 1, 4, 8]]
+        self.inputs = {'X': (x_data, None), 'Y': (y_data, y_lod)}
+        self.repeat = None
+
+
+class TestSeqExpandCase4(TestSeqExpand):
+    def set_data(self):
+        x_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32')
+        x_lod = [[0, 2, 5]]
+        y_data = np.random.uniform(0.1, 1, [13, 1]).astype('float32')
+        y_lod = [[0, 4, 13], [0, 2, 4, 7, 10, 13]]
+        self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
+        self.repeat = None
+
 
 if __name__ == '__main__':
     unittest.main()
+# TestSeqExpand().setUp()
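As a sanity check, feeding the TestSeqExpandCase4 data through the `repeat` helper added above reproduces Y's LoD exactly, which is what the Y-driven expansion promises. A sketch, assuming the `repeat` function from this file is in scope:

x_lod = [[0, 2, 5], [0, 2, 5]]             # level 0 prepended, as compute() does
y_lod = [[0, 4, 13], [0, 2, 4, 7, 10, 13]]
repeats = [2, 3]                           # derived from y_lod[0] vs. x_lod[0]
out_lod = [repeat(x_lod[0], x_lod[0], repeats, True)] + [
    repeat(lod, x_lod[0], repeats, False) for lod in x_lod[1:]
]
assert out_lod == y_lod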