提交 23701ffa 编写于 作者: W wanghaoshuang

Refine op

上级 f984cba0
......@@ -14,14 +14,62 @@
#pragma once
#include "hl_cuda.h"
#include "paddle/framework/op_registry.h"
#include "paddle/memory/memcpy.h"
namespace paddle {
namespace operators {
using LoDTensor = framework::LoDTensor;
template <typename T>
using vector = framework::Vector<T>;
// Expands one level of a LoD offset table so that every top-level sequence
// appears times[i] times in a row.
//
// Parameters:
//   data     - offsets of the level being expanded; data[0] becomes the first
//              output offset. An empty `data` yields an empty result.
//   starts   - offsets delimiting the top-level sequences. Only read when
//              is_first is false, where starts[i] and starts[i + 1] are
//              located by value inside `data`. Must hold at least
//              times.size() + 1 entries in that case.
//   times    - repeat count for each top-level sequence.
//   is_first - true: `data` itself is the top level, so sequence i has length
//              data[i + 1] - data[i] and contributes times[i] times that
//              length (requires data.size() > times.size()).
//              false: `data` is a nested level; the run of sub-sequences lying
//              between starts[i] and starts[i + 1] is replayed times[i] times.
//
// Returns the new offset vector, built as a running sum of segment lengths.
vector<size_t> repeat_lod(vector<size_t> data, vector<size_t> starts,
                          vector<size_t> times, bool is_first) {
  vector<size_t> result;
  if (data.size() == 0) {
    return result;
  }
  result.push_back(data[0]);

  if (is_first) {
    for (size_t i = 0; i < times.size(); ++i) {
      // Accumulate on the last *output* offset; adding to data.back() would
      // not produce cumulative offsets.
      result.push_back(result.back() + times[i] * (data[i + 1] - data[i]));
    }
    return result;
  }

  // `p` only moves forward: the boundaries in `starts` are expected to occur
  // in `data` in increasing order.
  size_t p = 0;
  for (size_t i = 0; i < times.size(); ++i) {
    // Bounds are checked before dereferencing data[p].
    while (p < data.size() && data[p] != starts[i]) {
      ++p;
    }
    const size_t start = p;
    while (p < data.size() && data[p] != starts[i + 1]) {
      ++p;
    }
    // If the closing boundary is missing from `data`, stop at the last valid
    // offset so data[index + 1] below never reads past the end.
    const size_t last = p < data.size() ? p : data.size() - 1;
    for (size_t j = 0; j < times[i]; ++j) {
      for (size_t index = start; index < last; ++index) {
        result.push_back(result.back() + data[index + 1] - data[index]);
      }
    }
  }
  return result;
}
// Writes the source sequences to `dst` back to back, emitting sequence i
// times[i] times before moving on to sequence i + 1.
//
//   src, dst - source and destination buffers; both are addressed through
//              `place` when calling memory::Copy.
//   size     - number of T elements per unit of `starts`.
//   starts   - offsets delimiting the sequences; sequence i spans
//              [starts[i], starts[i + 1]).
//   times    - repeat count for each sequence.
//   place    - passed to memory::Copy as both destination and source place.
template <typename Place, typename T>
void repeat_data(const T* src, T* dst, size_t size, vector<size_t> starts,
                 vector<size_t> times, Place place) {
  const T* read_pos = src;
  T* write_pos = dst;
  for (size_t seq = 0; seq < times.size(); ++seq) {
    // Element count of the current sequence.
    const size_t chunk = size * (starts[seq + 1] - starts[seq]);
    for (size_t left = times[seq]; left > 0; --left) {
      memory::Copy(place, write_pos, place, read_pos, sizeof(T) * chunk);
      write_pos += chunk;
    }
    // The source advances once per sequence, regardless of the repeat count.
    read_pos += chunk;
  }
}
// Kernel for the seq_expand operator: reads input "X" (and, in one branch,
// "Y"), and fills output "Out" with repeated data plus a matching LoD.
//
// NOTE(review): this span is a flattened diff, not compilable code as shown.
// The hunk marker below hides whatever sits between `public:` and the first
// statement, and `repeat`, `element_len` and `out_data` are each declared
// twice, which suggests pre-change and post-change lines are interleaved.
// Confirm every note below against the real header.
template <typename Place, typename T>
class SeqExpandKernel : public framework::OpKernel<T> {
public:
......@@ -29,43 +77,52 @@ class SeqExpandKernel : public framework::OpKernel<T> {
// Fetch the input/output tensors and the raw data pointers.
auto* x = context.Input<LoDTensor>("X");
auto* out = context.Output<LoDTensor>("Out");
const T* x_data = x->data<T>();
T* out_data = out->mutable_data<T>(context.GetPlace());
size_t repeat = static_cast<size_t>(context.Attr<int>("repeat"));
auto x_dims = x->dims();
auto x_lod = x->lod();
// With a non-zero repeat and no LoD on X, the output level-0 offsets are
// 0, repeat, 2 * repeat, ... (one entry per row of X plus one).
if (repeat != 0) {
if (x->lod().size() == 0) {
std::vector<size_t> level0;
for (size_t i = 0; i <= x->dims()[0]; i++) {
level0.push_back(i * repeat);
}
framework::LoD out_lod;
out_lod.push_back(level0);
out->set_lod(out_lod);
}
}
auto out_dim = out->dims();
// Elements per row of Out.
size_t element_len = framework::product(out_dim) / out_dim[0];
// cpy_map[j] is the source row index that output row j is copied from.
std::vector<int> cpy_map(out_dim[0]);
if (x->lod().size() == 0) {
auto lod = out->lod();
for (int i = 0; i < lod.size() - 1; ++i) {
// NOTE(review): the loop condition below tests `i`, not `j` -- looks like a
// typo; confirm.
for (int j = lod[0][i]; i < lod[0][i + 1]; ++j) {
cpy_map[j] = i;
}
// Normalize x_lod so it always has a top level: without a LoD, synthesize
// offsets 0..dims[0]; otherwise duplicate the existing level 0 at the front.
if (x_lod.size() == 0) {
vector<size_t> level;
for (int i = 0; i < x->dims()[0] + 1; ++i) {
level.push_back(i);
}
x_lod.push_back(level);
} else {
x_lod.insert(x_lod.begin(), x_lod[0]);
}
// Row-by-row copy driven by cpy_map; memcpy when the place is a CPU place.
if (platform::is_cpu_place(context.GetPlace())) {
for (int i = 0; i < out_dim[0]; ++i) {
memcpy(out_data + element_len * i, x_data + element_len * cpy_map[i],
sizeof(T) * element_len);
// Per-sequence repeat counts: the "repeat" attribute for every sequence when
// it is non-zero, otherwise derived from Y's level-0 LoD (see below).
size_t repeat = static_cast<size_t>(context.Attr<int>("repeat"));
vector<size_t> repeats;
if (repeat != 0) {
for (int i = 0; i < x_lod[0].size() - 1; ++i) {
repeats.push_back(repeat);
}
// Out keeps X's shape except that the first dimension is scaled by `repeat`.
std::vector<int64_t> dims = framework::vectorize(x->dims());
dims[0] = dims[0] * repeat;
auto out_dims = framework::make_ddim(dims);
out->Resize(out_dims);
} else {
for (int i = 0; i < out_dim[0]; ++i) {
hl_memcpy(out_data + element_len * i,
const_cast<T*>(x_data) + element_len * cpy_map[i],
sizeof(T) * element_len);
auto* y = context.Input<LoDTensor>("Y");
auto y_lod = y->lod();
// Repeat count i = (length of Y's i-th level-0 sequence) divided by
// (length of X's i-th top-level sequence), using integer division.
for (int i = 0; i < y_lod[0].size() - 1; ++i) {
repeats.push_back((y_lod[0][i + 1] - y_lod[0][i]) /
(x_lod[0][i + 1] - x_lod[0][i]));
}
// NOTE(review): Out is resized to X's dims here even though repeat_data
// writes times[i] copies of each sequence -- confirm the buffer is large
// enough when any repeat count exceeds 1.
out->Resize(x_dims);
}
// Build the output LoD: the top level is expanded with is_first = true, the
// remaining levels are expanded relative to the top-level boundaries.
framework::LoD out_lod;
auto level0 = repeat_lod(x_lod[0], x_lod[0], repeats, true);
out_lod.push_back(level0);
for (int i = 1; i < x_lod.size(); ++i) {
out_lod.push_back(repeat_lod(x_lod[i], x_lod[0], repeats, false));
}
// Elements per row of X.
size_t element_len = framework::product(x_dims) / x_dims[0];
T* out_data = out->mutable_data<T>(context.GetPlace());
Place place = boost::get<Place>(context.GetPlace());
repeat_data<Place, T>(x_data, out_data, element_len, x_lod[0], repeats,
place);
out->set_lod(out_lod);
}
};
......
......@@ -246,7 +246,9 @@ class OpTest(unittest.TestCase):
else:
actual = np.array(self.scope.find_var(out_name).get_tensor())
expect = self.outputs[out_name]
print "out_name: %s" % out_name
print "actual: %s" % actual
print "expcept: %s" % expect
self.assertTrue(
np.allclose(
actual, expect, atol=atol),
......
......@@ -3,59 +3,119 @@ import numpy as np
from op_test import OpTest
def repeat(list, starts, times, is_first):
    """Expand one LoD offset list by repeating each top-level sequence.

    Args:
        list: offset list of the level being expanded; list[0] is kept as the
            first output offset.
        starts: offsets delimiting the top-level sequences. Only used when
            ``is_first`` is false, where ``starts[i]`` and ``starts[i + 1]``
            are looked up by value in ``list``.
        times: repeat count for each top-level sequence.
        is_first: when true, ``list`` itself is the top level and sequence i
            contributes ``times[i]`` times its own length; when false, the
            sub-sequences lying between ``starts[i]`` and ``starts[i + 1]``
            are replayed ``times[i]`` times.

    Returns:
        The new offset list, built as a running sum of segment lengths.

    Raises:
        ValueError: (from ``list.index``) if a boundary in ``starts`` does not
            occur in ``list`` when ``is_first`` is false.
    """
    newlist = [list[0]]
    if is_first:
        for i, time in enumerate(times):
            size = list[i + 1] - list[i]
            newlist.append(newlist[-1] + size * time)
    else:
        for i, time in enumerate(times):
            start = list.index(starts[i])
            end = list.index(starts[i + 1]) + 1
            for t in range(time):
                for index in range(start, end - 1):
                    newlist.append(newlist[-1] + list[index + 1] - list[index])
    return newlist
def repeat_array(array, starts, times):
    """Concatenate slices of ``array``, emitting slice i ``times[i]`` times.

    Args:
        array: sliceable sequence of rows.
        starts: offsets; slice i is ``array[starts[i]:starts[i + 1]]``.
        times: repeat count for each slice.

    Returns:
        A new list with the repeated slices laid out back to back.
    """
    newlist = []
    for i, time in enumerate(times):
        for t in range(time):
            newlist.extend(array[starts[i]:starts[i + 1]])
    return newlist
# Base test case for the 'seq_expand' op: set_data() chooses the inputs,
# compute() derives the expected output and LoD, setUp() runs both.
# NOTE(review): this span is a flattened diff. Indentation is gone, and
# self.inputs / self.repeat are assigned twice in set_data() while compute()
# mixes two different ways of building the output, which suggests pre-change
# and post-change lines are interleaved. Confirm against the real test file.
class TestSeqExpand(OpTest):
#class TestSeqExpand():
# Default inputs: X of shape [3, 2, 2] and a zero-filled Y of shape [6, 2, 2].
def set_data(self):
self.op_type = 'seq_expand'
x = np.random.uniform(0.1, 1, [3, 2, 2]).astype('float32')
y = np.zeros((6, 2, 2)).astype('float32')
lod = [[0, 2, 3, 6]]
print "x = %s" % x
self.inputs = {'X': x, 'Y': (y, lod)}
self.repeat = None
y_lod = [[0, 2, 3, 6]]
self.inputs = {'X': (x, None), 'Y': (y, y_lod)}
self.repeat = 2
# Builds self.outputs (and self.attrs when self.repeat is set) from self.inputs.
def compute(self):
x = self.inputs['X']
cpy_map = {}
lod = []
out_shape = []
x_data, x_lod = self.inputs['X']
print "x_data: %s" % x_data
print "x_lod: %s" % x_lod
# Normalize x_lod so it always has a top level: without a LoD, use offsets
# 0..rows; otherwise duplicate the existing level 0 at the front.
if not x_lod:
x_lod = [[i for i in range(1 + x_data.shape[0])]]
else:
x_lod = [x_lod[0]] + x_lod
# A truthy self.repeat means the 'repeat' attribute drives the expansion.
if self.repeat:
level0 = []
for i in range(x.shape[0] + 1):
level0.append(i * self.repeat)
lod.append(level0)
for i in x.shape:
out_shape.append(i)
out_shape[0] = out_shape[0] * self.repeat
self.attrs = {'repeat': self.repeat}
# One repeat count per top-level sequence of X.
repeats = (len(x_lod[0]) - 1) * [self.repeat]
# get out shape
# out_shape = np.copy(x_data.shape)
# out_shape[0] = out_shape[0] * self.repeat
else:
y, lod = self.inputs['Y']
out_shape = y.shape
out = np.zeros(out_shape).astype('float32')
y_data, y_lod = self.inputs['Y']
print "y_lod: %s" % y_lod
#print "y_lod: %s" % y_lod
# get repeats
# Repeat count i = length of Y's i-th level-0 sequence divided by the length
# of X's i-th top-level sequence.
repeats = [((y_lod[0][i + 1] - y_lod[0][i]) /
(x_lod[0][i + 1] - x_lod[0][i]))
for i in range(len(y_lod[0]) - 1)]
# get out shape
# out_shape = y_data.shape
# get out lod
start = 0
# cpy_map[j] is the source row index that output row j is copied from.
for i in range(len(lod[0]) - 1):
for j in range(lod[0][i], lod[0][i + 1]):
cpy_map[j] = i
print "cpy_map = %s" % cpy_map
for i in range(len(out)):
out[i] = x[cpy_map[i]]
print "out = %s" % out
self.outputs = {'Out': (out, lod)}
# Expected LoD: top level expanded with is_first=True, the remaining levels
# expanded relative to the top-level boundaries.
out_lod = [repeat(x_lod[0], x_lod[0], repeats, True)] + [
repeat(lod, x_lod[0], repeats, False) for lod in x_lod[1:]
]
# copy data
out = repeat_array(x_data.tolist(), x_lod[0], repeats)
self.outputs = {'Out': (out, out_lod)}
print "outputs: %s" % self.outputs
# unittest fixture hook: pick the inputs, then compute the expected outputs.
def setUp(self):
self.op_type = 'seq_expand'
self.set_data()
self.compute()
def test_check_output(self):
self.check_output()
# NOTE(review): the gradient check appears both live and commented out below;
# presumably one of the two is the removed side of the diff -- confirm.
def test_check_grad(self):
self.check_grad(["X"], "Out")
# def test_check_grad(self):
# self.check_grad(["X"], "Out")
class TestSeqExpandCase1(TestSeqExpand):
    """X carries a two-level LoD, no Y input, repeat attribute set to 2."""

    def set_data(self):
        x_data = np.random.uniform(0.1, 1, [7, 1]).astype('float32')
        x_lod = [[0, 5, 7], [0, 2, 5, 7]]
        self.inputs = {'X': (x_data, x_lod)}
        self.repeat = 2
class TestSeqExpandCase2(TestSeqExpand):
    """X has no LoD, no Y input, repeat attribute set to 2."""

    def set_data(self):
        x_data = np.random.uniform(0.1, 1, [4, 1]).astype('float32')
        self.inputs = {'X': (x_data, None)}
        self.repeat = 2
class TestSeqExpandCase3(TestSeqExpand):
    """X has no LoD; Y supplies a one-level LoD and repeat is unset."""

    def set_data(self):
        x_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32')
        y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32')
        y_lod = [[0, 1, 4, 8]]
        self.inputs = {'X': (x_data, None), 'Y': (y_data, y_lod)}
        self.repeat = None
class TestSeqExpandCase4(TestSeqExpand):
    """X has a one-level LoD; Y supplies a two-level LoD and repeat is unset."""

    def set_data(self):
        x_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32')
        x_lod = [[0, 2, 5]]
        y_data = np.random.uniform(0.1, 1, [13, 1]).astype('float32')
        y_lod = [[0, 4, 13], [0, 2, 4, 7, 10, 13]]
        self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
        self.repeat = None
# Run the unittest runner when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
    # TestSeqExpand().setUp()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册