Commit 29616744 authored by wanghaoshuang

Rewrite sequence expand op

Parent 4e8fccff
@@ -112,28 +112,5 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin,
   lod_ = new_lod;
 }
-Vector<size_t> expand_lod(Vector<size_t> level, Vector<size_t> indexes,
-                          Vector<size_t> scales, bool repeat) {
-  Vector<size_t> result;
-  result.push_back(level[0]);
-  size_t start = 0, end = 0;
-  if (!repeat) {
-    for (size_t i = 0; i < scales.size(); ++i) {
-      result.push_back(result.back() + scales[i] * (level[i + 1] - level[i]));
-    }
-  } else {
-    for (size_t i = 0; i < scales.size(); ++i) {
-      start = indexes[i];
-      end = indexes[i + 1];
-      for (size_t j = 0; j < scales[i]; ++j) {
-        for (size_t index = start; index < end - 1; ++index) {
-          result.push_back(result.back() + level[index + 1] - level[index]);
-        }
-      }
-    }
-  }
-  return result;
-}
 }  // namespace framework
 }  // namespace paddle
@@ -136,8 +136,5 @@ class LoDTensor : public Tensor {
   LoD lod_;
 };
-Vector<size_t> expand_lod(Vector<size_t> level, Vector<size_t> indexes,
-                          Vector<size_t> scales, bool repeat);
 }  // namespace framework
 }  // namespace paddle
@@ -27,20 +27,14 @@ class SeqExpandOp : public framework::OperatorWithKernel {
   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("X"),
                    "Input(X) of SeqExpandOp should not be null.");
-    int repeat = ctx->Attrs().Get<int>("repeat");
-    framework::DDim out_dim;
-    if (repeat == 0) {
-      PADDLE_ENFORCE(
-          ctx->HasInput("Y"),
-          "Input(Y) of SeqExpandOp should not be null while repeat == 0.");
-      out_dim = ctx->GetInputDim("Y");
-      ctx->ShareLoD("Y", "Out");
-    } else {
-      out_dim = ctx->GetInputDim("X");
-      out_dim[0] = out_dim[0] * repeat;
-    }
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
                    "Output(Out) of SeqExpandOp should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasInput("Y"),
+        "Input(Y) of SeqExpandOp should not be null while repeat == 0.");
+    framework::DDim out_dim;
+    out_dim = ctx->GetInputDim("Y");
+    ctx->ShareLoD("Y", "Out");
     ctx->SetOutputDim("Out", out_dim);
   }
 };
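A quick way to read the new InferShape contract: Out now always takes Y's dims and shares Y's LoD, and (as the kernel later enforces) dims[0] of X must match the number of segments in the last LoD level of Y. A minimal sketch of that consistency check; the helper name is illustrative only and not part of the Paddle API:

```python
def check_seq_expand_inputs(x_dims, y_lod):
    """Hypothetical helper mirroring the SeqExpand shape check (not Paddle API)."""
    num_segments = len(y_lod[-1]) - 1   # segments in Y's last LoD level
    assert x_dims[0] == num_segments, (
        "dims[0] of Input(X) must equal the number of segments "
        "in the last LoD level of Input(Y)")


check_seq_expand_inputs([4, 1], [[0, 2, 4], [0, 3, 6, 7, 8]])  # 4 segments -> ok
```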
@@ -50,68 +44,63 @@ class SeqExpandOpMaker : public framework::OpProtoAndCheckerMaker {
   SeqExpandOpMaker(framework::OpProto* proto,
                    framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput(
-        "X",
-        "The input('X') of seq_expand op. It can be LoDTensor or base Tensor.");
-    AddInput(
-        "Y",
-        "The reference input('Y') of seq_expand op."
-        "It must be a LoDTensor with k-level(k>0)."
-        "This reference input is essential if 'repeat' attribute is not "
-        "configured."
-        "Input(X) will be expanded by LoD of input(Y) while repeat == 0.");
+    AddInput("X",
+             "(Tensor or LoDTensor) The input('X') of this operator can be a "
+             "LoDTensor or a base Tensor.");
+    AddInput("Y",
+             "(LoDTensor)The reference input('Y') of seq_expand op."
+             "It must be a LoDTensor with k-level(k>0)."
+             "Input(X) will be expanded according to LOD of input(Y)."
+             "The element numbers of last level in input('Y') "
+             "must be equal to dims[0] of input('X').");
     AddOutput("Out",
               "The output of seq_expand op."
-              "The output is a (k+1)-level LoDTensor"
-              "while input(X) being k-level LoDTensor."
-              "(Given base tensor is 0-level LoDTensor.)");
-    AddAttr<int>("repeat",
-                 "(type:int; default value: 0)"
-                 "Repeatting times of each element while expanding input(X)."
-                 "It works while input(Y) is not configured.")
-        .SetDefault(0);
+              "The lod of output will be as same as input(Y)'s lod.");
     AddComment(R"DOC(
-Expand k-level LoDTensor to (k+1)-level LoDTensor
-by lod of input(Y) or 'repeat' attribute.
-
-Case 1:
-
-Given a 2-level LoDTensor X:
-    X.data = [a, b , c, d]
-    X.lod = [[0, 3, 4], [0, 1, 3, 4]]
-and
-    repeat = 2
-then we get 3-level LoDTensor
-    Out.lod = [[0, 6, 8],
-               [0, 3, 6, 7, 8],
-               [0, 1, 3, 4, 6, 7, 8]]
-    Out.data = [a, b, c, a, b, c, d, d]
-
-Case 2:
-
-Given 2-level a LoDTensor X
-    X.data = [1, 2, 3, 4]
-    X.lod = [[0, 3, 4], [0, 1, 3, 4]]
-and
-    Y.lod = [[0, 6, 8],
-             [0, 3, 6, 7, 8],
-             [0,1,3,4,6,7,8]]
-then we get 3-level LoDTensor
-    Out.data = [1, 2, 3, 1, 2, 3, 4, 4]
-    Out.lod = [[0, 6, 8],
-               [0, 3, 6, 7, 8],
-               [0, 1, 3, 4, 6, 7, 8]]
-
-Case 3:
-
-Given a 0-level LoDTensor X
-    X.data = [1, 2, 3, 4]
-    X.lod = NULL
-and
-    repeat = 2
-then we get 1-level LoDTensor
-    Out.data = [1, 1, 2, 2, 3, 3, 4, 4]
-    Out.lod = [[0, 2, 4, 6, 8]]
+Expand input(X) according to LOD of input(Y).
+
+Case 1:
+
+Given 2-level a LoDTensor input(X)
+    X.lod = [[0, 2, 3],
+             [0, 1, 3, 4]]
+    X.data = [a, b, c, d]
+    X.dims = [4, 1]
+and input(Y)
+    Y.lod = [[0, 2, 4],
+             [0, 3, 6, 7, 8]]
+then we get 2-level LoDTensor
+    Out.lod = [[0, 2, 4],
+               [0, 3, 6, 7, 8]]
+    Out.data = [a, a, a, b, b, b, c, d]
+    Out.dims = [8, 1]
+
+Case 2:
+
+Given a 0-level LoDTensor input(X)
+    X.data = [a, b, c]
+    X.lod = NULL
+    X.dims = [3, 1]
+and input(Y)
+    Y.lod = [[0, 2, 3, 6]]
+then we get 1-level LoDTensor
+    Out.lod = [[0, 2, 3, 6]]
+    Out.data = [a, a, b, c, c, c]
+    Out.dims = [6, 1]
+
+Case 3:
+
+Given a 0-level LoDTensor input(X)
+    X.data = [[a, b], [c, d], [e, f]]
+    X.lod = NULL
+    X.dims = [3, 2]
+and input(Y)
+    Y.lod = [[0, 2, 3, 6]]
+then we get 1-level LoDTensor
+    Out.lod = [[0, 2, 3, 6]]
+    Out.data = [[a,b], [a,b] [c,d], [e, f], [e, f], [e, f]]
+    Out.dims = [6, 2]
+
 )DOC");
   }
......
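For readers checking the updated examples in the DOC string above, Case 1 can be reproduced with plain numpy: the segment lengths of Y's last LoD level give the per-row repeat counts. This is only an illustrative sketch of the documented behaviour, not code from this patch:

```python
import numpy as np

x = np.array([[1.0], [2.0], [3.0], [4.0]])   # stands in for X.data = [a, b, c, d]
y_lod = [[0, 2, 4], [0, 3, 6, 7, 8]]         # LoD of Y from Case 1

last = y_lod[-1]
repeats = [last[i + 1] - last[i] for i in range(len(last) - 1)]  # [3, 3, 1, 1]

out = np.repeat(x, repeats, axis=0)
print(out.ravel().tolist())
# [1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 4.0] ~ [a, a, a, b, b, b, c, d]
# Out keeps Y's LoD ([[0, 2, 4], [0, 3, 6, 7, 8]]) and Out.dims == [8, 1].
```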
@@ -31,93 +31,28 @@ class SeqExpandKernel : public framework::OpKernel<T> {
     auto* out = context.Output<LoDTensor>("Out");
     const T* x_data = x->data<T>();
     auto x_dims = x->dims();
-    auto x_lod = x->lod();
-    framework::Vector<size_t> level;
-    size_t num = (x_lod.size() == 0) ? (x->dims()[0] + 1) : x_lod[0].size();
-    for (int i = 0; i < num; ++i) {
-      level.push_back(i);
-    }
-    x_lod.push_back(level);
-    size_t repeat = static_cast<size_t>(context.Attr<int>("repeat"));
-    framework::Vector<size_t> scales;
-    if (repeat != 0) {
-      for (int i = 0; i < x_lod[0].size() - 1; ++i) {
-        scales.push_back(repeat);
-      }
-      std::vector<int64_t> dims = framework::vectorize(x->dims());
-      dims[0] = dims[0] * repeat;
-      auto out_dims = framework::make_ddim(dims);
-      out->Resize(out_dims);
-    } else {
-      auto* y = context.Input<LoDTensor>("Y");
-      auto y_lod = y->lod();
-      auto y_abs_lod = y_lod.ToAbsOffset();
-      auto x_abs_lod = x_lod.ToAbsOffset();
-      for (int i = 0; i < y_abs_lod[0].size() - 1; ++i) {
-        scales.push_back((y_abs_lod[0][i + 1] - y_abs_lod[0][i]) /
-                         (x_abs_lod[0][i + 1] - x_abs_lod[0][i]));
-      }
-      out->Resize(y->dims());
-    }
-    framework::Vector<size_t> indexes;
-    for (int size_t i = 0; i < x_lod[0]; ++i) {
-      indexes[i] = x_lod[0];
-    }
-    framework::LoD out_lod;
-    auto level0 = framework::expand_lod(indexes, x_lod[0], scales, false);
-    out_lod.push_back(level0);
-    for (int i = 1; i < x_lod.size(); ++i) {
-      for (int j = 0; j < indexes.size(); ++j) {
-        indexes[j] = x_lod[i - 1][indexes[j]];
-      }
-      out_lod.push_back(framework::expand_lod(x_lod[i], indexes, scales, true));
-    }
+    auto* y = context.Input<LoDTensor>("Y");
+    PADDLE_ENFORCE_EQ(x_dims[0], y->lod().back().size() - 1,
+                      "The size of last lod level in Input(Y)"
+                      "must be equal to dims[0] of Input(X).");
+    out->set_lod(y->lod());
+    out->Resize(y->dims());
+    auto place = context.GetEigenDevice<Place>();
     size_t element_len = framework::product(x_dims) / x_dims[0];
     T* out_data = out->mutable_data<T>(context.GetPlace());
-    // copy data
-    auto place = context.GetPlace();
-    size_t count = 0;
-    if (platform::is_cpu_place(place)) {
-      auto& cpu_place = boost::get<platform::CPUPlace>(place);
-      for (size_t i = 0; i < scales.size(); ++i) {
-        count = element_len * (x_abs_lod[0][i + 1] - x_abs_lod[0][i]);
-        for (size_t j = 0; j < scales[i]; ++j) {
-          memory::Copy(cpu_place, out_data, cpu_place, x_data,
-                       sizeof(T) * count);
-          out_data += count;
-        }
-        x_data += count;
-      }
-    } else {
-#ifdef PADDLE_WITH_CUDA
-      auto& gpu_place = boost::get<platform::GPUPlace>(place);
-      auto stream = reinterpret_cast<const platform::CUDADeviceContext&>(
-                        context.device_context())
-                        .stream();
-      for (size_t i = 0; i < scales.size(); ++i) {
-        count = element_len * (x_abs_lod[0][i + 1] - x_abs_lod[0][i]);
-        for (size_t j = 0; j < scales[i]; ++j) {
-          memory::Copy(gpu_place, out_data, gpu_place, x_data,
-                       sizeof(T) * count, stream);
-          out_data += count;
-        }
-        x_data += count;
-      }
-#else
-      PADDLE_THROW("Paddle is not compiled with GPU");
-#endif
-    }
-    out->set_lod(out_lod);
-    for (size_t i = 0; i < lod.size; i++) {
-      for (size_t j = 0; j < lod[i].size(); j++) {
-        LOG(INFO) << "lod[" << i << "][" << j "] = " << lod[i][j];
-      }
-    }
+    auto out_starts = out->lod().back();
+
+    for (size_t i = 0; i < out_starts.size() - 1; i++) {
+      int scale = out_starts[i + 1] - out_starts[i];
+      Eigen::TensorMap<
+          Eigen::Tensor<const T, 2, Eigen::RowMajor, Eigen::DenseIndex>>
+          x_t(x_data, 1, element_len);
+      Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, Eigen::DenseIndex>>
+          out_t(out_data, scale, element_len);
+      Eigen::array<int, 2> cast({scale, 1});
+      out_t.device(place) = x_t.broadcast(cast);
+      x_data += element_len;
+      out_data += element_len * scale;
+    }
   }
 };
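The rewritten forward kernel iterates over the last LoD level of Out (which is Y's LoD) and broadcasts one row of X into `scale` consecutive rows of Out. A rough numpy equivalent of that loop, useful for checking the indexing; it is a sketch, not the Paddle implementation:

```python
import numpy as np

def seq_expand_forward(x, out_starts):
    """x: [n, element_len]; out_starts: last LoD level of Out (== Y's last level)."""
    element_len = x.shape[1]
    out = np.empty((out_starts[-1], element_len), dtype=x.dtype)
    for i in range(len(out_starts) - 1):
        scale = out_starts[i + 1] - out_starts[i]
        # Same effect as the Eigen broadcast: row i of x fills `scale` rows of out.
        out[out_starts[i]:out_starts[i + 1], :] = x[i]
    return out


x = np.arange(4, dtype=np.float32).reshape(4, 1)
print(seq_expand_forward(x, [0, 3, 6, 7, 8]).ravel())
# -> [0. 0. 0. 1. 1. 1. 2. 3.]
```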
@@ -130,25 +65,24 @@ class SeqExpandGradKernel : public framework::OpKernel<T> {
     auto* x = context.Input<LoDTensor>("X");
     auto* out = context.Input<LoDTensor>("Out");
     auto* d_x = context.Output<LoDTensor>(framework::GradVarName("X"));
-    auto out_lod = out->lod();
-    auto out_abs_lod = out_lod.ToAbsOffset();
+    auto out_last_level = out->lod().back();
     d_x->set_lod(x->lod());
     const T* d_out_data = d_out->data<T>();
     auto d_out_dims = d_out->dims();
     T* d_x_data = d_x->mutable_data<T>(context.GetPlace());
     size_t element_len = framework::product(d_out_dims) / d_out_dims[0];
-    for (size_t i = 0; i < out->NumElements(); ++i) {
-      size_t ele_count = out_abs_lod[0][i + 1] - out_abs_lod[0][i];
-      size_t repeat = out->NumElements(0, i);
-      Eigen::TensorMap<Eigen::Tensor<const T, 2>> d_out_t(
-          d_out_data, static_cast<int>(repeat),
-          static_cast<int>((ele_count * element_len) / repeat));
-      Eigen::TensorMap<Eigen::Tensor<T, 1>> d_x_t(
-          d_x_data, static_cast<int>((ele_count * element_len) / repeat));
+
+    for (size_t i = 0; i < out_last_level.size() - 1; ++i) {
+      size_t repeat = out_last_level[i + 1] - out_last_level[i];
+      Eigen::TensorMap<
+          Eigen::Tensor<const T, 2, Eigen::RowMajor, Eigen::DenseIndex>>
+          d_out_t(d_out_data, static_cast<int>(repeat), element_len);
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>
+          d_x_t(d_x_data, static_cast<int>(element_len));
       auto place = context.GetEigenDevice<Place>();
       d_x_t.device(place) = d_out_t.sum(Eigen::array<int, 1>({{0}}));
-      d_out_data += (ele_count * element_len);
-      d_x_data += ((ele_count * element_len) / repeat);
+      d_out_data += (repeat * element_len);
+      d_x_data += element_len;
     }
   }
 };
......
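The gradient kernel above is the mirror image of the forward pass: each row of d(X) accumulates the sum of the d(Out) rows that were expanded from it. A small numpy sketch of that reduction under the same last-level LoD layout (again illustrative, not Paddle code):

```python
import numpy as np

def seq_expand_backward(d_out, out_starts):
    """d_out: [total_rows, element_len]; out_starts: last LoD level of Out."""
    element_len = d_out.shape[1]
    d_x = np.empty((len(out_starts) - 1, element_len), dtype=d_out.dtype)
    for i in range(len(out_starts) - 1):
        # Sum over the `repeat` rows produced from x[i] in the forward pass.
        d_x[i] = d_out[out_starts[i]:out_starts[i + 1], :].sum(axis=0)
    return d_x


d_out = np.ones((8, 1), dtype=np.float32)
print(seq_expand_backward(d_out, [0, 3, 6, 7, 8]).ravel())
# -> [3. 3. 1. 1.]  (each entry is the repeat count of that input row)
```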
@@ -246,8 +246,6 @@ class OpTest(unittest.TestCase):
             else:
                 actual = np.array(self.scope.find_var(out_name).get_tensor())
                 expect = self.outputs[out_name]
-                print "actual= %s" % actual
-                print "expect = %s" % expect
                 self.assertTrue(
                     np.allclose(
                         actual, expect, atol=atol),
......
@@ -3,66 +3,21 @@ import numpy as np
 from op_test import OpTest
 
-
-def repeat(list, starts, times, is_first):
-    newlist = [list[0]]
-    if is_first:
-        for i, time in enumerate(times):
-            size = list[i + 1] - list[i]
-            newlist.append(newlist[-1] + size * time)
-    else:
-        for i, time in enumerate(times):
-            start = list.index(starts[i])
-            end = list.index(starts[i + 1]) + 1
-            for t in range(time):
-                for index in range(start, end - 1):
-                    newlist.append(newlist[-1] + list[index + 1] - list[index])
-    return newlist
-
-
-def repeat_array(array, starts, times):
-    newlist = []
-    for i, time in enumerate(times):
-        for t in range(time):
-            newlist.extend(array[starts[i]:starts[i + 1]])
-    return newlist
-
-
-def toAbsOffset(lod):
-    for i in range(len(lod) - 2, -1, -1):
-        for j in range(len(lod[i])):
-            lod[i][j] = lod[i + 1][lod[i][j]]
-    return lod
-
 
 class TestSeqExpand(OpTest):
-    #class TestSeqExpand():
     def set_data(self):
-        x_data = np.random.uniform(0.1, 1, [4, 1]).astype('float32')
-        self.inputs = {'X': x_data}
-        self.repeat = 2
+        x_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32')
+        y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32')
+        y_lod = [[0, 1, 4, 8]]
+        self.inputs = {'X': x_data, 'Y': (y_data, y_lod)}
 
     def compute(self):
         x = self.inputs['X']
-        print "x= %s" % x
        x_data, x_lod = x if type(x) == tuple else (x, None)
        n = 1 + x_data.shape[0] if not x_lod else len(x_lod[0])
-        x_lod = [[i for i in range(n)]] + x_lod
-        x_abs_lod = toAbsOffset(x_lod)
-        if self.repeat:
-            print "repeat= %s" % self.repeat
-            self.attrs = {'repeat': self.repeat}
-            repeats = (len(x_lod[0]) - 1) * [self.repeat]
-        else:
-            y_data, y_lod = self.inputs['Y']
-            print "y_lod: %s" % y_lod
-            y_abs_lod = toAbsOffset(y_lod)
-            repeats = [((y_abs_lod[0][i + 1] - y_abs_lod[0][i]) /
-                        (x_abs_lod[0][i + 1] - x_abs_lod[0][i]))
-                       for i in range(len(y_abs_lod[0]) - 1)]
-        #out_lod = [repeat(x_lod[0], x_lod[0], repeats, True)] + [
-        #    repeat(lod, x_lod[0], repeats, False) for lod in x_lod[1:]
-        #]
-        out = repeat_array(x_data.tolist(), x_abs_lod[0], repeats)
+        y_data, y_lod = self.inputs['Y']
+        repeats = [((y_lod[-1][i + 1] - y_lod[-1][i]))
+                   for i in range(len(y_lod[-1]) - 1)]
+        out = x_data.repeat(repeats, axis=0)
         self.outputs = {'Out': out}
 
     def setUp(self):
@@ -78,39 +33,22 @@ class TestSeqExpand(OpTest):
 
 class TestSeqExpandCase1(TestSeqExpand):
-    def set_data(self):
-        x_data = np.random.uniform(0.1, 1, [7, 1]).astype('float32')
-        x_lod = [[0, 2, 3], [0, 2, 5, 7]]
-        self.inputs = {'X': (x_data, x_lod)}
-        self.repeat = 2
-
-
-class TestSeqExpandCase2(TestSeqExpand):
-    def set_data(self):
-        x_data = np.random.uniform(0.1, 1, [4, 1]).astype('float32')
-        self.inputs = {'X': x_data}
-        self.repeat = 2
-
-
-class TestSeqExpandCase3(TestSeqExpand):
-    def set_data(self):
-        x_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32')
-        y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32')
-        y_lod = [[0, 1, 4, 8]]
-        self.inputs = {'X': x_data, 'Y': (y_data, y_lod)}
-        self.repeat = None
-
-
-class TestSeqExpandCase4(TestSeqExpand):
     def set_data(self):
         x_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32')
         x_lod = [[0, 2, 5]]
         y_data = np.random.uniform(0.1, 1, [13, 1]).astype('float32')
         y_lod = [[0, 2, 5], [0, 2, 4, 7, 10, 13]]
         self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
-        self.repeat = None
+
+
+class TestSeqExpandCase2(TestSeqExpand):
+    def set_data(self):
+        x_data = np.random.uniform(0.1, 1, [1, 2, 2]).astype('float32')
+        x_lod = [[0, 1]]
+        y_data = np.random.uniform(0.1, 1, [2, 2, 2]).astype('float32')
+        y_lod = [[0, 2]]
+        self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
 
 
 if __name__ == '__main__':
     unittest.main()
-    # TestSeqExpandCase4().setUp()