提交 0d0fd3fb 编写于 作者: G gongweibao

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into omitlstmunit

...@@ -47,6 +47,15 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -47,6 +47,15 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC( AddComment(R"DOC(
SequencePoolOp pools features of all time-steps of each instance. SequencePoolOp pools features of all time-steps of each instance.
It supports six pooling strategy:
- AVERAGE: Out[i] = average_{for each instance in i-th sequence}{X[i]}
- SUM: Out[i] = sum_{for each instance in i-th sequence}{X[i]}
- SQRT: Out[i] = sum_{for each instance in i-th sequence}{X[i]}
/ sqrt(i-th sequence length)
- LAST: Out[i] = last instance in i-th sequence X[i]
- FIRST: Out[i] = first instance in i-th sequence X[i]
- MAX: Out[i] = max_{for each instance in i-th sequence}{X[i]}
For a mini-batch of 3 variable-length sentences, containing 2, 3, and 2 time-steps: For a mini-batch of 3 variable-length sentences, containing 2, 3, and 2 time-steps:
Assume X is a [7,M,N] LoDTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2. Assume X is a [7,M,N] LoDTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2.
......
...@@ -82,6 +82,9 @@ class SequencePoolKernel : public framework::OpKernel<T> { ...@@ -82,6 +82,9 @@ class SequencePoolKernel : public framework::OpKernel<T> {
out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) / out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
std::sqrt(static_cast<T>(h)); std::sqrt(static_cast<T>(h));
break; break;
case MAX:
out_e.device(place) = in_e.maximum(Eigen::array<int, 1>({{0}}));
break;
case LAST: case LAST:
out_e.device(place) = in_e.chip(h - 1, 0); out_e.device(place) = in_e.chip(h - 1, 0);
break; break;
...@@ -100,8 +103,8 @@ class SequencePoolGradKernel : public framework::OpKernel<T> { ...@@ -100,8 +103,8 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto* in = context.Input<LoDTensor>("X"); auto* in = context.Input<LoDTensor>("X");
auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X")); auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X"));
auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
int strategy = context.Attr<int>("strategy"); int strategy = context.Attr<int>("strategy");
auto dims = in->dims(); auto dims = in->dims();
...@@ -135,6 +138,22 @@ class SequencePoolGradKernel : public framework::OpKernel<T> { ...@@ -135,6 +138,22 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
in_g_e.device(place) = in_g_e.device(place) =
(out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast); (out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast);
break; break;
case MAX: {
auto in_t =
in->Slice(static_cast<int>(lod[i]), static_cast<int>(lod[i + 1]));
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
in_t_map(in_t.data<T>(), h, w);
int row_id;
Eigen::array<int, 2> extents = {1, 1};
for (int col_id = 0; col_id < w; col_id++) {
in_t_map.col(col_id).maxCoeff(&row_id);
Eigen::array<int, 2> in_offsets = {row_id, col_id};
Eigen::array<int, 2> out_offsets = {0, col_id};
in_g_e.slice(in_offsets, extents).device(place) =
out_g_e.slice(out_offsets, extents);
}
break;
}
case LAST: case LAST:
in_g_e.chip(h - 1, 0).device(place) = out_g_e; in_g_e.chip(h - 1, 0).device(place) = out_g_e;
break; break;
......
...@@ -22,18 +22,17 @@ class TestSeqAvgPool(OpTest): ...@@ -22,18 +22,17 @@ class TestSeqAvgPool(OpTest):
out = np.zeros((4, 23)).astype('float32') out = np.zeros((4, 23)).astype('float32')
self.outputs = {'Out': out} self.outputs = {'Out': out}
return x, lod, out
def compute(self): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.AVERAGE} self.attrs = {'strategy': SeqPoolType.AVERAGE}
x, lod = self.inputs['X']
out = self.outputs['Out']
for i in range(4): for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[lod[0][i]:lod[0][i + 1], :]
out[i] = sub_x.mean(axis=0) out[i] = sub_x.mean(axis=0)
def setUp(self): def setUp(self):
self.set_data() x, lod, out = self.set_data()
self.compute() self.compute(x, lod, out)
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output()
...@@ -52,41 +51,34 @@ class TestSeqAvgPool2D(TestSeqAvgPool): ...@@ -52,41 +51,34 @@ class TestSeqAvgPool2D(TestSeqAvgPool):
out = np.zeros((4, 3, 17)).astype('float32') out = np.zeros((4, 3, 17)).astype('float32')
self.outputs = {'Out': out} self.outputs = {'Out': out}
return x, lod, out
def compute(self): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.AVERAGE} self.attrs = {'strategy': SeqPoolType.AVERAGE}
x, lod = self.inputs['X']
out = self.outputs['Out']
for i in range(4): for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
out[i] = np.reshape(sub_x.mean(axis=0), (3, 17)) out[i] = np.reshape(sub_x.mean(axis=0), (3, 17))
class TestSeqSumPool(TestSeqAvgPool): class TestSeqSumPool(TestSeqAvgPool):
def compute(self): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.SUM} self.attrs = {'strategy': SeqPoolType.SUM}
x, lod = self.inputs['X']
out = self.outputs['Out']
for i in range(4): for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[lod[0][i]:lod[0][i + 1], :]
out[i] = sub_x.sum(axis=0) out[i] = sub_x.sum(axis=0)
class TestSeqSumPool2D(TestSeqAvgPool2D): class TestSeqSumPool2D(TestSeqAvgPool2D):
def compute(self): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.SUM} self.attrs = {'strategy': SeqPoolType.SUM}
x, lod = self.inputs['X']
out = self.outputs['Out']
for i in range(4): for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
out[i] = np.reshape(sub_x.sum(axis=0), (3, 17)) out[i] = np.reshape(sub_x.sum(axis=0), (3, 17))
class TestSeqSqrtPool(TestSeqAvgPool): class TestSeqSqrtPool(TestSeqAvgPool):
def compute(self): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.SQRT} self.attrs = {'strategy': SeqPoolType.SQRT}
x, lod = self.inputs['X']
out = self.outputs['Out']
for i in range(4): for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[lod[0][i]:lod[0][i + 1], :]
len = lod[0][i + 1] - lod[0][i] len = lod[0][i + 1] - lod[0][i]
...@@ -94,10 +86,8 @@ class TestSeqSqrtPool(TestSeqAvgPool): ...@@ -94,10 +86,8 @@ class TestSeqSqrtPool(TestSeqAvgPool):
class TestSeqSqrtPool2D(TestSeqAvgPool2D): class TestSeqSqrtPool2D(TestSeqAvgPool2D):
def compute(self): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.SQRT} self.attrs = {'strategy': SeqPoolType.SQRT}
x, lod = self.inputs['X']
out = self.outputs['Out']
for i in range(4): for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
len = lod[0][i + 1] - lod[0][i] len = lod[0][i + 1] - lod[0][i]
...@@ -107,41 +97,57 @@ class TestSeqSqrtPool2D(TestSeqAvgPool2D): ...@@ -107,41 +97,57 @@ class TestSeqSqrtPool2D(TestSeqAvgPool2D):
self.check_grad(["X"], "Out", max_relative_error=0.06) self.check_grad(["X"], "Out", max_relative_error=0.06)
class TestSeqMaxPool(TestSeqAvgPool):
def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.MAX}
for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :]
out[i] = np.amax(sub_x, axis=0)
def test_check_grad(self):
# Remove MaxPool2D from gradient check to confirm the success of CI.
return
class TestSeqMaxPool2D(TestSeqAvgPool2D):
def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.MAX}
for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 17))
def test_check_grad(self):
# Remove MaxPool2D from gradient check to confirm the success of CI.
return
class TestSeqLastPool(TestSeqAvgPool): class TestSeqLastPool(TestSeqAvgPool):
def compute(self): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.LAST} self.attrs = {'strategy': SeqPoolType.LAST}
x, lod = self.inputs['X']
out = self.outputs['Out']
for i in range(4): for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[lod[0][i]:lod[0][i + 1], :]
out[i] = sub_x[-1, :] out[i] = sub_x[-1, :]
class TestSeqLastPool2D(TestSeqAvgPool2D): class TestSeqLastPool2D(TestSeqAvgPool2D):
def compute(self): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.LAST} self.attrs = {'strategy': SeqPoolType.LAST}
x, lod = self.inputs['X']
out = self.outputs['Out']
for i in range(4): for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
out[i] = np.reshape(sub_x[-1, :], (3, 17)) out[i] = np.reshape(sub_x[-1, :], (3, 17))
class TestSeqFirstPool(TestSeqAvgPool): class TestSeqFirstPool(TestSeqAvgPool):
def compute(self): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.FIRST} self.attrs = {'strategy': SeqPoolType.FIRST}
x, lod = self.inputs['X']
out = self.outputs['Out']
for i in range(4): for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[lod[0][i]:lod[0][i + 1], :]
out[i] = sub_x[0, :] out[i] = sub_x[0, :]
class TestSeqFirstPool2D(TestSeqAvgPool2D): class TestSeqFirstPool2D(TestSeqAvgPool2D):
def compute(self): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.FIRST} self.attrs = {'strategy': SeqPoolType.FIRST}
x, lod = self.inputs['X']
out = self.outputs['Out']
for i in range(4): for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
out[i] = np.reshape(sub_x[0, :], (3, 17)) out[i] = np.reshape(sub_x[0, :], (3, 17))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册