提交 440ad999 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #4788 from luotao1/seqpool

 add SQRT/LAST/FIRST strategy for Seqpool
...@@ -36,11 +36,10 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -36,11 +36,10 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
SequencePoolOpMaker(framework::OpProto* proto, SequencePoolOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", AddInput("X", "(LoDTensor), the variable-length input of SequencePoolOp");
"A float LoDTensor, the variable-length input of SequencePoolOp"); AddOutput("Out",
AddOutput( "(Tensor), output of SequencePoolOp, which does not contain LoD "
"Out", "infomation.");
"A float LoDTensor, the variable-length output of SequencePoolOp.");
AddAttr<int>( AddAttr<int>(
"strategy", "strategy",
"(int, default AVERAGE) the pooling strategy of SequencePoolOp.") "(int, default AVERAGE) the pooling strategy of SequencePoolOp.")
...@@ -49,13 +48,13 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -49,13 +48,13 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC( AddComment(R"DOC(
SequencePoolOp pools features of all time-steps of each instance. SequencePoolOp pools features of all time-steps of each instance.
For a mini-batch of 3 variable lengths sentences, containing 2, 3, and 2 time-steps: For a mini-batch of 3 variable-length sentences, containing 2, 3, and 2 time-steps:
Assume X is a [7,M,N] float LoDTensor, and X->lod()[0] = [0, 2, 5, 7]. Assume X is a [7,M,N] LoDTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2.
Besides, for the sake of simplicity, we assume M=1 and N=1, Besides, for the sake of simplicity, we assume M=1 and N=1,
and the value of X = [[1, 3], [2, 4, 6], [5, 1]]. and the value of X = [[1, 3], [2, 4, 6], [5, 1]].
Thus, Out is a [3,1,1] float LoDTensor, but Out->lod() is nullptr. Thus, Out is a [3,1,1] Tensor without LoD infomation.
And for different strategy, the value of Out is as follows: And for different strategy, the value of Out is as follows:
- AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2 - AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/framework/eigen.h" #include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -77,6 +78,16 @@ class SequencePoolKernel : public framework::OpKernel<T> { ...@@ -77,6 +78,16 @@ class SequencePoolKernel : public framework::OpKernel<T> {
case SUM: case SUM:
out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})); out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}}));
break; break;
case SQRT:
out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
std::sqrt(static_cast<T>(h));
break;
case LAST:
out_e.device(place) = in_e.chip(h - 1, 0);
break;
case FIRST:
out_e.device(place) = in_e.chip(0, 0);
break;
default: default:
PADDLE_THROW("unsupported pooling strategy"); PADDLE_THROW("unsupported pooling strategy");
} }
...@@ -98,6 +109,10 @@ class SequencePoolGradKernel : public framework::OpKernel<T> { ...@@ -98,6 +109,10 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
int64_t w = in->numel() / dims[0]; int64_t w = in->numel() / dims[0];
in_g->mutable_data<T>(context.GetPlace()); in_g->mutable_data<T>(context.GetPlace());
if (strategy == LAST || strategy == FIRST) {
// set X@Grad be zero at first when strategy is LAST/FIRST
math::SetConstant<Place, T>(context.device_context(), in_g, 0);
}
auto place = context.GetEigenDevice<Place>(); auto place = context.GetEigenDevice<Place>();
for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) { for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
auto in_g_t = in_g->Slice<T>(static_cast<int>(lod[i]), auto in_g_t = in_g->Slice<T>(static_cast<int>(lod[i]),
...@@ -115,6 +130,16 @@ class SequencePoolGradKernel : public framework::OpKernel<T> { ...@@ -115,6 +130,16 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
case SUM: case SUM:
in_g_e.device(place) = (out_g_e).broadcast(bcast); in_g_e.device(place) = (out_g_e).broadcast(bcast);
break; break;
case SQRT:
in_g_e.device(place) =
(out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast);
break;
case LAST:
in_g_e.chip(h - 1, 0).device(place) = out_g_e;
break;
case FIRST:
in_g_e.chip(0, 0).device(place) = out_g_e;
break;
default: default:
PADDLE_THROW("unsupported pooling strategy"); PADDLE_THROW("unsupported pooling strategy");
} }
......
...@@ -82,5 +82,70 @@ class TestSeqSumPool2D(TestSeqAvgPool2D): ...@@ -82,5 +82,70 @@ class TestSeqSumPool2D(TestSeqAvgPool2D):
out[i] = np.reshape(sub_x.sum(axis=0), (3, 17)) out[i] = np.reshape(sub_x.sum(axis=0), (3, 17))
class TestSeqSqrtPool(TestSeqAvgPool):
    """Reference check for the SQRT strategy on the inherited fixture.

    Each sequence is summed over its time-steps and the sum is divided by
    the square root of the sequence length.
    """

    def compute(self):
        """Fill self.outputs['Out'] with the expected SQRT pooling values."""
        self.attrs = {'strategy': SeqPoolType.SQRT}
        x, lod = self.inputs['X']
        out = self.outputs['Out']
        # The loop is hard-coded to four sequences; presumably the inherited
        # fixture provides exactly four -- confirm against the base class.
        for i in range(4):
            sub_x = x[lod[0][i]:lod[0][i + 1], :]
            # Named seq_len so the builtin len() is not shadowed.
            seq_len = lod[0][i + 1] - lod[0][i]
            out[i] = sub_x.sum(axis=0) / np.sqrt(seq_len)
class TestSeqSqrtPool2D(TestSeqAvgPool2D):
    """Reference check for the SQRT strategy on (3, 17)-shaped time-steps.

    Each sequence is flattened to rows of 3 * 17 values, summed over its
    time-steps, divided by the square root of the sequence length, and
    reshaped back to (3, 17).
    """

    def compute(self):
        """Fill self.outputs['Out'] with the expected SQRT pooling values."""
        self.attrs = {'strategy': SeqPoolType.SQRT}
        x, lod = self.inputs['X']
        out = self.outputs['Out']
        # The loop is hard-coded to four sequences; presumably the inherited
        # fixture provides exactly four -- confirm against the base class.
        for i in range(4):
            sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
            # Named seq_len so the builtin len() is not shadowed.
            seq_len = lod[0][i + 1] - lod[0][i]
            out[i] = np.reshape(sub_x.sum(axis=0) / np.sqrt(seq_len), (3, 17))

    # Gradient check on X -> Out with max_relative_error=0.06.
    def test_check_grad(self):
        self.check_grad(["X"], "Out", max_relative_error=0.06)
class TestSeqLastPool(TestSeqAvgPool):
    """Reference check for the LAST strategy: keep each sequence's final step."""

    def compute(self):
        """Write the last time-step of every sequence into self.outputs['Out']."""
        self.attrs = {'strategy': SeqPoolType.LAST}
        data, lod = self.inputs['X']
        expected = self.outputs['Out']
        for seq_idx in range(4):
            begin = lod[0][seq_idx]
            end = lod[0][seq_idx + 1]
            # Slice the sequence first, then take its final row.
            expected[seq_idx] = data[begin:end, :][-1, :]
class TestSeqLastPool2D(TestSeqAvgPool2D):
    """Reference check for the LAST strategy on (3, 17)-shaped time-steps."""

    def compute(self):
        """Write the last time-step of every sequence into self.outputs['Out']."""
        self.attrs = {'strategy': SeqPoolType.LAST}
        data, lod = self.inputs['X']
        expected = self.outputs['Out']
        for seq_idx in range(4):
            begin = lod[0][seq_idx]
            end = lod[0][seq_idx + 1]
            # Flatten each time-step to a row of 3 * 17 values.
            flat_steps = np.reshape(data[begin:end, :], (-1, 3 * 17))
            final_step = flat_steps[-1, :]
            expected[seq_idx] = np.reshape(final_step, (3, 17))
class TestSeqFirstPool(TestSeqAvgPool):
    """Reference check for the FIRST strategy: keep each sequence's first step."""

    def compute(self):
        """Write the first time-step of every sequence into self.outputs['Out']."""
        self.attrs = {'strategy': SeqPoolType.FIRST}
        data, lod = self.inputs['X']
        expected = self.outputs['Out']
        for seq_idx in range(4):
            begin = lod[0][seq_idx]
            end = lod[0][seq_idx + 1]
            # Slice the sequence first, then take its leading row.
            expected[seq_idx] = data[begin:end, :][0, :]
class TestSeqFirstPool2D(TestSeqAvgPool2D):
    """Reference check for the FIRST strategy on (3, 17)-shaped time-steps."""

    def compute(self):
        """Write the first time-step of every sequence into self.outputs['Out']."""
        self.attrs = {'strategy': SeqPoolType.FIRST}
        data, lod = self.inputs['X']
        expected = self.outputs['Out']
        for seq_idx in range(4):
            begin = lod[0][seq_idx]
            end = lod[0][seq_idx + 1]
            # Flatten each time-step to a row of 3 * 17 values.
            flat_steps = np.reshape(data[begin:end, :], (-1, 3 * 17))
            leading_step = flat_steps[0, :]
            expected[seq_idx] = np.reshape(leading_step, (3, 17))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册