未验证 提交 26492210 编写于 作者: D dzhwinter 提交者: GitHub

Fix/sequence op (#5264)

* "replace enum with string"

* "fix layers"
上级 bcdedecb
...@@ -39,15 +39,14 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -39,15 +39,14 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out", AddOutput("Out",
"(Tensor), output of SequencePoolOp, which does not contain LoD " "(Tensor), output of SequencePoolOp, which does not contain LoD "
"infomation."); "infomation.");
AddAttr<int>( AddAttr<std::string>(
"strategy", "pooltype",
"(int, default AVERAGE) the pooling strategy of SequencePoolOp.") "(int, default AVERAGE) the pooling pooltype of SequencePoolOp.")
.SetDefault(AVERAGE) .SetDefault("AVERAGE");
.InEnum({AVERAGE, SUM, SQRT, MAX, LAST, FIRST});
AddComment(R"DOC( AddComment(R"DOC(
SequencePoolOp pools features of all time-steps of each instance. SequencePoolOp pools features of all time-steps of each instance.
It supports six pooling strategy: It supports six pooling pooltype:
- AVERAGE: Out[i] = average_{for each instance in i-th sequence}{X[i]} - AVERAGE: Out[i] = average_{for each instance in i-th sequence}{X[i]}
- SUM: Out[i] = sum_{for each instance in i-th sequence}{X[i]} - SUM: Out[i] = sum_{for each instance in i-th sequence}{X[i]}
- SQRT: Out[i] = sum_{for each instance in i-th sequence}{X[i]} - SQRT: Out[i] = sum_{for each instance in i-th sequence}{X[i]}
...@@ -63,7 +62,7 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -63,7 +62,7 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
and the value of X = [[1, 3], [2, 4, 6], [5, 1]]. and the value of X = [[1, 3], [2, 4, 6], [5, 1]].
Thus, Out is a [3,1,1] Tensor without LoD infomation. Thus, Out is a [3,1,1] Tensor without LoD infomation.
And for different strategy, the value of Out is as follows: And for different pooltype, the value of Out is as follows:
- AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2 - AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
- SUM: [4, 12, 6], where 4=1+3, 12=2+4+6, 6=5+1 - SUM: [4, 12, 6], where 4=1+3, 12=2+4+6, 6=5+1
......
...@@ -29,22 +29,13 @@ template <typename T, int MajorType = Eigen::RowMajor, ...@@ -29,22 +29,13 @@ template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex> typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>; using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
enum SeqPoolType {
AVERAGE = 0,
SUM = 1,
SQRT = 2, // square_root_n
MAX = 3,
LAST = 4,
FIRST = 5
};
template <typename Place, typename T> template <typename Place, typename T>
class SequencePoolKernel : public framework::OpKernel<T> { class SequencePoolKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto* in = context.Input<LoDTensor>("X"); auto* in = context.Input<LoDTensor>("X");
auto* out = context.Output<LoDTensor>("Out"); auto* out = context.Output<LoDTensor>("Out");
int strategy = context.Attr<int>("strategy"); std::string pooltype = context.Attr<std::string>("pooltype");
auto dims = in->dims(); auto dims = in->dims();
auto lod = in->lod(); auto lod = in->lod();
...@@ -71,28 +62,21 @@ class SequencePoolKernel : public framework::OpKernel<T> { ...@@ -71,28 +62,21 @@ class SequencePoolKernel : public framework::OpKernel<T> {
auto in_e = EigenMatrix<T>::From(in_t, framework::make_ddim({h, w})); auto in_e = EigenMatrix<T>::From(in_t, framework::make_ddim({h, w}));
auto out_e = EigenVector<T>::Flatten(out_t); auto out_e = EigenVector<T>::Flatten(out_t);
switch (strategy) { if (pooltype == "AVERAGE") {
case AVERAGE:
out_e.device(place) = in_e.mean(Eigen::array<int, 1>({{0}})); out_e.device(place) = in_e.mean(Eigen::array<int, 1>({{0}}));
break; } else if (pooltype == "SUM") {
case SUM:
out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})); out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}}));
break; } else if (pooltype == "SQRT") {
case SQRT:
out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) / out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
std::sqrt(static_cast<T>(h)); std::sqrt(static_cast<T>(h));
break; } else if (pooltype == "MAX") {
case MAX:
out_e.device(place) = in_e.maximum(Eigen::array<int, 1>({{0}})); out_e.device(place) = in_e.maximum(Eigen::array<int, 1>({{0}}));
break; } else if (pooltype == "LAST") {
case LAST:
out_e.device(place) = in_e.chip(h - 1, 0); out_e.device(place) = in_e.chip(h - 1, 0);
break; } else if (pooltype == "FIRST") {
case FIRST:
out_e.device(place) = in_e.chip(0, 0); out_e.device(place) = in_e.chip(0, 0);
break; } else {
default: PADDLE_THROW("unsupported pooling pooltype");
PADDLE_THROW("unsupported pooling strategy");
} }
} }
} }
...@@ -105,15 +89,15 @@ class SequencePoolGradKernel : public framework::OpKernel<T> { ...@@ -105,15 +89,15 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
auto* in = context.Input<LoDTensor>("X"); auto* in = context.Input<LoDTensor>("X");
auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X")); auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X"));
auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out")); auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
int strategy = context.Attr<int>("strategy"); std::string pooltype = context.Attr<std::string>("pooltype");
auto dims = in->dims(); auto dims = in->dims();
auto lod = in->lod()[0]; auto lod = in->lod()[0];
int64_t w = in->numel() / dims[0]; int64_t w = in->numel() / dims[0];
in_g->mutable_data<T>(context.GetPlace()); in_g->mutable_data<T>(context.GetPlace());
if (strategy == LAST || strategy == FIRST) { if (pooltype == "LAST" || pooltype == "FIRST") {
// set X@Grad be zero at first when strategy is LAST/FIRST // set X@Grad be zero at first when pooltype is LAST/FIRST
math::SetConstant<Place, T> functor; math::SetConstant<Place, T> functor;
functor(context.device_context(), in_g, 0); functor(context.device_context(), in_g, 0);
} }
...@@ -127,18 +111,14 @@ class SequencePoolGradKernel : public framework::OpKernel<T> { ...@@ -127,18 +111,14 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
auto out_g_e = EigenMatrix<T>::From(out_g_t, {1, w}); auto out_g_e = EigenMatrix<T>::From(out_g_t, {1, w});
Eigen::DSizes<int, 2> bcast(h, 1); Eigen::DSizes<int, 2> bcast(h, 1);
switch (strategy) { if (pooltype == "AVERAGE") {
case AVERAGE:
in_g_e.device(place) = (out_g_e / static_cast<T>(h)).broadcast(bcast); in_g_e.device(place) = (out_g_e / static_cast<T>(h)).broadcast(bcast);
break; } else if (pooltype == "SUM") {
case SUM:
in_g_e.device(place) = (out_g_e).broadcast(bcast); in_g_e.device(place) = (out_g_e).broadcast(bcast);
break; } else if (pooltype == "SQRT") {
case SQRT:
in_g_e.device(place) = in_g_e.device(place) =
(out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast); (out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast);
break; } else if (pooltype == "MAX") {
case MAX: {
auto in_t = auto in_t =
in->Slice(static_cast<int>(lod[i]), static_cast<int>(lod[i + 1])); in->Slice(static_cast<int>(lod[i]), static_cast<int>(lod[i + 1]));
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
...@@ -152,16 +132,12 @@ class SequencePoolGradKernel : public framework::OpKernel<T> { ...@@ -152,16 +132,12 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
in_g_e.slice(in_offsets, extents).device(place) = in_g_e.slice(in_offsets, extents).device(place) =
out_g_e.slice(out_offsets, extents); out_g_e.slice(out_offsets, extents);
} }
break; } else if (pooltype == "LAST") {
}
case LAST:
in_g_e.chip(h - 1, 0).device(place) = out_g_e; in_g_e.chip(h - 1, 0).device(place) = out_g_e;
break; } else if (pooltype == "FIRST") {
case FIRST:
in_g_e.chip(0, 0).device(place) = out_g_e; in_g_e.chip(0, 0).device(place) = out_g_e;
break; } else {
default: PADDLE_THROW("unsupported pooling pooltype");
PADDLE_THROW("unsupported pooling strategy");
} }
} }
} }
......
...@@ -351,32 +351,21 @@ def conv2d(input, ...@@ -351,32 +351,21 @@ def conv2d(input,
return helper.append_activation(pre_act) return helper.append_activation(pre_act)
def sequence_pool(input, pool_type, **kwargs):
    """Append a ``sequence_pool`` op that pools ``input`` per sequence.

    Args:
        input: Variable fed to the op as its ``X`` input.
        pool_type (str): Pooling type, matched case-insensitively. One of
            ``AVERAGE``, ``SUM``, ``SQRT``, ``MAX``, ``LAST``, ``FIRST``.
            ``AVG`` is accepted as an alias of ``AVERAGE``.
        **kwargs: Extra keyword arguments forwarded to ``LayerHelper``.

    Returns:
        The temporary variable that receives the op's ``Out`` output.

    Raises:
        ValueError: If ``pool_type`` is not one of the supported names.
    """
    # These are exactly the strings the sequence_pool kernel compares its
    # "pooltype" attribute against; anything else makes the kernel throw.
    supported_pool_types = ("AVERAGE", "SUM", "SQRT", "MAX", "LAST", "FIRST")
    # "AVG" used to pass validation here but is not a name the kernel knows,
    # so translate it instead of forwarding it unchanged.
    pool_type_aliases = {"AVG": "AVERAGE"}

    pooltype = pool_type.upper()
    pooltype = pool_type_aliases.get(pooltype, pooltype)
    if pooltype not in supported_pool_types:
        # Format the message eagerly: passing the arguments separately to
        # ValueError would leave the "%s" placeholders unexpanded.
        raise ValueError("Unknown pool_type: '%s'. It can only be %s." %
                         (str(pool_type), " ".join(supported_pool_types)))

    # input_dtype() is called without arguments below, so the helper has to be
    # told about `input` explicitly (presumably it looks it up in its keyword
    # arguments, as it did when this function passed **locals()).
    helper = LayerHelper('sequence_pool', input=input, **kwargs)
    dtype = helper.input_dtype()
    pool_out = helper.create_tmp_variable(dtype)

    helper.append_op(
        type="sequence_pool",
        inputs={"X": [input]},
        outputs={"Out": [pool_out]},
        attrs={"pooltype": pooltype})

    return pool_out
......
...@@ -3,15 +3,6 @@ import numpy as np ...@@ -3,15 +3,6 @@ import numpy as np
from op_test import OpTest from op_test import OpTest
class SeqPoolType(OpTest):
AVERAGE = 0
SUM = 1
SQRT = 2
MAX = 3
LAST = 4
FIRST = 5
class TestSeqAvgPool(OpTest): class TestSeqAvgPool(OpTest):
def set_data(self): def set_data(self):
self.op_type = 'sequence_pool' self.op_type = 'sequence_pool'
...@@ -25,7 +16,7 @@ class TestSeqAvgPool(OpTest): ...@@ -25,7 +16,7 @@ class TestSeqAvgPool(OpTest):
return x, lod, out return x, lod, out
def compute(self, x, lod, out): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.AVERAGE} self.attrs = {'pooltype': "AVERAGE"}
for i in range(4): for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[lod[0][i]:lod[0][i + 1], :]
out[i] = sub_x.mean(axis=0) out[i] = sub_x.mean(axis=0)
...@@ -54,7 +45,7 @@ class TestSeqAvgPool2D(TestSeqAvgPool): ...@@ -54,7 +45,7 @@ class TestSeqAvgPool2D(TestSeqAvgPool):
return x, lod, out return x, lod, out
def compute(self, x, lod, out): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.AVERAGE} self.attrs = {'pooltype': "AVERAGE"}
for i in range(4): for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
out[i] = np.reshape(sub_x.mean(axis=0), (3, 17)) out[i] = np.reshape(sub_x.mean(axis=0), (3, 17))
...@@ -62,7 +53,7 @@ class TestSeqAvgPool2D(TestSeqAvgPool): ...@@ -62,7 +53,7 @@ class TestSeqAvgPool2D(TestSeqAvgPool):
class TestSeqSumPool(TestSeqAvgPool): class TestSeqSumPool(TestSeqAvgPool):
def compute(self, x, lod, out): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.SUM} self.attrs = {'pooltype': "SUM"}
for i in range(4): for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[lod[0][i]:lod[0][i + 1], :]
out[i] = sub_x.sum(axis=0) out[i] = sub_x.sum(axis=0)
...@@ -70,7 +61,7 @@ class TestSeqSumPool(TestSeqAvgPool): ...@@ -70,7 +61,7 @@ class TestSeqSumPool(TestSeqAvgPool):
class TestSeqSumPool2D(TestSeqAvgPool2D): class TestSeqSumPool2D(TestSeqAvgPool2D):
def compute(self, x, lod, out): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.SUM} self.attrs = {'pooltype': "SUM"}
for i in range(4): for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
out[i] = np.reshape(sub_x.sum(axis=0), (3, 17)) out[i] = np.reshape(sub_x.sum(axis=0), (3, 17))
...@@ -78,7 +69,7 @@ class TestSeqSumPool2D(TestSeqAvgPool2D): ...@@ -78,7 +69,7 @@ class TestSeqSumPool2D(TestSeqAvgPool2D):
class TestSeqSqrtPool(TestSeqAvgPool): class TestSeqSqrtPool(TestSeqAvgPool):
def compute(self, x, lod, out): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.SQRT} self.attrs = {'pooltype': "SQRT"}
for i in range(4): for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[lod[0][i]:lod[0][i + 1], :]
len = lod[0][i + 1] - lod[0][i] len = lod[0][i + 1] - lod[0][i]
...@@ -87,7 +78,7 @@ class TestSeqSqrtPool(TestSeqAvgPool): ...@@ -87,7 +78,7 @@ class TestSeqSqrtPool(TestSeqAvgPool):
class TestSeqSqrtPool2D(TestSeqAvgPool2D): class TestSeqSqrtPool2D(TestSeqAvgPool2D):
def compute(self, x, lod, out): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.SQRT} self.attrs = {'pooltype': "SQRT"}
for i in range(4): for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
len = lod[0][i + 1] - lod[0][i] len = lod[0][i + 1] - lod[0][i]
...@@ -99,7 +90,7 @@ class TestSeqSqrtPool2D(TestSeqAvgPool2D): ...@@ -99,7 +90,7 @@ class TestSeqSqrtPool2D(TestSeqAvgPool2D):
class TestSeqMaxPool(TestSeqAvgPool): class TestSeqMaxPool(TestSeqAvgPool):
def compute(self, x, lod, out): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.MAX} self.attrs = {'pooltype': "MAX"}
for i in range(4): for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[lod[0][i]:lod[0][i + 1], :]
out[i] = np.amax(sub_x, axis=0) out[i] = np.amax(sub_x, axis=0)
...@@ -111,7 +102,7 @@ class TestSeqMaxPool(TestSeqAvgPool): ...@@ -111,7 +102,7 @@ class TestSeqMaxPool(TestSeqAvgPool):
class TestSeqMaxPool2D(TestSeqAvgPool2D): class TestSeqMaxPool2D(TestSeqAvgPool2D):
def compute(self, x, lod, out): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.MAX} self.attrs = {'pooltype': "MAX"}
for i in range(4): for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 17)) out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 17))
...@@ -123,7 +114,7 @@ class TestSeqMaxPool2D(TestSeqAvgPool2D): ...@@ -123,7 +114,7 @@ class TestSeqMaxPool2D(TestSeqAvgPool2D):
class TestSeqLastPool(TestSeqAvgPool): class TestSeqLastPool(TestSeqAvgPool):
def compute(self, x, lod, out): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.LAST} self.attrs = {'pooltype': "LAST"}
for i in range(4): for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[lod[0][i]:lod[0][i + 1], :]
out[i] = sub_x[-1, :] out[i] = sub_x[-1, :]
...@@ -131,7 +122,7 @@ class TestSeqLastPool(TestSeqAvgPool): ...@@ -131,7 +122,7 @@ class TestSeqLastPool(TestSeqAvgPool):
class TestSeqLastPool2D(TestSeqAvgPool2D): class TestSeqLastPool2D(TestSeqAvgPool2D):
def compute(self, x, lod, out): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.LAST} self.attrs = {'pooltype': "LAST"}
for i in range(4): for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
out[i] = np.reshape(sub_x[-1, :], (3, 17)) out[i] = np.reshape(sub_x[-1, :], (3, 17))
...@@ -139,7 +130,7 @@ class TestSeqLastPool2D(TestSeqAvgPool2D): ...@@ -139,7 +130,7 @@ class TestSeqLastPool2D(TestSeqAvgPool2D):
class TestSeqFirstPool(TestSeqAvgPool): class TestSeqFirstPool(TestSeqAvgPool):
def compute(self, x, lod, out): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.FIRST} self.attrs = {'pooltype': "FIRST"}
for i in range(4): for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[lod[0][i]:lod[0][i + 1], :]
out[i] = sub_x[0, :] out[i] = sub_x[0, :]
...@@ -147,7 +138,7 @@ class TestSeqFirstPool(TestSeqAvgPool): ...@@ -147,7 +138,7 @@ class TestSeqFirstPool(TestSeqAvgPool):
class TestSeqFirstPool2D(TestSeqAvgPool2D): class TestSeqFirstPool2D(TestSeqAvgPool2D):
def compute(self, x, lod, out): def compute(self, x, lod, out):
self.attrs = {'strategy': SeqPoolType.FIRST} self.attrs = {'pooltype': "FIRST"}
for i in range(4): for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17))
out[i] = np.reshape(sub_x[0, :], (3, 17)) out[i] = np.reshape(sub_x[0, :], (3, 17))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册