提交 7f380c1b 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #2701 from luotao1/stride

stride pooling for max and average layer
...@@ -25,6 +25,10 @@ namespace paddle { ...@@ -25,6 +25,10 @@ namespace paddle {
* If SequenceLevel = kNonSeq: * If SequenceLevel = kNonSeq:
* Output: output size is the number of input sequences (NOT input instances) * Output: output size is the number of input sequences (NOT input instances)
* output[i] = average_{for each instance in this sequence}{input[i]} * output[i] = average_{for each instance in this sequence}{input[i]}
* If stride_ > 0:
* Output: a shortened sequence. Stride is the step size by which we slide a
* window upon the input sequence, and the average pooling
* operation is then applied to each interval independently.
* If SequenceLevel = kSeq: * If SequenceLevel = kSeq:
* Check input sequence must has sub-sequence * Check input sequence must has sub-sequence
* Output: output size is the number of input sub-sequences * Output: output size is the number of input sub-sequences
......
...@@ -26,6 +26,10 @@ namespace paddle { ...@@ -26,6 +26,10 @@ namespace paddle {
* If SequenceLevel = kNonSeq: * If SequenceLevel = kNonSeq:
* Output: output size is the number of input sequences (NOT input instances) * Output: output size is the number of input sequences (NOT input instances)
* output[i] = max_{for each instance in this sequence}{input[i]} * output[i] = max_{for each instance in this sequence}{input[i]}
* If stride_ > 0:
* Output: a shortened sequence. Stride is the step size by which we slide a
* window upon the input sequence, and the max pooling operation is
* then applied to each interval independently.
* If SequenceLevel = kSeq: * If SequenceLevel = kSeq:
* Check input sequence must has sub-sequence * Check input sequence must has sub-sequence
* Output: output size is the number of input sub-sequences * Output: output size is the number of input sub-sequences
......
...@@ -26,10 +26,9 @@ namespace paddle { ...@@ -26,10 +26,9 @@ namespace paddle {
* If SequenceLevel = kNonseq: * If SequenceLevel = kNonseq:
* Output: a sequence containing only the last instance of the input sequence * Output: a sequence containing only the last instance of the input sequence
* If stride_ > 0: * If stride_ > 0:
* Output: a shorten sequence. The operation of getting last instance of a * Output: a shorten sequence. Stride is the step size by which we slide a
* sequence is independently performed on every slice of the input * window upon the input sequence, and getting last instance
* sequence, which is obtained by sliding a window with the window * operation is then applied to each interval independently.
* size set to stride_.
* If SequenceLevel = kSeq: * If SequenceLevel = kSeq:
* Check input sequence must has sub-sequence * Check input sequence must has sub-sequence
* Output: a sequence containing only the last instance of each sub-sequence * Output: a sequence containing only the last instance of each sub-sequence
...@@ -73,8 +72,7 @@ bool SequenceLastInstanceLayer::init(const LayerMap& layerMap, ...@@ -73,8 +72,7 @@ bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
void SequenceLastInstanceLayer::forward(PassType passType) { void SequenceLastInstanceLayer::forward(PassType passType) {
SequencePoolLayer::forward(passType); SequencePoolLayer::forward(passType);
auto starts = (stride_ > 0) ? stridePositions_->getData() auto starts = startPositions_->getData(false);
: startPositions_->getData(false);
MatrixPtr inputValue = getInputValue(0); MatrixPtr inputValue = getInputValue(0);
MatrixPtr outputValue = getOutputValue(); MatrixPtr outputValue = getOutputValue();
......
...@@ -72,9 +72,8 @@ void SequencePoolLayer::forward(PassType passType) { ...@@ -72,9 +72,8 @@ void SequencePoolLayer::forward(PassType passType) {
if (stride_ > 0) { if (stride_ > 0) {
CHECK_EQ(input.hasSubseq(), 0UL) CHECK_EQ(input.hasSubseq(), 0UL)
<< "sequence stride pooling is invalid for hasSubseq now"; << "sequence stride pooling is invalid for hasSubseq now";
output_.poolSequenceWithStride( output_.poolSequenceWithStride(input, stride_, &startPositions_, reversed_);
input, stride_, &stridePositions_, reversed_); newBatchSize_ = startPositions_->getSize() - 1;
newBatchSize_ = stridePositions_->getSize() - 1;
} }
resetOutput(newBatchSize_, dim); resetOutput(newBatchSize_, dim);
......
...@@ -28,8 +28,9 @@ namespace paddle { ...@@ -28,8 +28,9 @@ namespace paddle {
* sequence}{input[i]} * sequence}{input[i]}
* If stride_ > 0: * If stride_ > 0:
* Check input sequence must not have sub-sequence * Check input sequence must not have sub-sequence
* Output: a shorten sequence, pooling is performed upon a small local * Output: a shorten sequence. Stride is the step size by which we slide
* area * a window upon the input sequence, and the pooling operation
* is then applied to each interval independently.
* If SequenceLevel = kSeq: * If SequenceLevel = kSeq:
* Check input sequence must has sub-sequence * Check input sequence must has sub-sequence
* Output: output size is the number of input sub-sequences * Output: output size is the number of input sub-sequences
...@@ -47,8 +48,6 @@ protected: ...@@ -47,8 +48,6 @@ protected:
size_t newBatchSize_; size_t newBatchSize_;
ICpuGpuVectorPtr startPositions_; ICpuGpuVectorPtr startPositions_;
int stride_; int stride_;
// Store the start position of each window.
IVectorPtr stridePositions_;
// Whether the input sequence is reversed or not. // Whether the input sequence is reversed or not.
bool reversed_ = false; bool reversed_ = false;
......
...@@ -845,8 +845,12 @@ void testDegradeLayer(bool hasSubseq, ...@@ -845,8 +845,12 @@ void testDegradeLayer(bool hasSubseq,
TEST(Layer, MaxLayer) { TEST(Layer, MaxLayer) {
testDegradeLayer(false, "max", "non-seq", -1); // seq max to non-seq testDegradeLayer(false, "max", "non-seq", -1); // seq max to non-seq
testDegradeLayer(true, "max", "non-seq", -1); // hasSubseq max to non-seq testDegradeLayer(false,
testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq "max",
"non-seq",
5); // seq max to a shorten seq, stride window = 5
testDegradeLayer(true, "max", "non-seq", -1); // hasSubseq max to non-seq
testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq
} }
TEST(Layer, SequenceLastInstanceLayer) { TEST(Layer, SequenceLastInstanceLayer) {
...@@ -868,6 +872,10 @@ TEST(Layer, SequenceLastInstanceLayer) { ...@@ -868,6 +872,10 @@ TEST(Layer, SequenceLastInstanceLayer) {
TEST(Layer, AverageLayer) { TEST(Layer, AverageLayer) {
testDegradeLayer(false, "average", "non-seq", -1); // seq average to non-seq testDegradeLayer(false, "average", "non-seq", -1); // seq average to non-seq
testDegradeLayer(false,
"average",
"non-seq",
5); // seq average to a shorten seq, stride window = 5
testDegradeLayer( testDegradeLayer(
true, "average", "non-seq", -1); // hasSubseq average to non-seq true, "average", "non-seq", -1); // hasSubseq average to non-seq
testDegradeLayer(true, "average", "seq", -1); // hasSubseq average to seq testDegradeLayer(true, "average", "seq", -1); // hasSubseq average to seq
......
...@@ -561,7 +561,7 @@ void Argument::degradeSequence(const Argument& input) { ...@@ -561,7 +561,7 @@ void Argument::degradeSequence(const Argument& input) {
void Argument::poolSequenceWithStride(const Argument& input, void Argument::poolSequenceWithStride(const Argument& input,
size_t stride, size_t stride,
IVectorPtr* stridePostions, ICpuGpuVectorPtr* stridePostions,
bool reversed) { bool reversed) {
// If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5, // If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
// then sequenceStartPositions = [0, 2, 3, 4, 7]. // then sequenceStartPositions = [0, 2, 3, 4, 7].
...@@ -598,8 +598,8 @@ void Argument::poolSequenceWithStride(const Argument& input, ...@@ -598,8 +598,8 @@ void Argument::poolSequenceWithStride(const Argument& input,
stridePos.emplace_back(starts[numSequences]); stridePos.emplace_back(starts[numSequences]);
int size = stridePos.size(); int size = stridePos.size();
CHECK_EQ(size - 1, tgtBuf[numSequences]); CHECK_EQ(size - 1, tgtBuf[numSequences]);
IVector::resizeOrCreate(*stridePostions, size, false); ICpuGpuVector::resizeOrCreate(*stridePostions, size, false);
(*stridePostions)->copyFrom(stridePos.data(), size); (*stridePostions)->getMutableVector(false)->copyFrom(stridePos.data(), size);
} }
void Argument::getValueString( void Argument::getValueString(
......
...@@ -299,7 +299,7 @@ struct Argument { ...@@ -299,7 +299,7 @@ struct Argument {
*/ */
void poolSequenceWithStride(const Argument& input, void poolSequenceWithStride(const Argument& input,
size_t stride, size_t stride,
IVectorPtr* stridePositions, ICpuGpuVectorPtr* stridePositions,
bool reversed = false); bool reversed = false);
/** /**
* @brief getValueString will return the argument's output in string. There * @brief getValueString will return the argument's output in string. There
......
...@@ -31,7 +31,7 @@ TEST(Argument, poolSequenceWithStride) { ...@@ -31,7 +31,7 @@ TEST(Argument, poolSequenceWithStride) {
int strideResultReversed[] = {0, 4, 9, 14, 17, 20, 25, 30}; int strideResultReversed[] = {0, 4, 9, 14, 17, 20, 25, 30};
for (auto reversed : {false, true}) { for (auto reversed : {false, true}) {
IVectorPtr stridePositions; ICpuGpuVectorPtr stridePositions;
output.poolSequenceWithStride( output.poolSequenceWithStride(
input, 5 /* stride */, &stridePositions, reversed); input, 5 /* stride */, &stridePositions, reversed);
...@@ -45,7 +45,7 @@ TEST(Argument, poolSequenceWithStride) { ...@@ -45,7 +45,7 @@ TEST(Argument, poolSequenceWithStride) {
CHECK_EQ(stridePositions->getSize(), 8UL); CHECK_EQ(stridePositions->getSize(), 8UL);
auto result = reversed ? strideResultReversed : strideResult; auto result = reversed ? strideResultReversed : strideResult;
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++) {
CHECK_EQ(stridePositions->getData()[i], result[i]); CHECK_EQ(stridePositions->getData(false)[i], result[i]);
} }
} }
} }
......
...@@ -2466,10 +2466,14 @@ class MaxLayer(LayerBase): ...@@ -2466,10 +2466,14 @@ class MaxLayer(LayerBase):
trans_type='non-seq', trans_type='non-seq',
bias=False, bias=False,
output_max_index=None, output_max_index=None,
stride=-1,
**xargs): **xargs):
super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs) super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs)
config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input') config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input')
if trans_type == 'seq':
config_assert(stride == -1, 'subseq does not support stride window')
self.config.trans_type = trans_type self.config.trans_type = trans_type
self.config.seq_pool_stride = stride
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
self.set_layer_size(input_layer.size) self.set_layer_size(input_layer.size)
...@@ -2731,11 +2735,15 @@ class AverageLayer(LayerBase): ...@@ -2731,11 +2735,15 @@ class AverageLayer(LayerBase):
average_strategy='average', average_strategy='average',
trans_type='non-seq', trans_type='non-seq',
bias=False, bias=False,
stride=-1,
**xargs): **xargs):
super(AverageLayer, self).__init__( super(AverageLayer, self).__init__(
name, 'average', 0, inputs=inputs, **xargs) name, 'average', 0, inputs=inputs, **xargs)
self.config.average_strategy = average_strategy self.config.average_strategy = average_strategy
if trans_type == 'seq':
config_assert(stride == -1, 'subseq does not support stride window')
self.config.trans_type = trans_type self.config.trans_type = trans_type
self.config.seq_pool_stride = stride
config_assert(len(inputs) == 1, 'AverageLayer must have 1 input') config_assert(len(inputs) == 1, 'AverageLayer must have 1 input')
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
......
...@@ -1246,10 +1246,19 @@ def pooling_layer(input, ...@@ -1246,10 +1246,19 @@ def pooling_layer(input,
name=None, name=None,
bias_attr=None, bias_attr=None,
agg_level=AggregateLevel.TO_NO_SEQUENCE, agg_level=AggregateLevel.TO_NO_SEQUENCE,
stride=-1,
layer_attr=None): layer_attr=None):
""" """
Pooling layer for sequence inputs, not used for Image. Pooling layer for sequence inputs, not used for Image.
If stride > 0, this layer slides a window whose size is determined by stride,
and returns the pooling value of each window as the output. Thus, a long
sequence is shortened.
The parameter stride specifies the intervals at which to apply the pooling
operation. Note that for a sequence with sub-sequences (agg_level is
AggregateLevel.TO_SEQUENCE), stride must keep its default value of -1.
The example usage is: The example usage is:
.. code-block:: python .. code-block:: python
...@@ -1268,6 +1277,8 @@ def pooling_layer(input, ...@@ -1268,6 +1277,8 @@ def pooling_layer(input,
:param pooling_type: Type of pooling, MaxPooling(default), AvgPooling, :param pooling_type: Type of pooling, MaxPooling(default), AvgPooling,
SumPooling, SquareRootNPooling. SumPooling, SquareRootNPooling.
:type pooling_type: BasePoolingType|None :type pooling_type: BasePoolingType|None
:param stride: The step size between successive pooling regions.
:type stride: Int
:param bias_attr: Bias parameter attribute. False if no bias. :param bias_attr: Bias parameter attribute. False if no bias.
:type bias_attr: ParameterAttribute|None|False :type bias_attr: ParameterAttribute|None|False
:param layer_attr: The Extra Attributes for layer, such as dropout. :param layer_attr: The Extra Attributes for layer, such as dropout.
...@@ -1285,12 +1296,16 @@ def pooling_layer(input, ...@@ -1285,12 +1296,16 @@ def pooling_layer(input,
extra_dict['output_max_index'] = pooling_type.output_max_index extra_dict['output_max_index'] = pooling_type.output_max_index
extra_dict.update(ExtraLayerAttribute.to_kwargs(layer_attr)) extra_dict.update(ExtraLayerAttribute.to_kwargs(layer_attr))
if agg_level == AggregateLevel.TO_SEQUENCE:
assert stride == -1
Layer( Layer(
name=name, name=name,
type=pooling_type.name, type=pooling_type.name,
inputs=[Input(input.name)], inputs=[Input(input.name)],
bias=ParamAttr.to_bias(bias_attr), bias=ParamAttr.to_bias(bias_attr),
trans_type=agg_level, trans_type=agg_level,
stride=stride,
**extra_dict) **extra_dict)
return LayerOutput( return LayerOutput(
...@@ -1552,7 +1567,7 @@ def last_seq(input, ...@@ -1552,7 +1567,7 @@ def last_seq(input,
:type name: basestring :type name: basestring
:param input: Input layer name. :param input: Input layer name.
:type input: LayerOutput :type input: LayerOutput
:param stride: window size. :param stride: The step size between successive pooling regions.
:type stride: Int :type stride: Int
:param layer_attr: extra layer attributes. :param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
...@@ -1608,7 +1623,7 @@ def first_seq(input, ...@@ -1608,7 +1623,7 @@ def first_seq(input,
:type name: basestring :type name: basestring
:param input: Input layer name. :param input: Input layer name.
:type input: LayerOutput :type input: LayerOutput
:param stride: window size. :param stride: The step size between successive pooling regions.
:type stride: Int :type stride: Int
:param layer_attr: extra layer attributes. :param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
......
...@@ -14,6 +14,7 @@ layers { ...@@ -14,6 +14,7 @@ layers {
input_layer_name: "dat_in" input_layer_name: "dat_in"
} }
trans_type: "seq" trans_type: "seq"
seq_pool_stride: -1
} }
layers { layers {
name: "__seq_pooling_1__" name: "__seq_pooling_1__"
...@@ -24,6 +25,7 @@ layers { ...@@ -24,6 +25,7 @@ layers {
input_layer_name: "dat_in" input_layer_name: "dat_in"
} }
trans_type: "non-seq" trans_type: "non-seq"
seq_pool_stride: -1
} }
layers { layers {
name: "__seq_pooling_2__" name: "__seq_pooling_2__"
...@@ -35,6 +37,7 @@ layers { ...@@ -35,6 +37,7 @@ layers {
} }
average_strategy: "average" average_strategy: "average"
trans_type: "seq" trans_type: "seq"
seq_pool_stride: -1
} }
layers { layers {
name: "__seq_pooling_3__" name: "__seq_pooling_3__"
...@@ -46,6 +49,7 @@ layers { ...@@ -46,6 +49,7 @@ layers {
} }
average_strategy: "average" average_strategy: "average"
trans_type: "non-seq" trans_type: "non-seq"
seq_pool_stride: -1
} }
layers { layers {
name: "__seq_pooling_4__" name: "__seq_pooling_4__"
...@@ -57,6 +61,7 @@ layers { ...@@ -57,6 +61,7 @@ layers {
} }
average_strategy: "sum" average_strategy: "sum"
trans_type: "seq" trans_type: "seq"
seq_pool_stride: -1
} }
layers { layers {
name: "__seq_pooling_5__" name: "__seq_pooling_5__"
...@@ -68,6 +73,7 @@ layers { ...@@ -68,6 +73,7 @@ layers {
} }
average_strategy: "sum" average_strategy: "sum"
trans_type: "non-seq" trans_type: "non-seq"
seq_pool_stride: -1
} }
layers { layers {
name: "__seq_pooling_6__" name: "__seq_pooling_6__"
...@@ -77,8 +83,44 @@ layers { ...@@ -77,8 +83,44 @@ layers {
inputs { inputs {
input_layer_name: "dat_in" input_layer_name: "dat_in"
} }
trans_type: "non-seq"
seq_pool_stride: 5
}
layers {
name: "__seq_pooling_7__"
type: "average"
size: 100
active_type: ""
inputs {
input_layer_name: "dat_in"
}
average_strategy: "average"
trans_type: "non-seq"
seq_pool_stride: 5
}
layers {
name: "__seq_pooling_8__"
type: "average"
size: 100
active_type: ""
inputs {
input_layer_name: "dat_in"
}
average_strategy: "sum"
trans_type: "non-seq"
seq_pool_stride: 5
}
layers {
name: "__seq_pooling_9__"
type: "max"
size: 100
active_type: ""
inputs {
input_layer_name: "dat_in"
}
output_max_index: true output_max_index: true
trans_type: "non-seq" trans_type: "non-seq"
seq_pool_stride: -1
} }
input_layer_names: "dat_in" input_layer_names: "dat_in"
output_layer_names: "__seq_pooling_0__" output_layer_names: "__seq_pooling_0__"
...@@ -88,6 +130,9 @@ output_layer_names: "__seq_pooling_3__" ...@@ -88,6 +130,9 @@ output_layer_names: "__seq_pooling_3__"
output_layer_names: "__seq_pooling_4__" output_layer_names: "__seq_pooling_4__"
output_layer_names: "__seq_pooling_5__" output_layer_names: "__seq_pooling_5__"
output_layer_names: "__seq_pooling_6__" output_layer_names: "__seq_pooling_6__"
output_layer_names: "__seq_pooling_7__"
output_layer_names: "__seq_pooling_8__"
output_layer_names: "__seq_pooling_9__"
sub_models { sub_models {
name: "root" name: "root"
layer_names: "dat_in" layer_names: "dat_in"
...@@ -98,6 +143,9 @@ sub_models { ...@@ -98,6 +143,9 @@ sub_models {
layer_names: "__seq_pooling_4__" layer_names: "__seq_pooling_4__"
layer_names: "__seq_pooling_5__" layer_names: "__seq_pooling_5__"
layer_names: "__seq_pooling_6__" layer_names: "__seq_pooling_6__"
layer_names: "__seq_pooling_7__"
layer_names: "__seq_pooling_8__"
layer_names: "__seq_pooling_9__"
input_layer_names: "dat_in" input_layer_names: "dat_in"
output_layer_names: "__seq_pooling_0__" output_layer_names: "__seq_pooling_0__"
output_layer_names: "__seq_pooling_1__" output_layer_names: "__seq_pooling_1__"
...@@ -106,6 +154,9 @@ sub_models { ...@@ -106,6 +154,9 @@ sub_models {
output_layer_names: "__seq_pooling_4__" output_layer_names: "__seq_pooling_4__"
output_layer_names: "__seq_pooling_5__" output_layer_names: "__seq_pooling_5__"
output_layer_names: "__seq_pooling_6__" output_layer_names: "__seq_pooling_6__"
output_layer_names: "__seq_pooling_7__"
output_layer_names: "__seq_pooling_8__"
output_layer_names: "__seq_pooling_9__"
is_recurrent_layer_group: false is_recurrent_layer_group: false
} }
...@@ -14,6 +14,14 @@ for pt in POOL_TYPE: ...@@ -14,6 +14,14 @@ for pt in POOL_TYPE:
for al in AGG_LEVEL: for al in AGG_LEVEL:
opts.append(pooling_layer(input=din, agg_level=al, pooling_type=pt())) opts.append(pooling_layer(input=din, agg_level=al, pooling_type=pt()))
for pt in POOL_TYPE:
opts.append(
pooling_layer(
input=din,
agg_level=AggregateLevel.TO_NO_SEQUENCE,
pooling_type=pt(),
stride=5))
opts.append( opts.append(
pooling_layer( pooling_layer(
input=din, pooling_type=MaxPooling(output_max_index=True))) input=din, pooling_type=MaxPooling(output_max_index=True)))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册