Commit 8266546e authored by qingqing01, committed by GitHub

Merge pull request #2480 from emailweixu/repeat_layer

Repeat layer for column vector
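For intuition, the two repetition modes this change introduces can be sketched in plain NumPy (an illustrative sketch, not part of the patch; the variable names are placeholders):

import numpy as np

x = np.array([1.0, 2.0, 3.0])   # one sample, img_size = 3
num_filters = 2                  # number of repeats

# as_row_vector=True: tile the whole vector num_filters times,
# i.e. the (num_filters x img_size) output read row-major.
row_mode = np.tile(x, num_filters)      # -> [1, 2, 3, 1, 2, 3]

# as_row_vector=False: repeat each element num_filters times,
# i.e. the (img_size x num_filters) output read row-major.
col_mode = np.repeat(x, num_filters)    # -> [1, 1, 2, 2, 3, 3]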
......@@ -40,6 +40,7 @@ namespace paddle {
class FeatureMapExpandLayer : public Layer {
private:
int numFilters_;
bool asRowVector_;
public:
explicit FeatureMapExpandLayer(const LayerConfig& config) : Layer(config) {}
......@@ -62,6 +63,7 @@ bool FeatureMapExpandLayer::init(const LayerMap& layerMap,
CHECK_EQ(inputLayers_.size(), 1UL);
numFilters_ = config_.num_filters();
asRowVector_ = config_.user_arg() != "as_col_vec";
return true;
}
......@@ -76,16 +78,30 @@ void FeatureMapExpandLayer::forward(PassType passType) {
{
AsyncGpuBlock asyncGpuBlock;
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outVTmp =
Matrix::create(outputV->getData() + i * imgSize * numFilters_,
numFilters_,
imgSize,
false,
useGpu_);
MatrixPtr inVTmp = Matrix::create(
inputV->getData() + i * imgSize, 1, imgSize, false, useGpu_);
outVTmp->addRowVector(*inVTmp);
if (asRowVector_) {
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outVTmp =
Matrix::create(outputV->getData() + i * imgSize * numFilters_,
numFilters_,
imgSize,
false,
useGpu_);
MatrixPtr inVTmp = Matrix::create(
inputV->getData() + i * imgSize, 1, imgSize, false, useGpu_);
outVTmp->addRowVector(*inVTmp);
}
} else {
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outVTmp =
Matrix::create(outputV->getData() + i * imgSize * numFilters_,
imgSize,
numFilters_,
false,
useGpu_);
MatrixPtr inVTmp = Matrix::create(
inputV->getData() + i * imgSize, imgSize, 1, false, useGpu_);
outVTmp->addColVector(*inVTmp);
}
}
}
/* activation */ {
......@@ -102,24 +118,38 @@ void FeatureMapExpandLayer::backward(const UpdateCallback& callback) {
MatrixPtr outGrad = getOutputGrad();
size_t batchSize = getInput(0).getBatchSize();
int imgSize = inGrad->getWidth();
/* Do activation */ {
REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
backwardActivation();
}
{
AsyncGpuBlock asyncGpuBlock;
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outGradTmp =
Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
numFilters_,
imgSize,
false,
useGpu_);
MatrixPtr inGradTmp = Matrix::create(
inGrad->getData() + i * imgSize, 1, imgSize, false, useGpu_);
inGradTmp->collectBias(*outGradTmp, 1);
if (asRowVector_) {
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outGradTmp =
Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
numFilters_,
imgSize,
false,
useGpu_);
MatrixPtr inGradTmp = Matrix::create(
inGrad->getData() + i * imgSize, 1, imgSize, false, useGpu_);
inGradTmp->collectBias(*outGradTmp, 1);
}
} else {
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outGradTmp =
Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
imgSize,
numFilters_,
false,
useGpu_);
MatrixPtr inGradTmp = Matrix::create(
inGrad->getData() + i * imgSize, imgSize, 1, false, useGpu_);
inGradTmp->sumRows(*outGradTmp, 1, 1);
}
}
}
/* Do derivation */ {
REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
backwardActivation();
}
}
} // namespace paddle.
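The backward pass accumulates the output gradient over every copy of each input element. A rough NumPy equivalent of the two branches above (illustrative only; it ignores the batch loop and Paddle's Matrix API):

import numpy as np

img_size, num_filters = 3, 2
out_grad = np.arange(img_size * num_filters, dtype=float)  # flattened per-sample output gradient
in_grad = np.zeros(img_size)

as_row_vector = True
if as_row_vector:
    # collectBias: sum the (num_filters x img_size) gradient over its rows
    in_grad += out_grad.reshape(num_filters, img_size).sum(axis=0)
else:
    # sumRows: sum the (img_size x num_filters) gradient over its columns
    in_grad += out_grad.reshape(img_size, num_filters).sum(axis=1)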
......@@ -1598,12 +1598,15 @@ TEST(Layer, FeatureMapExpandLayer) {
/* paraSize= */ 0});
config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) {
testLayerGrad(config,
"featmap_expand",
/*batch_size*/ 100,
/* trans= */ false,
useGpu,
/* useWeight */ true);
for (auto asRowVec : {false, true}) {
config.layerConfig.set_user_arg(asRowVec ? "as_row_vec" : "as_col_vec");
testLayerGrad(config,
"featmap_expand",
/*batch_size*/ 100,
/* trans= */ false,
useGpu,
/* useWeight */ true);
}
}
}
......
......@@ -1926,7 +1926,6 @@ class BatchNormLayer(LayerBase):
def __init__(self,
name,
inputs,
active_type="linear",
bias=True,
use_global_stats=True,
moving_average_fraction=0.9,
......@@ -1964,12 +1963,7 @@ class BatchNormLayer(LayerBase):
cudnn_version >= 4007
self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm"
super(BatchNormLayer, self).__init__(
name,
self.layer_type,
0,
active_type=active_type,
inputs=inputs,
**xargs)
name, self.layer_type, 0, inputs=inputs, **xargs)
if use_global_stats is not None:
self.config.use_global_stats = use_global_stats
......@@ -2377,15 +2371,23 @@ class ExpandLayer(LayerBase):
@config_layer('featmap_expand')
class FeatMapExpandLayer(LayerBase):
def __init__(self, name, inputs, device=None, num_filters=None, bias=False):
def __init__(self,
name,
inputs,
num_filters=None,
as_row_vector=True,
bias=False,
**xargs):
super(FeatMapExpandLayer, self).__init__(
name, 'featmap_expand', 0, inputs=inputs, device=device)
name, 'featmap_expand', 0, inputs=inputs, **xargs)
config_assert(
len(self.inputs) == 1, 'ExpandLayer takes 1 and only 1 inputs')
if num_filters is not None:
self.config.num_filters = num_filters
else:
logger.fatal("FeatMapExpandLayer must specify num_filters.")
if not as_row_vector:
self.config.user_arg = "as_col_vec"
self.set_layer_size(self.get_input_layer(0).size * num_filters)
......@@ -2395,14 +2397,12 @@ class MaxLayer(LayerBase):
name,
inputs,
trans_type='non-seq',
active_type='linear',
bias=False,
output_max_index=None,
**xargs):
super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs)
config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input')
self.config.trans_type = trans_type
self.config.active_type = active_type
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
self.set_layer_size(input_layer.size)
......@@ -2444,18 +2444,12 @@ class SequenceLastInstanceLayer(LayerBase):
def __init__(self,
name,
inputs,
active_type='linear',
trans_type='non-seq',
bias=False,
stride=-1,
**xargs):
super(SequenceLastInstanceLayer, self).__init__(
name,
'seqlastins',
0,
inputs=inputs,
active_type=active_type,
**xargs)
name, 'seqlastins', 0, inputs=inputs, **xargs)
config_assert(
len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
if trans_type == 'seq':
......@@ -2471,7 +2465,6 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
def __init__(self,
name,
inputs,
active_type='linear',
trans_type='non-seq',
bias=False,
stride=-1,
......@@ -2479,7 +2472,6 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
super(SequenceFirstInstanceLayer, self).__init__(
name,
inputs=inputs,
active_type=active_type,
trans_type=trans_type,
bias=bias,
stride=stride,
......@@ -2489,14 +2481,9 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
@config_layer('seqconcat')
class SequenceConcatLayer(LayerBase):
def __init__(self, name, inputs, active_type='linear', bias=False, **xargs):
def __init__(self, name, inputs, bias=False, **xargs):
super(SequenceConcatLayer, self).__init__(
name,
'seqconcat',
0,
inputs=inputs,
active_type=active_type,
**xargs)
name, 'seqconcat', 0, inputs=inputs, **xargs)
config_assert(
len(inputs) == 2, 'SequenceConcatLayer must have 2 inputs')
for input_index in xrange(len(self.inputs)):
......@@ -2507,20 +2494,9 @@ class SequenceConcatLayer(LayerBase):
@config_layer('seqreshape')
class SequenceReshapeLayer(LayerBase):
def __init__(self,
name,
size,
inputs,
active_type='linear',
bias=False,
**xargs):
def __init__(self, name, size, inputs, bias=False, **xargs):
super(SequenceReshapeLayer, self).__init__(
name,
'seqreshape',
size,
inputs=inputs,
active_type=active_type,
**xargs)
name, 'seqreshape', size, inputs=inputs, **xargs)
config_assert(
len(inputs) == 1, 'SequenceReshapeLayer must have 1 inputs')
self.set_layer_size(size)
......@@ -2529,9 +2505,9 @@ class SequenceReshapeLayer(LayerBase):
@config_layer('subseq')
class SubSequenceLayer(LayerBase):
def __init__(self, name, inputs, active_type='linear', bias=False, **xargs):
def __init__(self, name, inputs, bias=False, **xargs):
super(SubSequenceLayer, self).__init__(
name, 'subseq', 0, inputs=inputs, active_type=active_type, **xargs)
name, 'subseq', 0, inputs=inputs, **xargs)
config_assert(len(inputs) == 3, 'SubSequenceLayer must have 3 inputs')
input_layer0 = self.get_input_layer(0)
size = input_layer0.size
......@@ -2687,11 +2663,10 @@ class AverageLayer(LayerBase):
inputs,
average_strategy='average',
trans_type='non-seq',
active_type='linear',
bias=False,
**xargs):
super(AverageLayer, self).__init__(
name, 'average', 0, inputs=inputs, active_type=active_type, **xargs)
name, 'average', 0, inputs=inputs, **xargs)
self.config.average_strategy = average_strategy
self.config.trans_type = trans_type
config_assert(len(inputs) == 1, 'AverageLayer must have 1 input')
......
......@@ -1553,14 +1553,24 @@ def expand_layer(input,
@wrap_name_default()
@wrap_act_default(act=IdentityActivation())
@layer_support()
def repeat_layer(input, num_repeats, name=None, layer_attr=None):
def repeat_layer(input,
num_repeats,
as_row_vector=True,
act=None,
name=None,
layer_attr=None):
"""
A layer for repeating the input for num_repeats times. This is equivalent
to apply concat_layer() with num_repeats same input.
A layer for repeating the input for num_repeats times.
If as_row_vector:
.. math::
y = [x, x, \cdots, x]
y = [x_1,\cdots, x_n, \cdots, x_1, \cdots, x_n]
If not as_row_vector:
.. math::
y = [x_1,\cdots, x_1, \cdots, x_n, \cdots, x_n]
The example usage is:
......@@ -1573,6 +1583,14 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None):
:param num_repeats: Repeat the input so many times
:type num_repeats: int
:param name: Layer name.
:param as_row_vector: True for treating the input as a row vector and repeating
it in the column direction. This is equivalent to applying
concat_layer() with num_repeats copies of the same input.
False for treating the input as a column vector and repeating
it in the row direction.
:type as_row_vector: bool
:param act: Activation type.
:type act: BaseActivation
:type name: basestring
:param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute.
......@@ -1583,13 +1601,16 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None):
l = Layer(
inputs=[input.name],
name=name,
active_type=act.name,
num_filters=num_repeats,
as_row_vector=as_row_vector,
type=LayerType.FEATURE_MAP_EXPAND_LAYER,
**ExtraAttr.to_kwargs(layer_attr))
return LayerOutput(
name=name,
size=l.config.size,
layer_type=LayerType.FEATURE_MAP_EXPAND_LAYER,
activation=act,
parents=[input])
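A minimal usage of the new argument with the config helpers (mirroring the test config added below; the layer name and sizes are illustrative):

from paddle.trainer_config_helpers import *

din = data_layer(name='data', size=4)
# Output size is 4 * 3 = 12; with as_row_vector=False each element of the
# input is repeated in place instead of the whole vector being tiled.
rep = repeat_layer(input=din, num_repeats=3, as_row_vector=False)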
......@@ -2834,11 +2855,13 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
Concat sequence a with sequence b.
Inputs:
- a = [a1, a2, ..., an]
- a = [a1, a2, ..., am]
- b = [b1, b2, ..., bn]
- Note that the length of a and b should be the same.
Output: [a1, b1, a2, b2, ..., an, bn]
Output: [a1, ..., am, b1, ..., bn]
Note that the above computation is for one sample. Multiple samples are
processed in one batch.
The example usage is:
......
#!/bin/bash
export configs=(test_fc layer_activations projections test_print_layer
export configs=(test_repeat_layer test_fc layer_activations projections test_print_layer
test_sequence_pooling test_lstmemory_layer test_grumemory_layer
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
......
......@@ -9,7 +9,7 @@ layers {
name: "__first_seq_0__"
type: "seqlastins"
size: 30
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "data"
}
......@@ -21,7 +21,7 @@ layers {
name: "__first_seq_1__"
type: "seqlastins"
size: 30
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "data"
}
......@@ -33,7 +33,7 @@ layers {
name: "__last_seq_0__"
type: "seqlastins"
size: 30
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "data"
}
......@@ -44,7 +44,7 @@ layers {
name: "__last_seq_1__"
type: "seqlastins"
size: 30
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "data"
}
......@@ -55,7 +55,7 @@ layers {
name: "__first_seq_2__"
type: "seqlastins"
size: 30
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "data"
}
......@@ -67,7 +67,7 @@ layers {
name: "__last_seq_2__"
type: "seqlastins"
size: 30
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "data"
}
......
......@@ -123,7 +123,7 @@ layers {
name: "__last_seq_0__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__simple_gru_0__"
}
......@@ -134,7 +134,7 @@ layers {
name: "__last_seq_1__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__simple_gru_1__"
}
......
......@@ -205,7 +205,7 @@ layers {
name: "__last_seq_0__"
type: "seqlastins"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__lstm_group_0__"
}
......@@ -216,7 +216,7 @@ layers {
name: "__last_seq_1__"
type: "seqlastins"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__lstm_group_1__"
}
......
......@@ -138,7 +138,7 @@ layers {
name: "__last_seq_0__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__recurrent_layer_0__"
}
......@@ -149,7 +149,7 @@ layers {
name: "__first_seq_0__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__recurrent_layer_1__"
}
......@@ -161,7 +161,7 @@ layers {
name: "__last_seq_1__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__lstmemory_0__"
}
......@@ -172,7 +172,7 @@ layers {
name: "__first_seq_1__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__lstmemory_1__"
}
......@@ -184,7 +184,7 @@ layers {
name: "__last_seq_2__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__gru_0__"
}
......@@ -195,7 +195,7 @@ layers {
name: "__first_seq_2__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__gru_1__"
}
......
type: "nn"
layers {
name: "data"
type: "data"
size: 30
active_type: ""
}
layers {
name: "__repeat_layer_0__"
type: "featmap_expand"
size: 300
active_type: ""
inputs {
input_layer_name: "data"
}
num_filters: 10
}
layers {
name: "__repeat_layer_1__"
type: "featmap_expand"
size: 300
active_type: "tanh"
inputs {
input_layer_name: "data"
}
num_filters: 10
user_arg: "as_col_vec"
}
input_layer_names: "data"
output_layer_names: "__repeat_layer_0__"
output_layer_names: "__repeat_layer_1__"
sub_models {
name: "root"
layer_names: "data"
layer_names: "__repeat_layer_0__"
layer_names: "__repeat_layer_1__"
input_layer_names: "data"
output_layer_names: "__repeat_layer_0__"
output_layer_names: "__repeat_layer_1__"
is_recurrent_layer_group: false
}
......@@ -91,7 +91,7 @@ layers {
name: "__last_seq_0__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "rnn_forward"
}
......@@ -140,7 +140,7 @@ layers {
name: "__first_seq_0__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "rnn_back"
}
......@@ -190,7 +190,7 @@ layers {
name: "__last_seq_1__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "rnn_subseq_forward"
}
......@@ -280,7 +280,7 @@ layers {
name: "__last_seq_2__"
type: "seqlastins"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__lstm_group_0__"
}
......@@ -329,7 +329,7 @@ layers {
name: "__last_seq_3__"
type: "seqlastins"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__gru_group_0__"
}
......@@ -378,7 +378,7 @@ layers {
name: "__last_seq_4__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__fc_layer_0__"
}
......
......@@ -27,7 +27,7 @@ layers {
name: "__seqreshape_0__"
type: "seqreshape"
size: 5
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "data1"
}
......
......@@ -9,7 +9,7 @@ layers {
name: "__seq_pooling_0__"
type: "max"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "dat_in"
}
......@@ -19,7 +19,7 @@ layers {
name: "__seq_pooling_1__"
type: "max"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "dat_in"
}
......@@ -29,7 +29,7 @@ layers {
name: "__seq_pooling_2__"
type: "average"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "dat_in"
}
......@@ -40,7 +40,7 @@ layers {
name: "__seq_pooling_3__"
type: "average"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "dat_in"
}
......@@ -51,7 +51,7 @@ layers {
name: "__seq_pooling_4__"
type: "average"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "dat_in"
}
......@@ -62,7 +62,7 @@ layers {
name: "__seq_pooling_5__"
type: "average"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "dat_in"
}
......@@ -73,7 +73,7 @@ layers {
name: "__seq_pooling_6__"
type: "max"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "dat_in"
}
......
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
din = data_layer(name='data', size=30)
outputs(
repeat_layer(
input=din, num_repeats=10, as_row_vector=True),
repeat_layer(
input=din, num_repeats=10, act=TanhActivation(), as_row_vector=False))