提交 d130d181 编写于 作者: Y Yu Yang 提交者: GitHub

Complete unittest for trainer_config_helpers. (#108)

* Fix many trainer_config_helpers bugs and complete the unit tests for `layers.py`
上级 1c56e0dc
...@@ -1279,7 +1279,7 @@ class LayerBase(object): ...@@ -1279,7 +1279,7 @@ class LayerBase(object):
size, size,
dims=None, dims=None,
sparse = None, sparse = None,
format = "csr"): format = None):
if dims is None: if dims is None:
# TODO(yuyang18): print warning and callstack here! # TODO(yuyang18): print warning and callstack here!
dims = list() dims = list()
...@@ -2074,7 +2074,7 @@ class MaxLayer(LayerBase): ...@@ -2074,7 +2074,7 @@ class MaxLayer(LayerBase):
active_type='linear', active_type='linear',
device=None, device=None,
bias=False, bias=False,
output_max_index=False): output_max_index=None):
super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, device=device) super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, device=device)
config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input') config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input')
self.config.trans_type = trans_type self.config.trans_type = trans_type
...@@ -2083,7 +2083,8 @@ class MaxLayer(LayerBase): ...@@ -2083,7 +2083,8 @@ class MaxLayer(LayerBase):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
self.set_layer_size(input_layer.size) self.set_layer_size(input_layer.size)
self.create_bias_parameter(bias, self.config.size) self.create_bias_parameter(bias, self.config.size)
self.config.output_max_index=output_max_index if output_max_index is not None:
self.config.output_max_index = output_max_index
@config_layer('maxid') @config_layer('maxid')
...@@ -2440,7 +2441,7 @@ class MixedLayer(LayerBase): ...@@ -2440,7 +2441,7 @@ class MixedLayer(LayerBase):
inputs, inputs,
size=0, size=0,
bias=True, bias=True,
error_clipping_threshold=0.0, error_clipping_threshold=None,
**xargs): **xargs):
config_assert(inputs, 'inputs cannot be empty') config_assert(inputs, 'inputs cannot be empty')
super(MixedLayer, self).__init__( super(MixedLayer, self).__init__(
...@@ -2510,7 +2511,8 @@ class MixedLayer(LayerBase): ...@@ -2510,7 +2511,8 @@ class MixedLayer(LayerBase):
self.create_bias_parameter(bias, self.config.size) self.create_bias_parameter(bias, self.config.size)
self.config.error_clipping_threshold = error_clipping_threshold if error_clipping_threshold is not None:
self.config.error_clipping_threshold = error_clipping_threshold
# like MixedLayer, but no bias parameter # like MixedLayer, but no bias parameter
@config_func @config_func
......
...@@ -15,8 +15,10 @@ ...@@ -15,8 +15,10 @@
__all__ = ["TanhActivation", "SigmoidActivation", __all__ = ["TanhActivation", "SigmoidActivation",
"SoftmaxActivation", "IdentityActivation", "LinearActivation", "SoftmaxActivation", "IdentityActivation", "LinearActivation",
'SequenceSoftmaxActivation', 'ExpActivation', 'SequenceSoftmaxActivation', 'ExpActivation',
"ReluActivation", "BReluActivation", "SoftReluActivation", "STanhActivation", "ReluActivation", "BReluActivation", "SoftReluActivation",
"AbsActivation", "SquareActivation", "BaseActivation"] "STanhActivation",
"AbsActivation", "SquareActivation",
"BaseActivation"]
class BaseActivation(object): class BaseActivation(object):
...@@ -36,6 +38,9 @@ class BaseActivation(object): ...@@ -36,6 +38,9 @@ class BaseActivation(object):
self.name = name self.name = name
self.support_hppl = support_hppl self.support_hppl = support_hppl
def __repr__(self):
return self.name
class TanhActivation(BaseActivation): class TanhActivation(BaseActivation):
""" """
......
...@@ -616,7 +616,7 @@ def lstmemory_group(input, size=None, name=None, ...@@ -616,7 +616,7 @@ def lstmemory_group(input, size=None, name=None,
cell states, or hidden states in every time step are accessible to for the cell states, or hidden states in every time step are accessible to for the
user. This is especially useful in attention model. If you do not need to user. This is especially useful in attention model. If you do not need to
access to the internal states of the lstm, but merely use its outputs, access to the internal states of the lstm, but merely use its outputs,
it is recommanded to use the lstmemory, which is relatively faster than it is recommended to use the lstmemory, which is relatively faster than
lstmemory_group. lstmemory_group.
NOTE: In PaddlePaddle's implementation, the following input-to-hidden NOTE: In PaddlePaddle's implementation, the following input-to-hidden
...@@ -1052,7 +1052,7 @@ def dropout_layer(input, dropout_rate, name=None): ...@@ -1052,7 +1052,7 @@ def dropout_layer(input, dropout_rate, name=None):
layer_attr=ExtraAttr(drop_rate=dropout_rate)) layer_attr=ExtraAttr(drop_rate=dropout_rate))
def outputs(layers): def outputs(layers, *args):
""" """
Declare the end of network. Currently it will only calculate the Declare the end of network. Currently it will only calculate the
input/output order of network. It will calculate the predict network or input/output order of network. It will calculate the predict network or
...@@ -1089,9 +1089,12 @@ def outputs(layers): ...@@ -1089,9 +1089,12 @@ def outputs(layers):
if isinstance(layers, LayerOutput): if isinstance(layers, LayerOutput):
layers = [layers] layers = [layers]
if len(args) != 0:
layers.extend(args)
assert len(layers) > 0 assert len(layers) > 0
if len(layers) != 1: if len(layers) != 1:
logger.warning("EndOfNetwork routine try to calculate network's" logger.warning("`outputs` routine try to calculate network's"
" inputs and outputs order. It might not work well." " inputs and outputs order. It might not work well."
"Please see follow log carefully.") "Please see follow log carefully.")
inputs = [] inputs = []
......
...@@ -47,9 +47,14 @@ class MaxPooling(BasePoolingType): ...@@ -47,9 +47,14 @@ class MaxPooling(BasePoolingType):
.. math:: .. math::
max(samples\\_of\\_a\\_sequence) max(samples\\_of\\_a\\_sequence)
:param output_max_index: True if output sequence max index instead of max
value. None means use default value in proto.
:type output_max_index: bool|None
""" """
def __init__(self): def __init__(self, output_max_index=None):
BasePoolingType.__init__(self, "max") BasePoolingType.__init__(self, "max")
self.output_max_index = output_max_index
class AvgPooling(BasePoolingType): class AvgPooling(BasePoolingType):
......
...@@ -3,3 +3,8 @@ add_test(NAME layers_test ...@@ -3,3 +3,8 @@ add_test(NAME layers_test
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
python ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/layers_test.py python ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/layers_test.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
# Register the test_layerHelpers test; it runs run_tests.sh from the
# trainer_config_helpers/tests/configs directory.
add_test(NAME test_layerHelpers
COMMAND
${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh
)
7e6919d17562516e9a1d9a88de1fb3b9 img_layers.protostr
a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr
9c038249ec8ff719753a746cdb04c026 layer_activations.protostr
5913f87b39cee3b2701fa158270aca26 projections.protostr
6b39e34beea8dfb782bee9bd3dea9eb5 simple_rnn_layers.protostr
0fc1409600f1a3301da994ab9d28b0bf test_cost_layers.protostr
144bc6d3a509de74115fa623741797ed test_expand_layer.protostr
2378518bdb71e8c6e888b1842923df58 test_fc.protostr
8bb44e1e5072d0c261572307e7672bda test_grumemory_layer.protostr
1f3510672dce7a9ed25317fc58579ac7 test_hsigmoid.protostr
d350bd91a0dc13e854b1364c3d9339c6 test_lstmemory_layer.protostr
251a948ba41c1071afcd3d9cf9c233f7 test_ntm_layers.protostr
e6ff04e70aea27c7b06d808cc49c9497 test_print_layer.protostr
2a75dd33b640c49a8821c2da6e574577 test_rnn_group.protostr
67d6fde3afb54f389d0ce4ff14726fe1 test_sequence_pooling.protostr
f586a548ef4350ba1ed47a81859a64cb unused_layers.protostr
8122477f4f65244580cec09edc590041 util_layers.protostr
#!/bin/bash
# Regenerate <name>.protostr for every config listed below by running
# paddle.utils.dump_config on <name>.py in this directory.
set -e

# Work from the script's own directory so the relative file names resolve.
# Quoted so a checkout path containing spaces or glob characters still works.
cd "$(dirname "$0")"

# Put the directory four levels up on PYTHONPATH so the `paddle` package
# used below can be imported.
export PYTHONPATH="$PWD/../../../../"

configs=(test_fc layer_activations projections test_print_layer
         test_sequence_pooling test_lstmemory_layer test_grumemory_layer
         last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
         img_layers util_layers simple_rnn_layers unused_layers test_cost_layers
         test_rnn_group)

# "${configs[@]}" expands to exactly one word per array element.
for conf in "${configs[@]}"
do
    echo "Generating " "$conf"
    python -m paddle.utils.dump_config "$conf.py" > "$conf.protostr"
done
# Config exercising the image helpers: img_conv_layer, batch_norm_layer,
# img_cmrnorm_layer and img_pool_layer.
from paddle.trainer_config_helpers import *

settings(learning_rate=1e-3, batch_size=1000)

image = data_layer(name='image', size=256 * 256)

conv_out = img_conv_layer(
    input=image,
    num_channels=1,
    num_filters=64,
    filter_size=(32, 64),
    padding=(1, 0),
    stride=(1, 1),
    act=LinearActivation())

bn_out = batch_norm_layer(input=conv_out, act=ReluActivation())
norm_out = img_cmrnorm_layer(input=bn_out, size=32)

# The pooling branch reads the convolution output directly, not the
# normalized branch.
pool_out = img_pool_layer(
    input=conv_out, pool_size=32, pool_type=MaxPooling())

outputs(pool_out, norm_out)
\ No newline at end of file
# Config exercising first_seq and last_seq at each aggregate level.
from paddle.trainer_config_helpers import *

settings(
    batch_size=1000,
    learning_rate=1e-5
)

din = data_layer(name='data', size=30)

seq_op = [
    first_seq,
    last_seq
]

agg_level = [
    AggregateLevel.EACH_SEQUENCE,
    AggregateLevel.EACH_TIMESTEP
]

# One layer per (operation, aggregate level) pair. The operation varies in
# the outer loop, so layers are created operation-major.
opts = []
for op in seq_op:
    for al in agg_level:
        opts.append(op(input=din, agg_level=al))

outputs(opts)
\ No newline at end of file
'''
Build one fc_layer per activation class listed below.
'''
from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

source = data_layer(name='input', size=100)

activation_classes = [
    TanhActivation,
    SigmoidActivation,
    SoftmaxActivation,
    IdentityActivation,
    LinearActivation,
    ExpActivation,
    ReluActivation,
    BReluActivation,
    SoftReluActivation,
    STanhActivation,
    AbsActivation,
    SquareActivation,
]

# Layers are named layer_0, layer_1, ... following the list order above.
fc_layers = []
for index, activation_cls in enumerate(activation_classes):
    fc_layers.append(
        fc_layer(input=source, size=100, act=activation_cls(),
                 name="layer_%d" % index))

outputs(fc_layers)
'''
Test mixed layer, projections and operators.
'''
from paddle.trainer_config_helpers import *

settings(
    batch_size=1000,
    learning_rate=1e-4
)

din = data_layer(name='test', size=100)

# `din` is rebound to the embedding output; everything below reads that.
din = embedding_layer(input=din, size=256)

# m1..m4 form a chain: each mixed layer holds one projection of the
# previous layer.
with mixed_layer(size=100) as m1:
    m1 += full_matrix_projection(input=din)

with mixed_layer(size=100) as m2:
    m2 += table_projection(input=m1)

with mixed_layer(size=100) as m3:
    m3 += identity_projection(input=m2)

with mixed_layer(size=100) as m4:
    m4 += dotmul_projection(input=m3)

# m5..m7 are created without an explicit size.
with mixed_layer() as m5:
    m5 += context_projection(input=m4, context_len=3)

with mixed_layer() as m6:
    m6 += dotmul_operator(a=m3, b=m4)

img = data_layer(name='img', size=32*32)
flt = data_layer(name='filter', size=3*3*1*64)

with mixed_layer() as m7:
    m7 += conv_operator(img=img, filter=flt, num_filters=64,
                        num_channel=1, filter_size=3)

# Same helper, called with an input list instead of as a context manager,
# plus extra layer attributes.
end = mixed_layer(input=[full_matrix_projection(input=m5),
                         trans_full_matrix_projection(input=m6),
                         full_matrix_projection(input=m7)],
                  size=100,
                  layer_attr=ExtraAttr(drop_rate=0.5,
                                       error_clipping_threshold=40))

outputs(end)
#!/bin/bash
# Regenerate every .protostr dump via generate_protostr.sh, then verify the
# results against the checksums recorded in check.md5.

# Enable errexit first so that a failed cd aborts the run instead of letting
# the following commands execute from the wrong directory.
set -e

# Quoted so a checkout path containing spaces or glob characters still works.
cd "$(dirname "$0")"

./generate_protostr.sh
md5sum -c check.md5
# Config exercising recurrent_layer, lstmemory and grumemory, each once in
# the default direction and once with reverse=True, on top of one shared
# sigmoid fc layer.
from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-4)

din = data_layer(name='data', size=200)
hidden = fc_layer(input=din, size=200, act=SigmoidActivation())

rnn_fwd = recurrent_layer(input=hidden, act=SigmoidActivation())
rnn_bwd = recurrent_layer(
    input=hidden, act=SigmoidActivation(), reverse=True)

# Each lstmemory is fed by a bias-free linear fc layer of width 200*4.
lstm_fwd_in = fc_layer(
    input=hidden, size=200 * 4, act=LinearActivation(), bias_attr=False)
lstm_fwd = lstmemory(input=lstm_fwd_in, act=SigmoidActivation())

lstm_bwd_in = fc_layer(
    input=hidden, size=200 * 4, act=LinearActivation(), bias_attr=False)
lstm_bwd = lstmemory(
    input=lstm_bwd_in, act=SigmoidActivation(), reverse=True)

# Each grumemory is fed by a bias-free linear fc layer of width 200*3.
gru_fwd_in = fc_layer(
    input=hidden, size=200 * 3, act=LinearActivation(), bias_attr=False)
gru_fwd = grumemory(input=gru_fwd_in, act=SigmoidActivation())

gru_bwd_in = fc_layer(
    input=hidden, size=200 * 3, act=LinearActivation(), bias_attr=False)
gru_bwd = grumemory(
    input=gru_bwd_in, act=SigmoidActivation(), reverse=True)

# last_seq is applied to the default-direction layers and first_seq to the
# reversed ones.
outputs(
    last_seq(input=rnn_fwd),
    first_seq(input=rnn_bwd),
    last_seq(input=lstm_fwd),
    first_seq(input=lstm_bwd),
    last_seq(input=gru_fwd),
    first_seq(gru_bwd))
# Config exercising the cost layer helpers. The costs are built in the same
# order in which they are handed to outputs().
from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

seq_in = data_layer(name='input', size=200)
labels = data_layer(name='labels', size=5000)

# `probs` / `xe_label` are shared by the three cross-entropy style costs.
probs = data_layer(name='probs', size=10)
xe_label = data_layer(name='xe-label', size=10)

ctc_cost = ctc_layer(input=seq_in, label=labels)

crf_cost = crf_layer(
    input=fc_layer(input=seq_in, size=4),
    label=data_layer(name='crf_label', size=4))

pairwise_rank_cost = rank_cost(
    left=data_layer(name='left', size=1),
    right=data_layer(name='right', size=1),
    label=data_layer(name='label', size=1))

listwise_cost = lambda_cost(
    input=data_layer(name='list_feature', size=100),
    score=data_layer(name='list_scores', size=1))

xe_cost = cross_entropy(input=probs, label=xe_label)
xe_selfnorm_cost = cross_entropy_with_selfnorm(input=probs, label=xe_label)

huber = huber_cost(
    input=data_layer(name='huber_probs', size=1),
    label=data_layer(name='huber_label', size=1))

multi_binary_cost = multi_binary_label_cross_entropy(
    input=probs, label=xe_label)

outputs(ctc_cost,
        crf_cost,
        pairwise_rank_cost,
        listwise_cost,
        xe_cost,
        xe_selfnorm_cost,
        huber,
        multi_binary_cost)
# Config exercising expand_layer with both ExpandLevel values.
from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-5)

source = data_layer(name='data', size=30)
template_seq = data_layer(name='data_seq', size=30)

expanded_from_sequence = expand_layer(
    input=source,
    expand_as=template_seq,
    expand_level=ExpandLevel.FROM_SEQUENCE)

expanded_from_timestep = expand_layer(
    input=source,
    expand_as=template_seq,
    expand_level=ExpandLevel.FROM_TIMESTEP)

outputs(expanded_from_sequence, expanded_from_timestep)
# Config exercising trans_layer, fc_layer (without bias) and
# selective_fc_layer.
from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-5)

source = data_layer(name='data', size=100)

transposed = trans_layer(input=source)
fc_out = fc_layer(input=transposed, size=100, bias_attr=False)

# The selective fc layer reads the raw data layer, not the transposed one.
select_mask = data_layer(name='mask', size=100)
selective_out = selective_fc_layer(
    input=source, select=select_mask, size=100, act=SigmoidActivation())

outputs(fc_out, selective_out)
# Config exercising grumemory with reverse=True and non-default activations.
from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-4)

source = data_layer(name='data', size=120)

gru_out = grumemory(
    input=source,
    size=40,
    reverse=True,
    gate_act=TanhActivation(),
    act=SigmoidActivation())

outputs(gru_out)
# Config exercising hsigmoid with 10 classes.
from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

features = data_layer(name='data', size=100)
target = data_layer(name='label', size=10)

hsigmoid_cost = hsigmoid(input=features, label=target, num_classes=10)

outputs(hsigmoid_cost)
\ No newline at end of file
# Config exercising lstmemory with reverse=True and tanh activations.
from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-5)

source = data_layer(name='data', size=128)

lstm_out = lstmemory(
    input=source,
    reverse=True,
    gate_act=TanhActivation(),
    act=TanhActivation(),
    size=32)

outputs(lstm_out)
# Config exercising a group of element-wise / vector helper layers
# (interpolation, power, scaling, cos_sim, sum_to_one_norm, conv_shift,
# tensor, slope_intercept and linear_comb).
from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-5)

scalar_w = data_layer(name='w', size=1)
vec_a = data_layer(name='a', size=100)
vec_b = data_layer(name='b', size=100)
vec_c = data_layer(name='c', size=200)
vec_d = data_layer(name='d', size=31)

outputs(
    interpolation_layer(input=[vec_a, vec_b], weight=scalar_w),
    power_layer(input=vec_a, weight=scalar_w),
    scaling_layer(input=vec_a, weight=scalar_w),
    cos_sim(a=vec_a, b=vec_b),
    # Second cos_sim: b is twice as wide as a, with size=2.
    cos_sim(a=vec_a, b=vec_c, size=2),
    sum_to_one_norm_layer(input=vec_a),
    conv_shift_layer(a=vec_a, b=vec_d),
    tensor_layer(a=vec_a, b=vec_b, size=1000),
    slope_intercept_layer(input=vec_a, slope=0.7, intercept=0.9),
    linear_comb_layer(weights=vec_b, vectors=vec_c))
# Config exercising print_layer on a single data layer.
from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

source = data_layer(name='input', size=100)

# The return value of print_layer is not used; the data layer itself is
# the network output.
print_layer(input=source)

outputs(source)
# Config exercising recurrent_group (default direction, reverse=True and
# over a SubsequenceInput) together with lstmemory_group and gru_group.
from paddle.trainer_config_helpers import *

settings(
    learning_rate=1e-4,
    batch_size=1000
)

seq = data_layer(name='seq_input', size=100)
sub_seq = data_layer(name='sub_seq_input', size=100)
lbl = data_layer(name='label', size=1)


def generate_rnn_simple(name):
    """
    Build a step function for recurrent_group.

    The returned step creates a memory called ``name`` and an fc_layer with
    the same ``name`` whose inputs are the step input and that memory.

    :param name: name shared by the memory and the fc layer.
    :return: step function taking the per-step input layer.
    """
    def rnn_simple(s):
        m = memory(name=name, size=200)
        fc = fc_layer(input=[s, m], size=200, name=name)
        return fc

    return rnn_simple


with mixed_layer() as lstm_param:  # test lstm unit, rnn group
    lstm_param += full_matrix_projection(input=seq, size=100 * 4)

with mixed_layer() as gru_param:
    gru_param += full_matrix_projection(input=seq, size=100 * 3)

outputs(last_seq(input=recurrent_group(step=generate_rnn_simple('rnn_forward'),
                                       input=seq)),
        first_seq(input=recurrent_group(step=generate_rnn_simple('rnn_back'),
                                        input=seq, reverse=True)),
        last_seq(input=recurrent_group(step=generate_rnn_simple(
            'rnn_subseq_forward'), input=SubsequenceInput(input=sub_seq))),
        last_seq(input=lstmemory_group(input=lstm_param, size=100)),
        last_seq(input=gru_group(input=gru_param, size=100)))
# Config exercising pooling_layer with every pooling type at each aggregate
# level, plus one max pooling that outputs the max index.
from paddle.trainer_config_helpers import *

settings(
    learning_rate=1e-4,
    batch_size=1000
)

din = data_layer(name='dat_in', size=100)

POOL_TYPE = [
    MaxPooling,
    AvgPooling,
    SumPooling
]

AGG_LEVEL = [
    AggregateLevel.EACH_SEQUENCE,
    AggregateLevel.EACH_TIMESTEP
]

# One pooling layer per (pooling type, aggregate level) pair, created
# pooling-type-major.
opts = []
for pt in POOL_TYPE:
    for al in AGG_LEVEL:
        opts.append(pooling_layer(input=din, agg_level=al, pooling_type=pt()))

# NOTE(review): assumed to run once, after the loops (a single extra layer
# with output_max_index=True) -- confirm against the recorded checksum.
opts.append(pooling_layer(input=din,
                          pooling_type=MaxPooling(output_max_index=True)))

outputs(opts)
# Config for layer helpers not covered by the other test configs.
from paddle.trainer_config_helpers import *

settings(batch_size=1000, learning_rate=1e-4)

probs = data_layer(name='probs', size=100)

# sampling_id_layer does not seem to support training.
sampled_ids = sampling_id_layer(input=probs)

# block_expand_layer is left disabled: it seems to be incorrect and should
# be rewritten.
# block_expand_layer(input=probs, channel=1, block_x=1, block_y=3),

outputs(sampled_ids)
\ No newline at end of file
# Config exercising addto_layer and concat_layer; concat_layer is fed once
# with layers and once with identity projections of the same layers.
from paddle.trainer_config_helpers import *

settings(learning_rate=1e-4, batch_size=1000)

left = data_layer(name='a', size=10)
right = data_layer(name='b', size=10)

summed = addto_layer(input=[left, right])

concat_of_layers = concat_layer(input=[left, right])

identity_inputs = [
    identity_projection(input=left),
    identity_projection(input=right),
]
concat_of_projections = concat_layer(input=identity_inputs)

outputs(summed, concat_of_layers, concat_of_projections)
\ No newline at end of file
...@@ -47,7 +47,7 @@ print_layer(input=[out]) ...@@ -47,7 +47,7 @@ print_layer(input=[out])
outputs(classification_cost(out, data_layer(name="label", size=num_classes))) outputs(classification_cost(out, data_layer(name="label", size=num_classes)))
dotmul = mixed_layer(input=[dotmul_operator(x=x1, y=y1), dotmul = mixed_layer(input=[dotmul_operator(a=x1, b=x1),
dotmul_projection(input=y1)]) dotmul_projection(input=y1)])
proj_with_attr_init = mixed_layer(input=full_matrix_projection(input=y1, proj_with_attr_init = mixed_layer(input=full_matrix_projection(input=y1,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册