diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/img_layers.protostr new file mode 100644 index 0000000000000000000000000000000000000000..899171ff1d00b87db82a68e829b5d62568c9d43b --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/img_layers.protostr @@ -0,0 +1,176 @@ +type: "nn" +layers { + name: "image" + type: "data" + size: 65536 + active_type: "" +} +layers { + name: "__conv_0__" + type: "exconv" + size: 3297856 + active_type: "" + inputs { + input_layer_name: "image" + input_parameter_name: "___conv_0__.w0" + conv_conf { + filter_size: 32 + channels: 1 + stride: 1 + padding: 1 + groups: 1 + filter_channels: 1 + output_x: 227 + img_size: 256 + caffe_mode: true + filter_size_y: 32 + padding_y: 1 + stride_y: 1 + } + } + bias_parameter_name: "___conv_0__.wbias" + num_filters: 64 + shared_biases: true +} +layers { + name: "__batch_norm_0__" + type: "batch_norm" + size: 3297856 + active_type: "relu" + inputs { + input_layer_name: "__conv_0__" + input_parameter_name: "___batch_norm_0__.w0" + image_conf { + channels: 64 + img_size: 227 + } + } + inputs { + input_layer_name: "__conv_0__" + input_parameter_name: "___batch_norm_0__.w1" + } + inputs { + input_layer_name: "__conv_0__" + input_parameter_name: "___batch_norm_0__.w2" + } + bias_parameter_name: "___batch_norm_0__.wbias" + moving_average_fraction: 0.899999976158 +} +layers { + name: "__crmnorm_0__" + type: "norm" + size: 3297856 + active_type: "" + inputs { + input_layer_name: "__batch_norm_0__" + norm_conf { + norm_type: "cmrnorm-projection" + channels: 64 + size: 32 + scale: 0.000399999989895 + pow: 0.75 + output_x: 227 + img_size: 227 + blocked: false + } + } +} +layers { + name: "__pool_0__" + type: "pool" + size: 2458624 + active_type: "" + inputs { + input_layer_name: "__conv_0__" + pool_conf { + pool_type: "max-projection" + channels: 64 + size_x: 32 + stride: 1 + output_x: 196 + img_size: 227 + padding: 0 + size_y: 32 + stride_y: 1 + output_y: 196 + img_size_y: 227 + padding_y: 0 + } + } +} +parameters { + name: "___conv_0__.w0" + size: 65536 + initial_mean: 0.0 + initial_std: 0.0441941730678 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___conv_0__.wbias" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 64 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___batch_norm_0__.w0" + size: 64 + initial_mean: 1.0 + initial_std: 0.0 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___batch_norm_0__.w1" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 64 + initial_strategy: 0 + initial_smart: false + is_static: true + is_shared: true +} +parameters { + name: "___batch_norm_0__.w2" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 64 + initial_strategy: 0 + initial_smart: false + is_static: true + is_shared: true +} +parameters { + name: "___batch_norm_0__.wbias" + size: 64 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 64 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "image" +output_layer_names: "__pool_0__" +output_layer_names: "__crmnorm_0__" +sub_models { + name: "root" + layer_names: "image" + layer_names: "__conv_0__" + layer_names: "__batch_norm_0__" + layer_names: "__crmnorm_0__" + layer_names: "__pool_0__" + input_layer_names: "image" + output_layer_names: "__pool_0__" + output_layer_names: "__crmnorm_0__" + is_recurrent_layer_group: false +} + diff --git 
a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.protostr b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.protostr new file mode 100644 index 0000000000000000000000000000000000000000..7b2911f8e367ebf9d0797e815a7532c714ef698e --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.protostr @@ -0,0 +1,69 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 30 + active_type: "" +} +layers { + name: "__first_seq_0__" + type: "seqlastins" + size: 30 + active_type: "linear" + inputs { + input_layer_name: "data" + } + select_first: true + trans_type: "seq" +} +layers { + name: "__first_seq_1__" + type: "seqlastins" + size: 30 + active_type: "linear" + inputs { + input_layer_name: "data" + } + select_first: true + trans_type: "non-seq" +} +layers { + name: "__last_seq_0__" + type: "seqlastins" + size: 30 + active_type: "linear" + inputs { + input_layer_name: "data" + } + trans_type: "seq" +} +layers { + name: "__last_seq_1__" + type: "seqlastins" + size: 30 + active_type: "linear" + inputs { + input_layer_name: "data" + } + trans_type: "non-seq" +} +input_layer_names: "data" +output_layer_names: "__first_seq_0__" +output_layer_names: "__first_seq_1__" +output_layer_names: "__last_seq_0__" +output_layer_names: "__last_seq_1__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__first_seq_0__" + layer_names: "__first_seq_1__" + layer_names: "__last_seq_0__" + layer_names: "__last_seq_1__" + input_layer_names: "data" + output_layer_names: "__first_seq_0__" + output_layer_names: "__first_seq_1__" + output_layer_names: "__last_seq_0__" + output_layer_names: "__last_seq_1__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/layer_activations.protostr b/python/paddle/trainer_config_helpers/tests/configs/layer_activations.protostr new file mode 100644 index 0000000000000000000000000000000000000000..8ae2421727efeeae85ec478ee54d0e1d6df8e56c --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/layer_activations.protostr @@ -0,0 +1,423 @@ +type: "nn" +layers { + name: "input" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "layer_0" + type: "fc" + size: 100 + active_type: "tanh" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_0.w0" + } + bias_parameter_name: "_layer_0.wbias" +} +layers { + name: "layer_1" + type: "fc" + size: 100 + active_type: "sigmoid" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_1.w0" + } + bias_parameter_name: "_layer_1.wbias" +} +layers { + name: "layer_2" + type: "fc" + size: 100 + active_type: "softmax" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_2.w0" + } + bias_parameter_name: "_layer_2.wbias" +} +layers { + name: "layer_3" + type: "fc" + size: 100 + active_type: "" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_3.w0" + } + bias_parameter_name: "_layer_3.wbias" +} +layers { + name: "layer_4" + type: "fc" + size: 100 + active_type: "" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_4.w0" + } + bias_parameter_name: "_layer_4.wbias" +} +layers { + name: "layer_5" + type: "fc" + size: 100 + active_type: "exponential" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_5.w0" + } + bias_parameter_name: "_layer_5.wbias" +} +layers { + name: "layer_6" + type: "fc" + size: 100 + active_type: "relu" + inputs { + input_layer_name: "input" + input_parameter_name: 
"_layer_6.w0" + } + bias_parameter_name: "_layer_6.wbias" +} +layers { + name: "layer_7" + type: "fc" + size: 100 + active_type: "brelu" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_7.w0" + } + bias_parameter_name: "_layer_7.wbias" +} +layers { + name: "layer_8" + type: "fc" + size: 100 + active_type: "softrelu" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_8.w0" + } + bias_parameter_name: "_layer_8.wbias" +} +layers { + name: "layer_9" + type: "fc" + size: 100 + active_type: "stanh" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_9.w0" + } + bias_parameter_name: "_layer_9.wbias" +} +layers { + name: "layer_10" + type: "fc" + size: 100 + active_type: "abs" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_10.w0" + } + bias_parameter_name: "_layer_10.wbias" +} +layers { + name: "layer_11" + type: "fc" + size: 100 + active_type: "square" + inputs { + input_layer_name: "input" + input_parameter_name: "_layer_11.w0" + } + bias_parameter_name: "_layer_11.wbias" +} +parameters { + name: "_layer_0.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_0.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_1.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_1.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_2.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_2.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_3.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_3.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_4.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_4.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_5.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_5.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_6.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_6.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_7.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} 
+parameters { + name: "_layer_7.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_8.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_8.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_9.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_9.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_10.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_10.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_layer_11.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_layer_11.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "input" +output_layer_names: "layer_0" +output_layer_names: "layer_1" +output_layer_names: "layer_2" +output_layer_names: "layer_3" +output_layer_names: "layer_4" +output_layer_names: "layer_5" +output_layer_names: "layer_6" +output_layer_names: "layer_7" +output_layer_names: "layer_8" +output_layer_names: "layer_9" +output_layer_names: "layer_10" +output_layer_names: "layer_11" +sub_models { + name: "root" + layer_names: "input" + layer_names: "layer_0" + layer_names: "layer_1" + layer_names: "layer_2" + layer_names: "layer_3" + layer_names: "layer_4" + layer_names: "layer_5" + layer_names: "layer_6" + layer_names: "layer_7" + layer_names: "layer_8" + layer_names: "layer_9" + layer_names: "layer_10" + layer_names: "layer_11" + input_layer_names: "input" + output_layer_names: "layer_0" + output_layer_names: "layer_1" + output_layer_names: "layer_2" + output_layer_names: "layer_3" + output_layer_names: "layer_4" + output_layer_names: "layer_5" + output_layer_names: "layer_6" + output_layer_names: "layer_7" + output_layer_names: "layer_8" + output_layer_names: "layer_9" + output_layer_names: "layer_10" + output_layer_names: "layer_11" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/projections.protostr b/python/paddle/trainer_config_helpers/tests/configs/projections.protostr new file mode 100644 index 0000000000000000000000000000000000000000..a901af6b42431dc6d4c474c78d5ec043bdbd54e8 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/projections.protostr @@ -0,0 +1,315 @@ +type: "nn" +layers { + name: "test" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "__embedding_0__" + type: "mixed" + size: 256 + active_type: "" + inputs { + input_layer_name: "test" + input_parameter_name: "___embedding_0__.w0" + proj_conf { + type: "table" + name: "___embedding_0__.w0" + input_size: 100 + output_size: 256 + } + } +} +layers { + name: "__mixed_0__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: 
"__embedding_0__" + input_parameter_name: "___mixed_0__.w0" + proj_conf { + type: "fc" + name: "___mixed_0__.w0" + input_size: 256 + output_size: 100 + } + } +} +layers { + name: "__mixed_1__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: "__mixed_0__" + input_parameter_name: "___mixed_1__.w0" + proj_conf { + type: "table" + name: "___mixed_1__.w0" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__mixed_2__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: "__mixed_1__" + proj_conf { + type: "identity" + name: "___mixed_2__.w0" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__mixed_3__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: "__mixed_2__" + input_parameter_name: "___mixed_3__.w0" + proj_conf { + type: "dot_mul" + name: "___mixed_3__.w0" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__mixed_4__" + type: "mixed" + size: 300 + active_type: "" + inputs { + input_layer_name: "__mixed_3__" + input_parameter_name: "___mixed_4__.w0" + proj_conf { + type: "context" + name: "___mixed_4__.w0" + input_size: 100 + output_size: 300 + context_start: -1 + context_length: 3 + trainable_padding: true + } + } +} +layers { + name: "__mixed_5__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: "__mixed_2__" + } + inputs { + input_layer_name: "__mixed_3__" + } + operator_confs { + type: "dot_mul" + input_indices: 0 + input_indices: 1 + input_sizes: 100 + input_sizes: 100 + output_size: 100 + dotmul_scale: 1.0 + } +} +layers { + name: "img" + type: "data" + size: 1024 + active_type: "" +} +layers { + name: "filter" + type: "data" + size: 576 + active_type: "" +} +layers { + name: "__mixed_6__" + type: "mixed" + size: 57600 + active_type: "" + inputs { + input_layer_name: "img" + } + inputs { + input_layer_name: "filter" + } + operator_confs { + type: "conv" + input_indices: 0 + input_indices: 1 + input_sizes: 1024 + input_sizes: 576 + output_size: 57600 + conv_conf { + filter_size: 3 + channels: 1 + stride: 1 + padding: 0 + groups: 1 + filter_channels: 1 + output_x: 30 + img_size: 32 + caffe_mode: true + filter_size_y: 3 + padding_y: 0 + stride_y: 1 + } + num_filters: 64 + } +} +layers { + name: "__mixed_7__" + type: "mixed" + size: 100 + active_type: "" + inputs { + input_layer_name: "__mixed_4__" + input_parameter_name: "___mixed_7__.w0" + proj_conf { + type: "fc" + name: "___mixed_7__.w0" + input_size: 300 + output_size: 100 + } + } + inputs { + input_layer_name: "__mixed_5__" + input_parameter_name: "___mixed_7__.w1" + proj_conf { + type: "trans_fc" + name: "___mixed_7__.w1" + input_size: 100 + output_size: 100 + } + } + inputs { + input_layer_name: "__mixed_6__" + input_parameter_name: "___mixed_7__.w2" + proj_conf { + type: "fc" + name: "___mixed_7__.w2" + input_size: 57600 + output_size: 100 + } + } + drop_rate: 0.5 +} +parameters { + name: "___embedding_0__.w0" + size: 25600 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 256 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___mixed_0__.w0" + size: 25600 + initial_mean: 0.0 + initial_std: 0.0625 + dims: 256 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___mixed_1__.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___mixed_3__.w0" + size: 100 + initial_mean: 0.0 + initial_std: 1.0 + 
dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___mixed_4__.w0" + size: 200 + initial_mean: 0.0 + initial_std: 0.0 + dims: 2 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___mixed_7__.w0" + size: 30000 + initial_mean: 0.0 + initial_std: 0.0577350258827 + dims: 300 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___mixed_7__.w1" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___mixed_7__.w2" + size: 5760000 + initial_mean: 0.0 + initial_std: 0.00416666688398 + dims: 57600 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +input_layer_names: "test" +input_layer_names: "img" +input_layer_names: "filter" +output_layer_names: "__mixed_7__" +sub_models { + name: "root" + layer_names: "test" + layer_names: "__embedding_0__" + layer_names: "__mixed_0__" + layer_names: "__mixed_1__" + layer_names: "__mixed_2__" + layer_names: "__mixed_3__" + layer_names: "__mixed_4__" + layer_names: "__mixed_5__" + layer_names: "img" + layer_names: "filter" + layer_names: "__mixed_6__" + layer_names: "__mixed_7__" + input_layer_names: "test" + input_layer_names: "img" + input_layer_names: "filter" + output_layer_names: "__mixed_7__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_fc.protostr b/python/paddle/trainer_config_helpers/tests/configs/shared_fc.protostr new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.protostr b/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.protostr new file mode 100644 index 0000000000000000000000000000000000000000..26eed43a459f5d8260f8668d1ab966187a0b277c --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.protostr @@ -0,0 +1,393 @@ +type: "recurrent_nn" +layers { + name: "data_a" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "data_b" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "__mixed_0__" + type: "mixed" + size: 400 + active_type: "" + inputs { + input_layer_name: "data_a" + input_parameter_name: "mixed_param" + proj_conf { + type: "fc" + name: "___mixed_0__.w0" + input_size: 100 + output_size: 400 + } + } +} +layers { + name: "__mixed_1__" + type: "mixed" + size: 400 + active_type: "" + inputs { + input_layer_name: "data_b" + input_parameter_name: "mixed_param" + proj_conf { + type: "fc" + name: "___mixed_1__.w0" + input_size: 100 + output_size: 400 + } + } +} +layers { + name: "__lstm_group_0___recurrent_group" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "__mixed_0__@__lstm_group_0___recurrent_group" + type: "scatter_agent" + size: 400 + active_type: "" +} +layers { + name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + type: "agent" + size: 100 + active_type: "" +} +layers { + name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + type: "agent" + size: 100 + active_type: "" +} +layers { + name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" + type: "mixed" + size: 400 + active_type: "" + inputs { + input_layer_name: "__mixed_0__@__lstm_group_0___recurrent_group" + proj_conf { + type: "identity" + name: "___lstm_group_0___input_recurrent.w0" + input_size: 400 + output_size: 400 + } + } + 
inputs { + input_layer_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + input_parameter_name: "lstm_param" + proj_conf { + type: "fc" + name: "___lstm_group_0___input_recurrent.w1" + input_size: 100 + output_size: 400 + } + } +} +layers { + name: "__lstm_group_0__@__lstm_group_0___recurrent_group" + type: "lstm_step" + size: 100 + active_type: "tanh" + inputs { + input_layer_name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" + } + inputs { + input_layer_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + } + bias_parameter_name: "lstm_bias" + active_gate_type: "sigmoid" + active_state_type: "sigmoid" +} +layers { + name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" + type: "get_output" + size: 100 + active_type: "" + inputs { + input_layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" + input_layer_argument: "state" + } +} +layers { + name: "__lstm_group_0__" + type: "gather_agent" + size: 100 + active_type: "" +} +layers { + name: "__lstm_group_1___recurrent_group" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "__mixed_1__@__lstm_group_1___recurrent_group" + type: "scatter_agent" + size: 400 + active_type: "" +} +layers { + name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" + type: "agent" + size: 100 + active_type: "" +} +layers { + name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" + type: "agent" + size: 100 + active_type: "" +} +layers { + name: "__lstm_group_1___input_recurrent@__lstm_group_1___recurrent_group" + type: "mixed" + size: 400 + active_type: "" + inputs { + input_layer_name: "__mixed_1__@__lstm_group_1___recurrent_group" + proj_conf { + type: "identity" + name: "___lstm_group_1___input_recurrent.w0" + input_size: 400 + output_size: 400 + } + } + inputs { + input_layer_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" + input_parameter_name: "lstm_param" + proj_conf { + type: "fc" + name: "___lstm_group_1___input_recurrent.w1" + input_size: 100 + output_size: 400 + } + } +} +layers { + name: "__lstm_group_1__@__lstm_group_1___recurrent_group" + type: "lstm_step" + size: 100 + active_type: "tanh" + inputs { + input_layer_name: "__lstm_group_1___input_recurrent@__lstm_group_1___recurrent_group" + } + inputs { + input_layer_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" + } + bias_parameter_name: "lstm_bias" + active_gate_type: "sigmoid" + active_state_type: "sigmoid" +} +layers { + name: "__lstm_group_1___state@__lstm_group_1___recurrent_group" + type: "get_output" + size: 100 + active_type: "" + inputs { + input_layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group" + input_layer_argument: "state" + } +} +layers { + name: "__lstm_group_1__" + type: "gather_agent" + size: 100 + active_type: "" +} +layers { + name: "__last_seq_0__" + type: "seqlastins" + size: 100 + active_type: "linear" + inputs { + input_layer_name: "__lstm_group_0__" + } + trans_type: "non-seq" +} +layers { + name: "__last_seq_1__" + type: "seqlastins" + size: 100 + active_type: "linear" + inputs { + input_layer_name: "__lstm_group_1__" + } + trans_type: "non-seq" +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 10 + active_type: "softmax" + inputs { + input_layer_name: "__last_seq_0__" + input_parameter_name: "softmax_param" + } + inputs { + input_layer_name: "__last_seq_1__" + input_parameter_name: "softmax_param" + } +} +layers { + name: "label" + type: "data" + size: 10 + active_type: "" +} +layers { + 
name: "__cost_0__" + type: "multi-class-cross-entropy" + size: 1 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + } + inputs { + input_layer_name: "label" + } + coeff: 1.0 +} +parameters { + name: "mixed_param" + size: 40000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 400 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "lstm_param" + size: 40000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 400 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "lstm_bias" + size: 300 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 300 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "softmax_param" + size: 1000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 10 + initial_strategy: 0 + initial_smart: true +} +input_layer_names: "data_a" +input_layer_names: "data_b" +input_layer_names: "label" +output_layer_names: "__cost_0__" +evaluators { + name: "classification_error_evaluator" + type: "classification_error" + input_layers: "__fc_layer_0__" + input_layers: "label" +} +sub_models { + name: "root" + layer_names: "data_a" + layer_names: "data_b" + layer_names: "__mixed_0__" + layer_names: "__mixed_1__" + layer_names: "__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0__" + layer_names: "__lstm_group_1___recurrent_group" + layer_names: "__lstm_group_1__" + layer_names: "__last_seq_0__" + layer_names: "__last_seq_1__" + layer_names: "__fc_layer_0__" + layer_names: "label" + layer_names: "__cost_0__" + input_layer_names: "data_a" + input_layer_names: "data_b" + input_layer_names: "label" + output_layer_names: "__cost_0__" + evaluator_names: "classification_error_evaluator" + is_recurrent_layer_group: false +} +sub_models { + name: "__lstm_group_0___recurrent_group" + layer_names: "__mixed_0__@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0__@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0___state@__lstm_group_0___recurrent_group" + is_recurrent_layer_group: true + reversed: false + memories { + layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" + link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + is_sequence: false + } + memories { + layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" + link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + is_sequence: false + } + in_links { + layer_name: "__mixed_0__" + link_name: "__mixed_0__@__lstm_group_0___recurrent_group" + has_subseq: false + } + out_links { + layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" + link_name: "__lstm_group_0__" + has_subseq: false + } + target_inlinkid: -1 +} +sub_models { + name: "__lstm_group_1___recurrent_group" + layer_names: "__mixed_1__@__lstm_group_1___recurrent_group" + layer_names: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" + layer_names: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" + layer_names: "__lstm_group_1___input_recurrent@__lstm_group_1___recurrent_group" + layer_names: "__lstm_group_1__@__lstm_group_1___recurrent_group" + layer_names: "__lstm_group_1___state@__lstm_group_1___recurrent_group" + is_recurrent_layer_group: true + reversed: false + memories { + 
layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group" + link_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" + is_sequence: false + } + memories { + layer_name: "__lstm_group_1___state@__lstm_group_1___recurrent_group" + link_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" + is_sequence: false + } + in_links { + layer_name: "__mixed_1__" + link_name: "__mixed_1__@__lstm_group_1___recurrent_group" + has_subseq: false + } + out_links { + layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group" + link_name: "__lstm_group_1__" + has_subseq: false + } + target_inlinkid: -1 +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.protostr new file mode 100644 index 0000000000000000000000000000000000000000..57445243bd06f7504baf861c6ebd19aa1694e69e --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.protostr @@ -0,0 +1,418 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 200 + active_type: "" +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 200 + active_type: "sigmoid" + inputs { + input_layer_name: "data" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "__recurrent_layer_0__" + type: "recurrent" + size: 200 + active_type: "sigmoid" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___recurrent_layer_0__.w0" + } + bias_parameter_name: "___recurrent_layer_0__.wbias" + reversed: false +} +layers { + name: "__recurrent_layer_1__" + type: "recurrent" + size: 200 + active_type: "sigmoid" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___recurrent_layer_1__.w0" + } + bias_parameter_name: "___recurrent_layer_1__.wbias" + reversed: true +} +layers { + name: "__fc_layer_1__" + type: "fc" + size: 800 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___fc_layer_1__.w0" + } +} +layers { + name: "__lstmemory_0__" + type: "lstmemory" + size: 200 + active_type: "sigmoid" + inputs { + input_layer_name: "__fc_layer_1__" + input_parameter_name: "___lstmemory_0__.w0" + } + bias_parameter_name: "___lstmemory_0__.wbias" + reversed: false + active_gate_type: "sigmoid" + active_state_type: "tanh" +} +layers { + name: "__fc_layer_2__" + type: "fc" + size: 800 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___fc_layer_2__.w0" + } +} +layers { + name: "__lstmemory_1__" + type: "lstmemory" + size: 200 + active_type: "sigmoid" + inputs { + input_layer_name: "__fc_layer_2__" + input_parameter_name: "___lstmemory_1__.w0" + } + bias_parameter_name: "___lstmemory_1__.wbias" + reversed: true + active_gate_type: "sigmoid" + active_state_type: "tanh" +} +layers { + name: "__fc_layer_3__" + type: "fc" + size: 600 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___fc_layer_3__.w0" + } +} +layers { + name: "__gru_0__" + type: "gated_recurrent" + size: 200 + active_type: "sigmoid" + inputs { + input_layer_name: "__fc_layer_3__" + input_parameter_name: "___gru_0__.w0" + } + bias_parameter_name: "___gru_0__.wbias" + reversed: false + active_gate_type: "sigmoid" +} +layers { + name: "__fc_layer_4__" + type: "fc" + size: 600 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___fc_layer_4__.w0" + } +} +layers { + name: "__gru_1__" + 
type: "gated_recurrent" + size: 200 + active_type: "sigmoid" + inputs { + input_layer_name: "__fc_layer_4__" + input_parameter_name: "___gru_1__.w0" + } + bias_parameter_name: "___gru_1__.wbias" + reversed: true + active_gate_type: "sigmoid" +} +layers { + name: "__last_seq_0__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "__recurrent_layer_0__" + } + trans_type: "non-seq" +} +layers { + name: "__first_seq_0__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "__recurrent_layer_1__" + } + select_first: true + trans_type: "non-seq" +} +layers { + name: "__last_seq_1__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "__lstmemory_0__" + } + trans_type: "non-seq" +} +layers { + name: "__first_seq_1__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "__lstmemory_1__" + } + select_first: true + trans_type: "non-seq" +} +layers { + name: "__last_seq_2__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "__gru_0__" + } + trans_type: "non-seq" +} +layers { + name: "__first_seq_2__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "__gru_1__" + } + select_first: true + trans_type: "non-seq" +} +parameters { + name: "___fc_layer_0__.w0" + size: 40000 + initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 200 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 200 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___recurrent_layer_0__.w0" + size: 40000 + initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___recurrent_layer_0__.wbias" + size: 200 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 200 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___recurrent_layer_1__.w0" + size: 40000 + initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___recurrent_layer_1__.wbias" + size: 200 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 200 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_1__.w0" + size: 160000 + initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 800 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___lstmemory_0__.w0" + size: 160000 + initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 200 + dims: 4 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___lstmemory_0__.wbias" + size: 1400 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1400 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_2__.w0" + size: 160000 + initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 800 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___lstmemory_1__.w0" + size: 160000 + initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 200 + dims: 4 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___lstmemory_1__.wbias" + size: 1400 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1400 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_3__.w0" + size: 120000 + 
initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 600 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___gru_0__.w0" + size: 120000 + initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 600 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___gru_0__.wbias" + size: 600 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 600 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_4__.w0" + size: 120000 + initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 600 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___gru_1__.w0" + size: 120000 + initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 600 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___gru_1__.wbias" + size: 600 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 600 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +output_layer_names: "__last_seq_0__" +output_layer_names: "__first_seq_0__" +output_layer_names: "__last_seq_1__" +output_layer_names: "__first_seq_1__" +output_layer_names: "__last_seq_2__" +output_layer_names: "__first_seq_2__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__fc_layer_0__" + layer_names: "__recurrent_layer_0__" + layer_names: "__recurrent_layer_1__" + layer_names: "__fc_layer_1__" + layer_names: "__lstmemory_0__" + layer_names: "__fc_layer_2__" + layer_names: "__lstmemory_1__" + layer_names: "__fc_layer_3__" + layer_names: "__gru_0__" + layer_names: "__fc_layer_4__" + layer_names: "__gru_1__" + layer_names: "__last_seq_0__" + layer_names: "__first_seq_0__" + layer_names: "__last_seq_1__" + layer_names: "__first_seq_1__" + layer_names: "__last_seq_2__" + layer_names: "__first_seq_2__" + input_layer_names: "data" + output_layer_names: "__last_seq_0__" + output_layer_names: "__first_seq_0__" + output_layer_names: "__last_seq_1__" + output_layer_names: "__first_seq_1__" + output_layer_names: "__last_seq_2__" + output_layer_names: "__first_seq_2__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.protostr b/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.protostr new file mode 100644 index 0000000000000000000000000000000000000000..278088d4abd50b1ab477973dec297d97d1ba1384 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.protostr @@ -0,0 +1,125 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 2304 + active_type: "" +} +layers { + name: "__conv_0__" + type: "exconv" + size: 36864 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "___conv_0__.w0" + conv_conf { + filter_size: 3 + channels: 1 + stride: 1 + padding: 1 + groups: 1 + filter_channels: 1 + output_x: 48 + img_size: 48 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 1 + } + } + bias_parameter_name: "___conv_0__.wbias" + num_filters: 16 + shared_biases: true +} +layers { + name: "__bilinear_interp_layer_0__" + type: "bilinear_interp" + size: 36864 + active_type: "" + inputs { + input_layer_name: "__conv_0__" + bilinear_interp_conf { + img_size_x: 32 + img_size_y: 32 + out_size_x: 64 + out_size_y: 64 + num_channels: 16 + } + } +} +layers { + name: "__pool_0__" + type: "pool" + size: 9216 + active_type: "" + inputs { + input_layer_name: "__bilinear_interp_layer_0__" + pool_conf { + pool_type: "max-projection" + channels: 4 
+ size_x: 2 + stride: 2 + output_x: 48 + img_size: 96 + padding: 0 + size_y: 2 + stride_y: 2 + output_y: 48 + img_size_y: 96 + padding_y: 0 + } + } +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 384 + active_type: "tanh" + inputs { + input_layer_name: "__pool_0__" + input_parameter_name: "___fc_layer_0__.w0" + } +} +parameters { + name: "___conv_0__.w0" + size: 144 + initial_mean: 0.0 + initial_std: 0.471404522657 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___conv_0__.wbias" + size: 16 + initial_mean: 0.0 + initial_std: 0.0 + dims: 16 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_0__.w0" + size: 3538944 + initial_mean: 0.0 + initial_std: 0.0104166669771 + dims: 9216 + dims: 384 + initial_strategy: 0 + initial_smart: true +} +input_layer_names: "data" +output_layer_names: "__fc_layer_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__conv_0__" + layer_names: "__bilinear_interp_layer_0__" + layer_names: "__pool_0__" + layer_names: "__fc_layer_0__" + input_layer_names: "data" + output_layer_names: "__fc_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.protostr new file mode 100644 index 0000000000000000000000000000000000000000..c37586f4068e4a5c07ab8c7ab6ca365fb6bf4fe6 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.protostr @@ -0,0 +1,289 @@ +type: "nn" +layers { + name: "input" + type: "data" + size: 200 + active_type: "" +} +layers { + name: "labels" + type: "data" + size: 5000 + active_type: "" +} +layers { + name: "probs" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "xe-label" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "__ctc_layer_0__" + type: "ctc" + size: 5001 + active_type: "" + inputs { + input_layer_name: "input" + } + inputs { + input_layer_name: "labels" + } + norm_by_times: false +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 4 + active_type: "tanh" + inputs { + input_layer_name: "input" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "crf_label" + type: "data" + size: 4 + active_type: "" +} +layers { + name: "__crf_layer_0__" + type: "crf" + size: 4 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + input_parameter_name: "___crf_layer_0__.w0" + } + inputs { + input_layer_name: "crf_label" + } + coeff: 1.0 +} +layers { + name: "left" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "right" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "label" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__rank_cost_0__" + type: "rank-cost" + size: 1 + active_type: "" + inputs { + input_layer_name: "left" + } + inputs { + input_layer_name: "right" + } + inputs { + input_layer_name: "label" + } + coeff: 1.0 +} +layers { + name: "list_feature" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "list_scores" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__lambda_cost_0__" + type: "lambda_cost" + size: 1 + active_type: "" + inputs { + input_layer_name: "list_feature" + } + inputs { + input_layer_name: "list_scores" + } + NDCG_num: 5 + max_sort_size: -1 +} +layers { + name: "__cross_entropy_0__" + type: "multi-class-cross-entropy" + size: 1 + active_type: "" + inputs { + 
input_layer_name: "probs" + } + inputs { + input_layer_name: "xe-label" + } + coeff: 1.0 +} +layers { + name: "__cross_entropy_with_selfnorm_0__" + type: "multi_class_cross_entropy_with_selfnorm" + active_type: "" + inputs { + input_layer_name: "probs" + } + inputs { + input_layer_name: "xe-label" + } + softmax_selfnorm_alpha: 0.10000000149 + coeff: 1.0 +} +layers { + name: "huber_probs" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "huber_label" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__huber_cost_0__" + type: "huber" + size: 1 + active_type: "" + inputs { + input_layer_name: "huber_probs" + } + inputs { + input_layer_name: "huber_label" + } + coeff: 1.0 +} +layers { + name: "__multi_binary_label_cross_entropy_0__" + type: "multi_binary_label_cross_entropy" + size: 1 + active_type: "" + inputs { + input_layer_name: "probs" + } + inputs { + input_layer_name: "xe-label" + } + coeff: 1.0 +} +parameters { + name: "___fc_layer_0__.w0" + size: 800 + initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 4 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 4 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 4 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___crf_layer_0__.w0" + size: 24 + initial_mean: 0.0 + initial_std: 0.5 + dims: 4 + dims: 6 + initial_strategy: 0 + initial_smart: true +} +input_layer_names: "input" +input_layer_names: "labels" +input_layer_names: "crf_label" +input_layer_names: "left" +input_layer_names: "right" +input_layer_names: "label" +input_layer_names: "list_feature" +input_layer_names: "list_scores" +input_layer_names: "probs" +input_layer_names: "xe-label" +input_layer_names: "huber_probs" +input_layer_names: "huber_label" +output_layer_names: "__ctc_layer_0__" +output_layer_names: "__crf_layer_0__" +output_layer_names: "__rank_cost_0__" +output_layer_names: "__lambda_cost_0__" +output_layer_names: "__cross_entropy_0__" +output_layer_names: "__cross_entropy_with_selfnorm_0__" +output_layer_names: "__huber_cost_0__" +output_layer_names: "__multi_binary_label_cross_entropy_0__" +sub_models { + name: "root" + layer_names: "input" + layer_names: "labels" + layer_names: "probs" + layer_names: "xe-label" + layer_names: "__ctc_layer_0__" + layer_names: "__fc_layer_0__" + layer_names: "crf_label" + layer_names: "__crf_layer_0__" + layer_names: "left" + layer_names: "right" + layer_names: "label" + layer_names: "__rank_cost_0__" + layer_names: "list_feature" + layer_names: "list_scores" + layer_names: "__lambda_cost_0__" + layer_names: "__cross_entropy_0__" + layer_names: "__cross_entropy_with_selfnorm_0__" + layer_names: "huber_probs" + layer_names: "huber_label" + layer_names: "__huber_cost_0__" + layer_names: "__multi_binary_label_cross_entropy_0__" + input_layer_names: "input" + input_layer_names: "labels" + input_layer_names: "crf_label" + input_layer_names: "left" + input_layer_names: "right" + input_layer_names: "label" + input_layer_names: "list_feature" + input_layer_names: "list_scores" + input_layer_names: "probs" + input_layer_names: "xe-label" + input_layer_names: "huber_probs" + input_layer_names: "huber_label" + output_layer_names: "__ctc_layer_0__" + output_layer_names: "__crf_layer_0__" + output_layer_names: "__rank_cost_0__" + output_layer_names: "__lambda_cost_0__" + output_layer_names: "__cross_entropy_0__" + output_layer_names: "__cross_entropy_with_selfnorm_0__" + output_layer_names: "__huber_cost_0__" + 
output_layer_names: "__multi_binary_label_cross_entropy_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.protostr b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.protostr new file mode 100644 index 0000000000000000000000000000000000000000..de58f5c64969b1f921a3c8b9d4f26841caec1262 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.protostr @@ -0,0 +1,111 @@ +type: "nn" +layers { + name: "input" + type: "data" + size: 300 + active_type: "" +} +layers { + name: "label" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "weight" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 10 + active_type: "softmax" + inputs { + input_layer_name: "input" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "__cost_0__" + type: "multi-class-cross-entropy" + size: 1 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + } + inputs { + input_layer_name: "label" + } + inputs { + input_layer_name: "weight" + } + coeff: 1.0 +} +layers { + name: "__regression_cost_0__" + type: "square_error" + size: 1 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + } + inputs { + input_layer_name: "label" + } + inputs { + input_layer_name: "weight" + } + coeff: 1.0 +} +parameters { + name: "___fc_layer_0__.w0" + size: 3000 + initial_mean: 0.0 + initial_std: 0.0577350258827 + dims: 300 + dims: 10 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 10 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 10 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "input" +input_layer_names: "label" +input_layer_names: "weight" +output_layer_names: "__cost_0__" +output_layer_names: "__regression_cost_0__" +evaluators { + name: "classification_error_evaluator" + type: "classification_error" + input_layers: "__fc_layer_0__" + input_layers: "label" + input_layers: "weight" +} +sub_models { + name: "root" + layer_names: "input" + layer_names: "label" + layer_names: "weight" + layer_names: "__fc_layer_0__" + layer_names: "__cost_0__" + layer_names: "__regression_cost_0__" + input_layer_names: "input" + input_layer_names: "label" + input_layer_names: "weight" + output_layer_names: "__cost_0__" + output_layer_names: "__regression_cost_0__" + evaluator_names: "classification_error_evaluator" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..f4b36052264bc41b4c06826c3b3c1428c103add7 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.protostr @@ -0,0 +1,56 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 30 + active_type: "" +} +layers { + name: "data_seq" + type: "data" + size: 30 + active_type: "" +} +layers { + name: "__expand_layer_0__" + type: "expand" + size: 30 + active_type: "" + inputs { + input_layer_name: "data" + } + inputs { + input_layer_name: "data_seq" + } + trans_type: "seq" +} +layers { + name: "__expand_layer_1__" + type: "expand" + size: 30 + active_type: "" + inputs { + input_layer_name: "data" + } + inputs { + input_layer_name: "data_seq" + } + trans_type: 
"non-seq" +} +input_layer_names: "data" +input_layer_names: "data_seq" +output_layer_names: "__expand_layer_0__" +output_layer_names: "__expand_layer_1__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "data_seq" + layer_names: "__expand_layer_0__" + layer_names: "__expand_layer_1__" + input_layer_names: "data" + input_layer_names: "data_seq" + output_layer_names: "__expand_layer_0__" + output_layer_names: "__expand_layer_1__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_fc.protostr b/python/paddle/trainer_config_helpers/tests/configs/test_fc.protostr new file mode 100644 index 0000000000000000000000000000000000000000..80b01246ba96f8c22dd57ac555101a25e0bd82eb --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_fc.protostr @@ -0,0 +1,98 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "__trans_layer_0__" + type: "trans" + size: 100 + active_type: "" + inputs { + input_layer_name: "data" + } +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 100 + active_type: "tanh" + inputs { + input_layer_name: "__trans_layer_0__" + input_parameter_name: "___fc_layer_0__.w0" + } +} +layers { + name: "mask" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "__selective_fc_layer_0__" + type: "selective_fc" + size: 100 + active_type: "sigmoid" + inputs { + input_layer_name: "data" + input_parameter_name: "___selective_fc_layer_0__.w0" + } + inputs { + input_layer_name: "mask" + } + bias_parameter_name: "___selective_fc_layer_0__.wbias" + selective_fc_pass_generation: false + has_selected_colums: true + selective_fc_full_mul_ratio: 0.019999999553 +} +parameters { + name: "___fc_layer_0__.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___selective_fc_layer_0__.w0" + size: 10000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + initial_strategy: 0 + initial_smart: true + is_sparse: false +} +parameters { + name: "___selective_fc_layer_0__.wbias" + size: 100 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 100 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +input_layer_names: "mask" +output_layer_names: "__fc_layer_0__" +output_layer_names: "__selective_fc_layer_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__trans_layer_0__" + layer_names: "__fc_layer_0__" + layer_names: "mask" + layer_names: "__selective_fc_layer_0__" + input_layer_names: "data" + input_layer_names: "mask" + output_layer_names: "__fc_layer_0__" + output_layer_names: "__selective_fc_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..81577910ccf34676c23706acc07bbcb3d629dd65 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.protostr @@ -0,0 +1,51 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 120 + active_type: "" +} +layers { + name: "__gru_0__" + type: "gated_recurrent" + size: 40 + active_type: "sigmoid" + inputs { + input_layer_name: "data" + input_parameter_name: "___gru_0__.w0" + } + bias_parameter_name: "___gru_0__.wbias" + reversed: true + active_gate_type: 
"tanh" +} +parameters { + name: "___gru_0__.w0" + size: 4800 + initial_mean: 0.0 + initial_std: 0.158113881946 + dims: 40 + dims: 120 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___gru_0__.wbias" + size: 120 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 120 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +output_layer_names: "__gru_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__gru_0__" + input_layer_names: "data" + output_layer_names: "__gru_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.protostr b/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.protostr new file mode 100644 index 0000000000000000000000000000000000000000..e8cc61b8c541082237337329fd2bda6be3d0498f --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.protostr @@ -0,0 +1,62 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "label" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "__hsigmoid_0__" + type: "hsigmoid" + size: 1 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "___hsigmoid_0__.w0" + } + inputs { + input_layer_name: "label" + } + bias_parameter_name: "___hsigmoid_0__.wbias" + num_classes: 10 +} +parameters { + name: "___hsigmoid_0__.w0" + size: 900 + initial_mean: 0.0 + initial_std: 0.333333343267 + dims: 9 + dims: 100 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___hsigmoid_0__.wbias" + size: 9 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 9 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +input_layer_names: "label" +output_layer_names: "__hsigmoid_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "label" + layer_names: "__hsigmoid_0__" + input_layer_names: "data" + input_layer_names: "label" + output_layer_names: "__hsigmoid_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..8341cd2684746a08856a8d37976cfb8bbc84a8c1 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.protostr @@ -0,0 +1,53 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 128 + active_type: "" +} +layers { + name: "__lstmemory_0__" + type: "lstmemory" + size: 32 + active_type: "tanh" + inputs { + input_layer_name: "data" + input_parameter_name: "___lstmemory_0__.w0" + } + bias_parameter_name: "___lstmemory_0__.wbias" + reversed: true + active_gate_type: "tanh" + active_state_type: "tanh" +} +parameters { + name: "___lstmemory_0__.w0" + size: 4096 + initial_mean: 0.0 + initial_std: 0.176776692271 + dims: 32 + dims: 32 + dims: 4 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___lstmemory_0__.wbias" + size: 224 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 224 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +output_layer_names: "__lstmemory_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__lstmemory_0__" + input_layer_names: "data" + output_layer_names: "__lstmemory_0__" + is_recurrent_layer_group: false +} + diff --git 
a/python/paddle/trainer_config_helpers/tests/configs/test_maxout.protostr b/python/paddle/trainer_config_helpers/tests/configs/test_maxout.protostr new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.protostr new file mode 100644 index 0000000000000000000000000000000000000000..44400e2c3a23dd1e48dc4c29d20cca5dd5f79e29 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.protostr @@ -0,0 +1,225 @@ +type: "nn" +layers { + name: "w" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "a" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "b" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "c" + type: "data" + size: 200 + active_type: "" +} +layers { + name: "d" + type: "data" + size: 31 + active_type: "" +} +layers { + name: "__interpolation_layer_0__" + type: "interpolation" + size: 100 + active_type: "" + inputs { + input_layer_name: "w" + } + inputs { + input_layer_name: "a" + } + inputs { + input_layer_name: "b" + } +} +layers { + name: "__power_layer_0__" + type: "power" + size: 100 + active_type: "" + inputs { + input_layer_name: "w" + } + inputs { + input_layer_name: "a" + } +} +layers { + name: "__scaling_layer_0__" + type: "scaling" + size: 100 + active_type: "" + inputs { + input_layer_name: "w" + } + inputs { + input_layer_name: "a" + } +} +layers { + name: "__cos_sim_0__" + type: "cos" + size: 1 + active_type: "" + inputs { + input_layer_name: "a" + } + inputs { + input_layer_name: "b" + } + cos_scale: 5.0 +} +layers { + name: "__cos_sim_1__" + type: "cos_vm" + size: 2 + active_type: "" + inputs { + input_layer_name: "a" + } + inputs { + input_layer_name: "c" + } + cos_scale: 5.0 +} +layers { + name: "__sum_to_one_norm_layer_0__" + type: "sum_to_one_norm" + size: 100 + active_type: "" + inputs { + input_layer_name: "a" + } +} +layers { + name: "__conv_shift_layer_0__" + type: "conv_shift" + size: 100 + active_type: "" + inputs { + input_layer_name: "a" + } + inputs { + input_layer_name: "d" + } +} +layers { + name: "__tensor_layer_0__" + type: "tensor" + size: 1000 + active_type: "" + inputs { + input_layer_name: "a" + input_parameter_name: "___tensor_layer_0__.w0" + } + inputs { + input_layer_name: "b" + } + bias_parameter_name: "___tensor_layer_0__.wbias" +} +layers { + name: "__slope_intercept_layer_0__" + type: "slope_intercept" + size: 100 + active_type: "" + inputs { + input_layer_name: "a" + } + slope: 0.699999988079 + intercept: 0.899999976158 +} +layers { + name: "__linear_comb_layer_0__" + type: "convex_comb" + size: 2 + active_type: "" + inputs { + input_layer_name: "b" + } + inputs { + input_layer_name: "c" + } +} +parameters { + name: "___tensor_layer_0__.w0" + size: 10000000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 100 + dims: 1000 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___tensor_layer_0__.wbias" + size: 1000 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1000 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "w" +input_layer_names: "a" +input_layer_names: "b" +input_layer_names: "c" +input_layer_names: "d" +output_layer_names: "__interpolation_layer_0__" +output_layer_names: "__power_layer_0__" +output_layer_names: "__scaling_layer_0__" +output_layer_names: "__cos_sim_0__" 
+output_layer_names: "__cos_sim_1__" +output_layer_names: "__sum_to_one_norm_layer_0__" +output_layer_names: "__conv_shift_layer_0__" +output_layer_names: "__tensor_layer_0__" +output_layer_names: "__slope_intercept_layer_0__" +output_layer_names: "__linear_comb_layer_0__" +sub_models { + name: "root" + layer_names: "w" + layer_names: "a" + layer_names: "b" + layer_names: "c" + layer_names: "d" + layer_names: "__interpolation_layer_0__" + layer_names: "__power_layer_0__" + layer_names: "__scaling_layer_0__" + layer_names: "__cos_sim_0__" + layer_names: "__cos_sim_1__" + layer_names: "__sum_to_one_norm_layer_0__" + layer_names: "__conv_shift_layer_0__" + layer_names: "__tensor_layer_0__" + layer_names: "__slope_intercept_layer_0__" + layer_names: "__linear_comb_layer_0__" + input_layer_names: "w" + input_layer_names: "a" + input_layer_names: "b" + input_layer_names: "c" + input_layer_names: "d" + output_layer_names: "__interpolation_layer_0__" + output_layer_names: "__power_layer_0__" + output_layer_names: "__scaling_layer_0__" + output_layer_names: "__cos_sim_0__" + output_layer_names: "__cos_sim_1__" + output_layer_names: "__sum_to_one_norm_layer_0__" + output_layer_names: "__conv_shift_layer_0__" + output_layer_names: "__tensor_layer_0__" + output_layer_names: "__slope_intercept_layer_0__" + output_layer_names: "__linear_comb_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..c402aff174ab7c7d7f63234960d4a24d84622dd4 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.protostr @@ -0,0 +1,26 @@ +type: "nn" +layers { + name: "input" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "__print_0__" + type: "print" + active_type: "" + inputs { + input_layer_name: "input" + } +} +input_layer_names: "input" +output_layer_names: "input" +sub_models { + name: "root" + layer_names: "input" + layer_names: "__print_0__" + input_layer_names: "input" + output_layer_names: "input" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.protostr new file mode 100644 index 0000000000000000000000000000000000000000..dfb5ce20a31a0157b32d06138a4482f630a64c33 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.protostr @@ -0,0 +1,650 @@ +type: "recurrent_nn" +layers { + name: "seq_input" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "sub_seq_input" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "label" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__mixed_0__" + type: "mixed" + size: 400 + active_type: "" + inputs { + input_layer_name: "seq_input" + input_parameter_name: "___mixed_0__.w0" + proj_conf { + type: "fc" + name: "___mixed_0__.w0" + input_size: 100 + output_size: 400 + } + } +} +layers { + name: "__mixed_1__" + type: "mixed" + size: 300 + active_type: "" + inputs { + input_layer_name: "seq_input" + input_parameter_name: "___mixed_1__.w0" + proj_conf { + type: "fc" + name: "___mixed_1__.w0" + input_size: 100 + output_size: 300 + } + } +} +layers { + name: "__recurrent_group_0__" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "seq_input@__recurrent_group_0__" + type: 
"scatter_agent" + size: 100 + active_type: "" +} +layers { + name: "rnn_forward+delay1@__recurrent_group_0__" + type: "agent" + size: 200 + active_type: "" +} +layers { + name: "rnn_forward@__recurrent_group_0__" + type: "fc" + size: 200 + active_type: "tanh" + inputs { + input_layer_name: "seq_input@__recurrent_group_0__" + input_parameter_name: "_rnn_forward@__recurrent_group_0__.w0" + } + inputs { + input_layer_name: "rnn_forward+delay1@__recurrent_group_0__" + input_parameter_name: "_rnn_forward@__recurrent_group_0__.w1" + } + bias_parameter_name: "_rnn_forward@__recurrent_group_0__.wbias" +} +layers { + name: "rnn_forward" + type: "gather_agent" + size: 200 + active_type: "" +} +layers { + name: "__last_seq_0__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "rnn_forward" + } + trans_type: "non-seq" +} +layers { + name: "__recurrent_group_1__" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "seq_input@__recurrent_group_1__" + type: "scatter_agent" + size: 100 + active_type: "" +} +layers { + name: "rnn_back+delay1@__recurrent_group_1__" + type: "agent" + size: 200 + active_type: "" +} +layers { + name: "rnn_back@__recurrent_group_1__" + type: "fc" + size: 200 + active_type: "tanh" + inputs { + input_layer_name: "seq_input@__recurrent_group_1__" + input_parameter_name: "_rnn_back@__recurrent_group_1__.w0" + } + inputs { + input_layer_name: "rnn_back+delay1@__recurrent_group_1__" + input_parameter_name: "_rnn_back@__recurrent_group_1__.w1" + } + bias_parameter_name: "_rnn_back@__recurrent_group_1__.wbias" +} +layers { + name: "rnn_back" + type: "gather_agent" + size: 200 + active_type: "" +} +layers { + name: "__first_seq_0__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "rnn_back" + } + select_first: true + trans_type: "non-seq" +} +layers { + name: "__recurrent_group_2__" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "sub_seq_input@__recurrent_group_2__" + type: "sequence_scatter_agent" + size: 100 + active_type: "" +} +layers { + name: "rnn_subseq_forward+delay1@__recurrent_group_2__" + type: "agent" + size: 200 + active_type: "" +} +layers { + name: "rnn_subseq_forward@__recurrent_group_2__" + type: "fc" + size: 200 + active_type: "tanh" + inputs { + input_layer_name: "sub_seq_input@__recurrent_group_2__" + input_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.w0" + } + inputs { + input_layer_name: "rnn_subseq_forward+delay1@__recurrent_group_2__" + input_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.w1" + } + bias_parameter_name: "_rnn_subseq_forward@__recurrent_group_2__.wbias" +} +layers { + name: "rnn_subseq_forward" + type: "sequence_gather_agent" + size: 200 + active_type: "" +} +layers { + name: "__last_seq_1__" + type: "seqlastins" + size: 200 + active_type: "linear" + inputs { + input_layer_name: "rnn_subseq_forward" + } + trans_type: "non-seq" +} +layers { + name: "__lstm_group_0___recurrent_group" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "__mixed_0__@__lstm_group_0___recurrent_group" + type: "scatter_agent" + size: 400 + active_type: "" +} +layers { + name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + type: "agent" + size: 100 + active_type: "" +} +layers { + name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + type: "agent" + size: 100 + active_type: "" +} +layers { + name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" + type: 
"mixed" + size: 400 + active_type: "" + inputs { + input_layer_name: "__mixed_0__@__lstm_group_0___recurrent_group" + proj_conf { + type: "identity" + name: "___lstm_group_0___input_recurrent.w0" + input_size: 400 + output_size: 400 + } + } + inputs { + input_layer_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + input_parameter_name: "___lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group.w1" + proj_conf { + type: "fc" + name: "___lstm_group_0___input_recurrent.w1" + input_size: 100 + output_size: 400 + } + } +} +layers { + name: "__lstm_group_0__@__lstm_group_0___recurrent_group" + type: "lstm_step" + size: 100 + active_type: "tanh" + inputs { + input_layer_name: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" + } + inputs { + input_layer_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + } + bias_parameter_name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias" + active_gate_type: "sigmoid" + active_state_type: "sigmoid" +} +layers { + name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" + type: "get_output" + size: 100 + active_type: "" + inputs { + input_layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" + input_layer_argument: "state" + } +} +layers { + name: "__lstm_group_0__" + type: "gather_agent" + size: 100 + active_type: "" +} +layers { + name: "__last_seq_2__" + type: "seqlastins" + size: 100 + active_type: "linear" + inputs { + input_layer_name: "__lstm_group_0__" + } + trans_type: "non-seq" +} +layers { + name: "__gru_group_0___recurrent_group" + type: "recurrent_layer_group" + active_type: "" +} +layers { + name: "__mixed_1__@__gru_group_0___recurrent_group" + type: "scatter_agent" + size: 300 + active_type: "" +} +layers { + name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" + type: "agent" + size: 100 + active_type: "" +} +layers { + name: "__gru_group_0__@__gru_group_0___recurrent_group" + type: "gru_step" + size: 100 + active_type: "tanh" + inputs { + input_layer_name: "__mixed_1__@__gru_group_0___recurrent_group" + input_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.w0" + } + inputs { + input_layer_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" + } + bias_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.wbias" + active_gate_type: "sigmoid" +} +layers { + name: "__gru_group_0__" + type: "gather_agent" + size: 100 + active_type: "" +} +layers { + name: "__last_seq_3__" + type: "seqlastins" + size: 100 + active_type: "linear" + inputs { + input_layer_name: "__gru_group_0__" + } + trans_type: "non-seq" +} +parameters { + name: "___mixed_0__.w0" + size: 40000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 400 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___mixed_1__.w0" + size: 30000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 300 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_rnn_forward@__recurrent_group_0__.w0" + size: 20000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_rnn_forward@__recurrent_group_0__.w1" + size: 40000 + initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_rnn_forward@__recurrent_group_0__.wbias" + size: 200 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 200 + initial_strategy: 0 + initial_smart: 
false +} +parameters { + name: "_rnn_back@__recurrent_group_1__.w0" + size: 20000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_rnn_back@__recurrent_group_1__.w1" + size: 40000 + initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_rnn_back@__recurrent_group_1__.wbias" + size: 200 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 200 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "_rnn_subseq_forward@__recurrent_group_2__.w0" + size: 20000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_rnn_subseq_forward@__recurrent_group_2__.w1" + size: 40000 + initial_mean: 0.0 + initial_std: 0.0707106813788 + dims: 200 + dims: 200 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "_rnn_subseq_forward@__recurrent_group_2__.wbias" + size: 200 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 200 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group.w1" + size: 40000 + initial_mean: 0.0 + initial_std: 0.10000000149 + dims: 100 + dims: 400 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias" + size: 300 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 300 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___gru_group_0__@__gru_group_0___recurrent_group.w0" + size: 30000 + initial_mean: 0.0 + initial_std: 0.00999999977648 + dims: 100 + dims: 300 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___gru_group_0__@__gru_group_0___recurrent_group.wbias" + size: 300 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 300 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "seq_input" +input_layer_names: "sub_seq_input" +output_layer_names: "__last_seq_0__" +output_layer_names: "__first_seq_0__" +output_layer_names: "__last_seq_1__" +output_layer_names: "__last_seq_2__" +output_layer_names: "__last_seq_3__" +sub_models { + name: "root" + layer_names: "seq_input" + layer_names: "sub_seq_input" + layer_names: "label" + layer_names: "__mixed_0__" + layer_names: "__mixed_1__" + layer_names: "__recurrent_group_0__" + layer_names: "rnn_forward" + layer_names: "__last_seq_0__" + layer_names: "__recurrent_group_1__" + layer_names: "rnn_back" + layer_names: "__first_seq_0__" + layer_names: "__recurrent_group_2__" + layer_names: "rnn_subseq_forward" + layer_names: "__last_seq_1__" + layer_names: "__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0__" + layer_names: "__last_seq_2__" + layer_names: "__gru_group_0___recurrent_group" + layer_names: "__gru_group_0__" + layer_names: "__last_seq_3__" + input_layer_names: "seq_input" + input_layer_names: "sub_seq_input" + output_layer_names: "__last_seq_0__" + output_layer_names: "__first_seq_0__" + output_layer_names: "__last_seq_1__" + output_layer_names: "__last_seq_2__" + output_layer_names: "__last_seq_3__" + is_recurrent_layer_group: false +} +sub_models { + name: "__recurrent_group_0__" + layer_names: "seq_input@__recurrent_group_0__" + layer_names: "rnn_forward+delay1@__recurrent_group_0__" + layer_names: "rnn_forward@__recurrent_group_0__" + is_recurrent_layer_group: true + reversed: false + 
memories { + layer_name: "rnn_forward@__recurrent_group_0__" + link_name: "rnn_forward+delay1@__recurrent_group_0__" + is_sequence: false + } + in_links { + layer_name: "seq_input" + link_name: "seq_input@__recurrent_group_0__" + has_subseq: false + } + out_links { + layer_name: "rnn_forward@__recurrent_group_0__" + link_name: "rnn_forward" + has_subseq: false + } + target_inlinkid: -1 +} +sub_models { + name: "__recurrent_group_1__" + layer_names: "seq_input@__recurrent_group_1__" + layer_names: "rnn_back+delay1@__recurrent_group_1__" + layer_names: "rnn_back@__recurrent_group_1__" + is_recurrent_layer_group: true + reversed: true + memories { + layer_name: "rnn_back@__recurrent_group_1__" + link_name: "rnn_back+delay1@__recurrent_group_1__" + is_sequence: false + } + in_links { + layer_name: "seq_input" + link_name: "seq_input@__recurrent_group_1__" + has_subseq: false + } + out_links { + layer_name: "rnn_back@__recurrent_group_1__" + link_name: "rnn_back" + has_subseq: false + } + target_inlinkid: -1 +} +sub_models { + name: "__recurrent_group_2__" + layer_names: "sub_seq_input@__recurrent_group_2__" + layer_names: "rnn_subseq_forward+delay1@__recurrent_group_2__" + layer_names: "rnn_subseq_forward@__recurrent_group_2__" + is_recurrent_layer_group: true + reversed: false + memories { + layer_name: "rnn_subseq_forward@__recurrent_group_2__" + link_name: "rnn_subseq_forward+delay1@__recurrent_group_2__" + is_sequence: false + } + in_links { + layer_name: "sub_seq_input" + link_name: "sub_seq_input@__recurrent_group_2__" + has_subseq: true + } + out_links { + layer_name: "rnn_subseq_forward@__recurrent_group_2__" + link_name: "rnn_subseq_forward" + has_subseq: true + } + target_inlinkid: -1 +} +sub_models { + name: "__lstm_group_0___recurrent_group" + layer_names: "__mixed_0__@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0___input_recurrent@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0__@__lstm_group_0___recurrent_group" + layer_names: "__lstm_group_0___state@__lstm_group_0___recurrent_group" + is_recurrent_layer_group: true + reversed: false + memories { + layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" + link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" + is_sequence: false + } + memories { + layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" + link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" + is_sequence: false + } + in_links { + layer_name: "__mixed_0__" + link_name: "__mixed_0__@__lstm_group_0___recurrent_group" + has_subseq: false + } + out_links { + layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" + link_name: "__lstm_group_0__" + has_subseq: false + } + target_inlinkid: -1 +} +sub_models { + name: "__gru_group_0___recurrent_group" + layer_names: "__mixed_1__@__gru_group_0___recurrent_group" + layer_names: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" + layer_names: "__gru_group_0__@__gru_group_0___recurrent_group" + is_recurrent_layer_group: true + reversed: false + memories { + layer_name: "__gru_group_0__@__gru_group_0___recurrent_group" + link_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" + is_sequence: false + } + in_links { + layer_name: "__mixed_1__" + link_name: "__mixed_1__@__gru_group_0___recurrent_group" + has_subseq: false + } + out_links { + layer_name: 
"__gru_group_0__@__gru_group_0___recurrent_group" + link_name: "__gru_group_0__" + has_subseq: false + } + target_inlinkid: -1 +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.protostr b/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.protostr new file mode 100644 index 0000000000000000000000000000000000000000..1999c006d237eb449d59c8e8a2a83c1e4fab9d0e --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.protostr @@ -0,0 +1,111 @@ +type: "nn" +layers { + name: "dat_in" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "__seq_pooling_0__" + type: "max" + size: 100 + active_type: "linear" + inputs { + input_layer_name: "dat_in" + } + trans_type: "seq" +} +layers { + name: "__seq_pooling_1__" + type: "max" + size: 100 + active_type: "linear" + inputs { + input_layer_name: "dat_in" + } + trans_type: "non-seq" +} +layers { + name: "__seq_pooling_2__" + type: "average" + size: 100 + active_type: "linear" + inputs { + input_layer_name: "dat_in" + } + average_strategy: "average" + trans_type: "seq" +} +layers { + name: "__seq_pooling_3__" + type: "average" + size: 100 + active_type: "linear" + inputs { + input_layer_name: "dat_in" + } + average_strategy: "average" + trans_type: "non-seq" +} +layers { + name: "__seq_pooling_4__" + type: "average" + size: 100 + active_type: "linear" + inputs { + input_layer_name: "dat_in" + } + average_strategy: "sum" + trans_type: "seq" +} +layers { + name: "__seq_pooling_5__" + type: "average" + size: 100 + active_type: "linear" + inputs { + input_layer_name: "dat_in" + } + average_strategy: "sum" + trans_type: "non-seq" +} +layers { + name: "__seq_pooling_6__" + type: "max" + size: 100 + active_type: "linear" + inputs { + input_layer_name: "dat_in" + } + output_max_index: true + trans_type: "non-seq" +} +input_layer_names: "dat_in" +output_layer_names: "__seq_pooling_0__" +output_layer_names: "__seq_pooling_1__" +output_layer_names: "__seq_pooling_2__" +output_layer_names: "__seq_pooling_3__" +output_layer_names: "__seq_pooling_4__" +output_layer_names: "__seq_pooling_5__" +output_layer_names: "__seq_pooling_6__" +sub_models { + name: "root" + layer_names: "dat_in" + layer_names: "__seq_pooling_0__" + layer_names: "__seq_pooling_1__" + layer_names: "__seq_pooling_2__" + layer_names: "__seq_pooling_3__" + layer_names: "__seq_pooling_4__" + layer_names: "__seq_pooling_5__" + layer_names: "__seq_pooling_6__" + input_layer_names: "dat_in" + output_layer_names: "__seq_pooling_0__" + output_layer_names: "__seq_pooling_1__" + output_layer_names: "__seq_pooling_2__" + output_layer_names: "__seq_pooling_3__" + output_layer_names: "__seq_pooling_4__" + output_layer_names: "__seq_pooling_5__" + output_layer_names: "__seq_pooling_6__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/unused_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/unused_layers.protostr new file mode 100644 index 0000000000000000000000000000000000000000..89ed28406e553ba93bec8c86879a85f0a5c1caa1 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/unused_layers.protostr @@ -0,0 +1,27 @@ +type: "nn" +layers { + name: "probs" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "__sampling_id_layer_0__" + type: "sampling_id" + size: 100 + active_type: "" + inputs { + input_layer_name: "probs" + } +} +input_layer_names: "probs" +output_layer_names: "__sampling_id_layer_0__" 
+sub_models { + name: "root" + layer_names: "probs" + layer_names: "__sampling_id_layer_0__" + input_layer_names: "probs" + output_layer_names: "__sampling_id_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/util_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/util_layers.protostr new file mode 100644 index 0000000000000000000000000000000000000000..d0ad388165007b8f96f059e5b003c52f756383e5 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/util_layers.protostr @@ -0,0 +1,81 @@ +type: "nn" +layers { + name: "a" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "b" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "__addto_0__" + type: "addto" + size: 10 + active_type: "" + inputs { + input_layer_name: "a" + } + inputs { + input_layer_name: "b" + } +} +layers { + name: "__concat_0__" + type: "concat" + size: 20 + active_type: "" + inputs { + input_layer_name: "a" + } + inputs { + input_layer_name: "b" + } +} +layers { + name: "__concat_1__" + type: "concat2" + size: 20 + active_type: "" + inputs { + input_layer_name: "a" + proj_conf { + type: "identity" + name: "___concat_1__.w0" + input_size: 10 + output_size: 10 + } + } + inputs { + input_layer_name: "b" + proj_conf { + type: "identity" + name: "___concat_1__.w1" + input_size: 10 + output_size: 10 + } + } +} +input_layer_names: "a" +input_layer_names: "b" +output_layer_names: "__addto_0__" +output_layer_names: "__concat_0__" +output_layer_names: "__concat_1__" +sub_models { + name: "root" + layer_names: "a" + layer_names: "b" + layer_names: "__addto_0__" + layer_names: "__concat_0__" + layer_names: "__concat_1__" + input_layer_names: "a" + input_layer_names: "b" + output_layer_names: "__addto_0__" + output_layer_names: "__concat_0__" + output_layer_names: "__concat_1__" + is_recurrent_layer_group: false +} +
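Note on how these fixtures are consumed: each .protostr file above is the expected text-format serialized model configuration that the config tests compare against the output of the matching Python config in python/paddle/trainer_config_helpers/tests/configs/. As a rough illustration of that mapping (a minimal sketch only: the .py sources are not part of this diff, and the settings() values below are assumed placeholders), a config along the following lines would serialize to roughly the util_layers.protostr content shown last, producing the auto-generated __addto_0__, __concat_0__, and __concat_1__ layer names:

    # Hypothetical util_layers-style config sketch. The helpers used here
    # (data_layer, addto_layer, concat_layer, identity_projection, outputs)
    # are the public trainer_config_helpers API; learning_rate/batch_size
    # are placeholder values, since the real .py file is not shown in this diff.
    from paddle.trainer_config_helpers import *

    settings(learning_rate=1e-4, batch_size=1000)

    a = data_layer(name='a', size=10)
    b = data_layer(name='b', size=10)

    # element-wise sum of the two inputs -> "__addto_0__" (type "addto")
    result = addto_layer(input=[a, b])
    # plain concatenation -> "__concat_0__" (type "concat", size 20)
    concat1 = concat_layer(input=[a, b])
    # concatenation of identity projections -> "__concat_1__" (type "concat2"),
    # which is why that layer carries the two identity proj_conf entries above
    concat2 = concat_layer(input=[
        identity_projection(input=a),
        identity_projection(input=b)
    ])

    outputs(result, concat1, concat2)

With fixtures like these checked in, a change to a layer helper that alters the serialized network should surface as a diff against the corresponding .protostr file rather than passing silently.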