From 04b5daf92d69a9cce32af59fa5cf62610ef44f24 Mon Sep 17 00:00:00 2001
From: wangyang59
Date: Wed, 1 Feb 2017 13:44:00 -0800
Subject: [PATCH] change the parameter position of gru_step_layer from 1 back to 0

---
 paddle/gserver/layers/GruStepLayer.cpp             | 4 ++--
 paddle/gserver/tests/test_LayerGrad.cpp            | 4 ++--
 python/paddle/trainer/config_parser.py             | 2 +-
 python/paddle/trainer_config_helpers/layers.py     | 5 ++++-
 .../tests/configs/protostr/shared_gru.protostr     | 4 ++--
 .../tests/configs/protostr/test_rnn_group.protostr | 4 ++--
 6 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/paddle/gserver/layers/GruStepLayer.cpp b/paddle/gserver/layers/GruStepLayer.cpp
index ce692c49088..4a1006aa941 100644
--- a/paddle/gserver/layers/GruStepLayer.cpp
+++ b/paddle/gserver/layers/GruStepLayer.cpp
@@ -68,8 +68,8 @@ bool GruStepLayer::init(const LayerMap& layerMap,
   if (!Layer::init(layerMap, parameterMap)) return false;
   CHECK_EQ(2U, inputLayers_.size());
 
-  CHECK_EQ(getSize() * getSize() * 3, parameters_[1]->getSize());
-  weight_.reset(new Weight(getSize(), getSize() * 3, parameters_[1]));
+  CHECK_EQ(getSize() * getSize() * 3, parameters_[0]->getSize());
+  weight_.reset(new Weight(getSize(), getSize() * 3, parameters_[0]));
 
   if (biasParameter_.get() != NULL) {
     CHECK_EQ(getSize() * 3, biasParameter_->getSize());
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 813a99d8cda..8c8e876bd64 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1404,9 +1404,9 @@ TEST(Layer, GruStepLayer) {
   config.biasSize = 12;
 
   config.inputDefs.push_back(
-      {INPUT_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 0});
+      {INPUT_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48});
   config.inputDefs.push_back(
-      {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 48});
+      {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0});
   config.layerConfig.add_inputs();
   config.layerConfig.add_inputs();
 
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 4fbf076ae98..6701eced60d 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2996,7 +2996,7 @@ class GruStepLayer(LayerBase):
         config_assert(input_layer1.size == size,
                       'input_layer1.size != layer.size')
         self.config.active_gate_type = active_gate_type
-        self.create_input_parameter(1, size * size * 3, [size, size * 3])
+        self.create_input_parameter(0, size * size * 3, [size, size * 3])
         self.create_bias_parameter(bias, size * 3)
 
 
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index f0b5d7c3b4f..770d6303c13 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -2706,6 +2706,9 @@ def gru_step_layer(input,
     :param name:
     :param gate_act:
     :param bias_attr:
+    :param param_attr: the parameter attribute for transforming the output_mem
+                       from the previous step. It is grouped with the input
+                       instead, for backward model compatibility.
     :param layer_attr:
     :return: LayerOutput object.
     :rtype: LayerOutput
@@ -2716,7 +2719,7 @@ def gru_step_layer(input,
     Layer(
         name=name,
         type=LayerType.GRU_STEP_LAYER,
-        inputs=[input.name, Input(output_mem.name, **param_attr.attr)],
+        inputs=[Input(input.name, **param_attr.attr), output_mem.name],
         bias=ParamAttr.to_bias(bias_attr),
         size=size,
         active_type=act.name,
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
index c0868713ebb..b6905824f0c 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
@@ -51,10 +51,10 @@ layers {
   active_type: "tanh"
   inputs {
     input_layer_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
+    input_parameter_name: "gru_param"
   }
   inputs {
     input_layer_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
-    input_parameter_name: "gru_param"
   }
   bias_parameter_name: "gru_bias"
   active_gate_type: "sigmoid"
@@ -105,10 +105,10 @@ layers {
   active_type: "tanh"
   inputs {
     input_layer_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
+    input_parameter_name: "gru_param"
   }
   inputs {
     input_layer_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
-    input_parameter_name: "gru_param"
   }
   bias_parameter_name: "gru_bias"
   active_gate_type: "sigmoid"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
index c1d39f77295..3e9d28416ed 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
@@ -307,10 +307,10 @@ layers {
   active_type: "tanh"
   inputs {
     input_layer_name: "__mixed_1__@__gru_group_0___recurrent_group"
+    input_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.w0"
   }
   inputs {
     input_layer_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group"
-    input_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.w1"
   }
   bias_parameter_name: "___gru_group_0__@__gru_group_0___recurrent_group.wbias"
   active_gate_type: "sigmoid"
@@ -462,7 +462,7 @@ parameters {
   initial_smart: false
 }
 parameters {
-  name: "___gru_group_0__@__gru_group_0___recurrent_group.w1"
+  name: "___gru_group_0__@__gru_group_0___recurrent_group.w0"
   size: 30000
   initial_mean: 0.0
   initial_std: 0.1
-- 
GitLab
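
Usage note (a sketch, not part of the diff above): with this change, gru_step_layer's
param_attr describes the weight bound to input 0, the input that has already been
projected to 3 * size, while output_mem carries no parameter. The sketch below shows
how the layer is typically wired inside a recurrent_group; the layer and parameter
names ("gru_forward", "gru_param", "gru_bias") and the gru_step helper are
illustrative, with "gru_param" and "gru_bias" chosen to mirror the shared_gru test
config above.

    from paddle.trainer_config_helpers import *

    hidden_dim = 100

    def gru_step(ipt):
        # ipt is the per-timestep slice of an input that was already
        # projected to 3 * hidden_dim outside the recurrent_group.
        out_mem = memory(name="gru_forward", size=hidden_dim)
        return gru_step_layer(
            name="gru_forward",
            input=ipt,
            output_mem=out_mem,
            size=hidden_dim,
            act=TanhActivation(),
            gate_act=SigmoidActivation(),
            # With this patch, param_attr is attached to the first input again.
            param_attr=ParamAttr(name="gru_param"),
            bias_attr=ParamAttr(name="gru_bias"))

    data = data_layer(name="data", size=hidden_dim)
    transformed = fc_layer(input=data, size=hidden_dim * 3, bias_attr=False)
    gru_out = recurrent_group(name="gru_group", step=gru_step, input=transformed)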