diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py
index 5e66b7399ad52f74c243cbc99706d6dd87d9e557..6fc01ce58be57c77144c6558d039430b22d3a746 100644
--- a/demo/mnist/api_train_v2.py
+++ b/demo/mnist/api_train_v2.py
@@ -3,9 +3,6 @@ import paddle.v2 as paddle
 
 import mnist_util
 
-import pudb
-pudb.set_trace()
-
 
 def train_reader():
     train_file = './data/raw_data/train'
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index da937152ee0ce788309690c7b718943bb21b5a76..487d4dfd5b1b8132a8991a3ee53d97c6901b9038 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -3474,6 +3474,8 @@ def update_g_config():
     for name in g_config.model_config.output_layer_names:
         assert name in g_layer_map, \
             'input name "%s" does not correspond to a layer name' % name
+    for hook in _parse_config_hooks:
+        hook()
     return g_config
 
 
@@ -3485,8 +3487,8 @@
         passed to config script as a dictionary CONFIG_ARGS
     '''
     init_config_environment()
-    for hook in _parse_config_hooks:
-        hook()
+    # for hook in _parse_config_hooks:
+    #     hook()
 
     config_args = {}
 
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
index 44c7661b2463f0b2961e29983daa85ba1afd9b3a..5328070456ef1ea6cefc1fa252e4f6a825448148 100644
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
@@ -124,11 +124,13 @@ class Layer(object):
             return self.to_proto_impl(**kwargs)
 
         # memory may have the same name with some layer
-        if isinstance(self, MemoryV2) or isinstance(self, LayerOutputV2):
+        if isinstance(self, MemoryV2):
            return self.to_proto_impl(**kwargs)
 
+        # store the v1 API's layer_output in context, keyed by its name
         if self.name not in context:
             context[self.name] = self.to_proto_impl(**kwargs)
+
         return context[self.name]
 
     def to_proto_impl(self, **kwargs):
@@ -200,8 +202,19 @@ class MemoryV2(Layer):
     def __init__(self, name, size, **kwargs):
         self.name = name
         self.size = size
-        self.__kwargs__ = kwargs
-        super(MemoryV2, self).__init__(name=name, parent_layers=dict())
+
+        parent_names = ['boot_layer']
+        parent_layers = dict()
+        other_kwargs = dict()
+        for pname in parent_names:
+            if kwargs.has_key(pname):
+                parent_layers[pname] = kwargs[pname]
+
+        for key in kwargs.keys():
+            if key not in parent_names:
+                other_kwargs[key] = kwargs[key]
+        super(MemoryV2, self).__init__(name=name, parent_layers=parent_layers)
+        self.__kwargs__ = other_kwargs
 
     def to_proto_impl(self, **kwargs):
         args = dict()
@@ -209,10 +222,16 @@
             args[each] = kwargs[each]
         for each in self.__kwargs__:
             args[each] = self.__kwargs__[each]
+
         return conf_helps.memory(name=self.name, size=self.size, **args)
 
 
 class LayerOutputV2(Layer):
+    """
+    LayerOutputV2 is used to store the result of LayerOutput in the v1 API.
+    It does not store its parents because the layer_output has already been parsed.
+    """
+
     def __init__(self, layer_output):
         assert isinstance(layer_output, conf_helps.LayerOutput)
         self.layer_output = layer_output
@@ -239,8 +258,11 @@ class RecurrentGroupV2(Layer):
         super(RecurrentGroupV2, self).__init__(
             name=name, parent_layers=parent_layers)
 
+    wrapper = wrap_name_default(name_prefix='recurrent_group')
+    __init__ = wrapper(__init__)
+
     def to_proto_impl(self, **kwargs):
-        def in_args_converter(in_args):
+        def in_args_converter(*in_args):
             if not isinstance(in_args, collections.Sequence):
                 in_args = [in_args]
             return [LayerOutputV2(input) for input in in_args]
diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt
index ceb71c1454b2bad60e2bdd6da9280a66d33c5fad..bc0f24792796b82d0c4c59addb2ee5457b452ae2 100644
--- a/python/paddle/v2/tests/CMakeLists.txt
+++ b/python/paddle/v2/tests/CMakeLists.txt
@@ -1,3 +1,7 @@
 add_test(NAME test_v2_layer
   COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
-  ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py
+  ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py)
+
+add_test(NAME test_v2_rnn_layer
+  COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
+  ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_rnn_layer.py)
diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py
index 04c0fc7cb0bf12b4ecf732f540f01ecfa38825d0..41d9683464d29f095a8550f9effe348e4f26563a 100644
--- a/python/paddle/v2/tests/test_layer.py
+++ b/python/paddle/v2/tests/test_layer.py
@@ -11,16 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import difflib
 import unittest
 
-import paddle.trainer_config_helpers as conf_helps
 import paddle.v2.activation as activation
 import paddle.v2.attr as attr
 import paddle.v2.data_type as data_type
 import paddle.v2.layer as layer
-from paddle.trainer_config_helpers.config_parser_utils import \
-    parse_network_config as parse_network
 
 pixel = layer.data(name='pixel', type=data_type.dense_vector(784))
 label = layer.data(name='label', type=data_type.integer_value(10))
@@ -57,51 +53,5 @@ class CostLayerTest(unittest.TestCase):
         print layer.parse_network(cost7, cost8, cost9, cost10, cost11)
 
 
-class RNNTest(unittest.TestCase):
-    def test_simple_rnn(self):
-        dict_dim = 10
-        word_dim = 8
-        hidden_dim = 8
-
-        def parse_old_rnn():
-            def step(y):
-                mem = conf_helps.memory(name="rnn_state", size=hidden_dim)
-                out = conf_helps.fc_layer(
-                    input=[y, mem],
-                    size=hidden_dim,
-                    act=activation.Tanh(),
-                    bias_attr=True,
-                    name="rnn_state")
-                return out
-
-            def test():
-                data1 = conf_helps.data_layer(name="word", size=dict_dim)
-                embd = conf_helps.embedding_layer(input=data1, size=word_dim)
-                conf_helps.recurrent_group(name="rnn", step=step, input=embd)
-
-            return str(parse_network(test))
-
-        def parse_new_rnn():
-            def new_step(y):
-                mem = layer.memory(name="rnn_state", size=hidden_dim)
-                out = layer.fc(input=[y, mem],
-                               size=hidden_dim,
-                               act=activation.Tanh(),
-                               bias_attr=True,
-                               name="rnn_state")
-                return out
-
-            data1 = layer.data(
-                name="word", type=data_type.integer_value(dict_dim))
-            embd = layer.embedding(input=data1, size=word_dim)
-            rnn_layer = layer.recurrent_group(
-                name="rnn", step=new_step, input=embd)
-            return str(layer.parse_network(rnn_layer))
-
-        diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
-                                    parse_new_rnn().splitlines(1))
-        print ''.join(diff)
-
-
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf2c4db61aa502dd8383a60a92e26ffee759105d
--- /dev/null
+++ b/python/paddle/v2/tests/test_rnn_layer.py
@@ -0,0 +1,143 @@
+# Copyright PaddlePaddle contributors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import difflib
+import unittest
+
+import paddle.trainer_config_helpers as conf_helps
+import paddle.v2.activation as activation
+import paddle.v2.data_type as data_type
+import paddle.v2.layer as layer
+from paddle.trainer_config_helpers.config_parser_utils import \
+    parse_network_config as parse_network
+
+
+class RNNTest(unittest.TestCase):
+    def test_simple_rnn(self):
+        dict_dim = 10
+        word_dim = 8
+        hidden_dim = 8
+
+        def parse_old_rnn():
+            def step(y):
+                mem = conf_helps.memory(name="rnn_state", size=hidden_dim)
+                out = conf_helps.fc_layer(
+                    input=[y, mem],
+                    size=hidden_dim,
+                    act=activation.Tanh(),
+                    bias_attr=True,
+                    name="rnn_state")
+                return out
+
+            def test():
+                data = conf_helps.data_layer(name="word", size=dict_dim)
+                embd = conf_helps.embedding_layer(input=data, size=word_dim)
+                conf_helps.recurrent_group(name="rnn", step=step, input=embd)
+
+            return str(parse_network(test))
+
+        def parse_new_rnn():
+            def new_step(y):
+                mem = layer.memory(name="rnn_state", size=hidden_dim)
+                out = layer.fc(input=[y, mem],
+                               size=hidden_dim,
+                               act=activation.Tanh(),
+                               bias_attr=True,
+                               name="rnn_state")
+                return out
+
+            data = layer.data(
+                name="word", type=data_type.integer_value(dict_dim))
+            embd = layer.embedding(input=data, size=word_dim)
+            rnn_layer = layer.recurrent_group(
+                name="rnn", step=new_step, input=embd)
+            return str(layer.parse_network(rnn_layer))
+
+        diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
+                                    parse_new_rnn().splitlines(1))
+        print ''.join(diff)
+
+    def test_sequence_rnn_multi_input(self):
+        dict_dim = 10
+        word_dim = 8
+        hidden_dim = 8
+        label_dim = 3
+
+        def parse_old_rnn():
+            def step(y, wid):
+                z = conf_helps.embedding_layer(input=wid, size=word_dim)
+                mem = conf_helps.memory(name="rnn_state", size=hidden_dim)
+                out = conf_helps.fc_layer(
+                    input=[y, z, mem],
+                    size=hidden_dim,
+                    act=conf_helps.TanhActivation(),
+                    bias_attr=True,
+                    name="rnn_state")
+                return out
+
+            def test():
+                data = conf_helps.data_layer(name="word", size=dict_dim)
+                label = conf_helps.data_layer(name="label", size=label_dim)
+                emb = conf_helps.embedding_layer(input=data, size=word_dim)
+                out = conf_helps.recurrent_group(
+                    name="rnn", step=step, input=[emb, data])
+
+                rep = conf_helps.last_seq(input=out)
+                prob = conf_helps.fc_layer(
+                    size=label_dim,
+                    input=rep,
+                    act=conf_helps.SoftmaxActivation(),
+                    bias_attr=True)
+
+                conf_helps.outputs(
+                    conf_helps.classification_cost(
+                        input=prob, label=label))
+
+            return str(parse_network(test))
+
+        def parse_new_rnn():
+            def step(y, wid):
+                z = layer.embedding(input=wid, size=word_dim)
+                mem = layer.memory(name="rnn_state", size=hidden_dim)
+                out = layer.fc(input=[y, z, mem],
+                               size=hidden_dim,
+                               act=activation.Tanh(),
+                               bias_attr=True,
+                               name="rnn_state")
+                return out
+
+            data = layer.data(
+                name="word", type=data_type.dense_vector(dict_dim))
+            label = layer.data(
+                name="label", type=data_type.dense_vector(label_dim))
+            emb = layer.embedding(input=data, size=word_dim)
+            out = layer.recurrent_group(
+                name="rnn", step=step, input=[emb, data])
+
+            rep = layer.last_seq(input=out)
+            prob = layer.fc(size=label_dim,
+                            input=rep,
+                            act=activation.Softmax(),
+                            bias_attr=True)
+
+            cost = layer.classification_cost(input=prob, label=label)
+
+            return str(layer.parse_network(cost))
+
+        diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
+                                    parse_new_rnn().splitlines(1))
+        print ''.join(diff)
+
+
+if __name__ == '__main__':
+    unittest.main()
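Note on the Layer.to_proto change in python/paddle/v2/layer.py above: parsed layers are cached in the shared context dict under their names, while MemoryV2 deliberately bypasses the cache because a memory may share a name with a real layer. A minimal, self-contained sketch of that name-keyed memoization pattern (the Node class is illustrative only, not part of the patch):

# Name-keyed memoization as in Layer.to_proto: resolve parents first,
# then parse each named node at most once via a shared context dict.
class Node(object):
    def __init__(self, name, parents=None):
        self.name = name
        self.parents = parents or []

    def to_proto(self, context):
        for parent in self.parents:  # parents are parsed before the node
            parent.to_proto(context)
        if self.name not in context:  # parse each named node only once
            context[self.name] = 'proto(%s)' % self.name
        return context[self.name]

context = dict()
hidden = Node('hidden', parents=[Node('input')])
cost = Node('cost', parents=[hidden, hidden])
cost.to_proto(context)
assert sorted(context) == ['cost', 'hidden', 'input']  # no duplicates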
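Similarly, the rewritten MemoryV2.__init__ splits **kwargs into parent layers (currently only boot_layer), which must be registered with the Layer base class so they are parsed before the memory itself, and plain configuration values, which are stored and replayed in to_proto_impl. A standalone sketch of that split (split_kwargs is a hypothetical helper, not part of the patch):

# Separate layer-valued kwargs (parents) from plain config kwargs,
# mirroring what MemoryV2.__init__ does with 'boot_layer'.
def split_kwargs(kwargs, parent_names):
    parent_layers = dict()
    other_kwargs = dict()
    for key in kwargs.keys():
        if key in parent_names:
            parent_layers[key] = kwargs[key]  # parsed before this layer
        else:
            other_kwargs[key] = kwargs[key]  # forwarded to conf_helps.memory
    return parent_layers, other_kwargs

parents, others = split_kwargs(
    dict(boot_layer='<a layer object>', is_seq=True), ['boot_layer'])
assert parents == dict(boot_layer='<a layer object>')
assert others == dict(is_seq=True)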
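Finally, the two lines added to RecurrentGroupV2 apply wrap_name_default by rebinding __init__ inside the class body, which is equivalent to decorating it and gives every unnamed recurrent_group an auto-generated name. A simplified sketch of the idiom; the wrap_name_default stand-in below is an assumption for illustration, not Paddle's actual implementation:

import functools

def wrap_name_default(name_prefix):
    # Stand-in: fill in name='<prefix>_<n>' whenever name is None.
    counter = [0]

    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, name=None, **kwargs):
            if name is None:
                name = '%s_%d' % (name_prefix, counter[0])
                counter[0] += 1
            return func(self, name=name, **kwargs)

        return wrapper

    return decorator

class RecurrentGroupDemo(object):
    def __init__(self, name=None):
        self.name = name

    # same idiom as the patch: rebind __init__ in the class body
    wrapper = wrap_name_default(name_prefix='recurrent_group')
    __init__ = wrapper(__init__)

assert RecurrentGroupDemo().name == 'recurrent_group_0'
assert RecurrentGroupDemo(name='fc_group').name == 'fc_group'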