diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index b31efe170dbf11771311ed5dbf4cd8b299b0c4ca..4dbcd3bb6b8b4c813a391d52ae751409bded914e 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -20,6 +20,7 @@ import event import data_type import topology import data_feeder +import networks from . import dataset from . import reader import attr @@ -29,7 +30,7 @@ import py_paddle.swig_paddle as api __all__ = [ 'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', 'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader', - 'topology' + 'topology', 'networks' ] diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index bf5d653e8ae3e442f7b613b83ed17b60c0c5c8b7..82ccd8498a3dbba3644b8b802ddaa294ae183496 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -109,9 +109,10 @@ def parse_network(*outputs): class Layer(object): - def __init__(self, name=None, parent_layers=None): + def __init__(self, name=None, size=None, parent_layers=None): assert isinstance(parent_layers, dict) self.name = name + self.size = size self.__parent_layers__ = parent_layers def to_proto(self, context): @@ -173,7 +174,8 @@ def __convert_to_v2__(method_name, parent_names, is_default_name=True): other_kwargs[key] = kwargs[key] name = kwargs.get('name', None) - super(V2LayerImpl, self).__init__(name, parent_layers) + size = kwargs.get('size', None) + super(V2LayerImpl, self).__init__(name, size, parent_layers) self.__other_kwargs__ = other_kwargs if wrapper is not None: @@ -220,9 +222,10 @@ class WithExtraParent(Layer): def extra_parent(self): return self.__extra_parent__ - def __init__(self, name=None, parent_layers=None): + def __init__(self, name=None, size=None, parent_layers=None): self.__extra_parent__ = [] - super(WithExtraParent, self).__init__(name, parent_layers) + super(WithExtraParent, self).__init__( + name=name, size=size, parent_layers=parent_layers) def append_extra_parent(self, parent): self.__extra_parent__.append(parent) @@ -261,7 +264,8 @@ class MemoryV2(WithExtraParent): def __init__(self, name, size, **kwargs): self.name = name self.size = size - super(MemoryV2, self).__init__(name=name, parent_layers=dict()) + super(MemoryV2, self).__init__( + name=name, size=size, parent_layers=dict()) self.__kwargs__ = kwargs self.__boot_layer_name__ = None if 'boot_layer' in kwargs: @@ -271,7 +275,9 @@ class MemoryV2(WithExtraParent): st = inspect.stack() for i in xrange(len(st)): locs = inspect.stack()[i][0].f_locals - for val in locs.viewvalues(): + keys = locs.keys() + for key in keys: + val = locs[key] if isinstance(val, RecurrentLayerInput): begin_of_current_rnn.append(val) @@ -322,21 +328,15 @@ class LayerOutputV2(Layer): return self.layer_output -class StaticInputV2(Layer): - def __init__(self, input=None, **kwargs): - assert input is not None - self.__kwargs__ = kwargs - super(StaticInputV2, self).__init__( - name=input.name, parent_layers={'input': input}) - - def context_name(self): - return self.name + "#static_input" - - def to_proto_impl(self, **kwargs): - args = dict() - args.update(kwargs) - args.update(self.__kwargs__) - return conf_helps.StaticInput(**args) +class StaticInputV2(object): + def __init__(self, input, is_seq=False, size=None): + assert isinstance(input, LayerV2) + self.name = input.name + self.input = input + self.is_seq = is_seq + self.size = size + # TODO(qiaolongfei): add size + # assert input.size is not None or size is not None class MixedLayerV2(Layer): @@ -370,9 +370,8 @@ class MixedLayerV2(Layer): other_kwargs['act'] = act other_kwargs['bias_attr'] = bias_attr other_kwargs['layer_attr'] = layer_attr - parent_layers = {"input": self.__inputs__} - super(MixedLayerV2, self).__init__(name, parent_layers) + super(MixedLayerV2, self).__init__(name, size, parent_layers) self.__other_kwargs__ = other_kwargs def __iadd__(self, other): @@ -452,6 +451,12 @@ def recurrent_group(step, input, name=None): if not isinstance(input, collections.Sequence): input = [input] + # TODO(qiaolongfei) convert StaticInput to memory according to v2 recurrent_group + for i in xrange(len(input)): + cur_input = input[i] + if isinstance(cur_input, StaticInputV2): + input[i] = cur_input.input + actual_input = [ RecurrentLayerInput( recurrent_name=name, @@ -512,7 +517,7 @@ def __layer_name_mapping_parent_names__(inname): lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b', 'expand_as', 'weights', 'vectors', 'weight', 'score', 'left', - 'right'], + 'right', 'output_mem'], all_args) diff --git a/python/paddle/v2/networks.py b/python/paddle/v2/networks.py new file mode 100644 index 0000000000000000000000000000000000000000..2877b56b18dc9f1247fe8d3edd7a3ed32988fc44 --- /dev/null +++ b/python/paddle/v2/networks.py @@ -0,0 +1,19 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from layer import __convert_to_v2__ + +simple_gru = __convert_to_v2__('simple_gru', ['input']) +simple_attention = __convert_to_v2__( + 'simple_attention', ['encoded_sequence', 'encoded_proj', 'decoder_state']) diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py index 48aeb42391576f075be008ab70fc8b23786d3a12..5fbbd20eb76bb9daab2bcf98c4adad989106a377 100644 --- a/python/paddle/v2/tests/test_rnn_layer.py +++ b/python/paddle/v2/tests/test_rnn_layer.py @@ -74,21 +74,28 @@ class RNNTest(unittest.TestCase): label_dim = 3 def parse_old_rnn(): - def step(y, wid): - z = conf_helps.embedding_layer(input=wid, size=word_dim) - mem = conf_helps.memory(name="rnn_state", size=hidden_dim) - out = conf_helps.fc_layer( - input=[y, z, mem], - size=hidden_dim, - act=conf_helps.TanhActivation(), - bias_attr=True, - name="rnn_state") - return out - def test(): data = conf_helps.data_layer(name="word", size=dict_dim) label = conf_helps.data_layer(name="label", size=label_dim) emb = conf_helps.embedding_layer(input=data, size=word_dim) + boot_layer = conf_helps.data_layer(name="boot", size=10) + boot_layer = conf_helps.fc_layer( + name='boot_fc', input=boot_layer, size=10) + + def step(y, wid): + z = conf_helps.embedding_layer(input=wid, size=word_dim) + mem = conf_helps.memory( + name="rnn_state", + size=hidden_dim, + boot_layer=boot_layer) + out = conf_helps.fc_layer( + input=[y, z, mem], + size=hidden_dim, + act=conf_helps.TanhActivation(), + bias_attr=True, + name="rnn_state") + return out + out = conf_helps.recurrent_group( name="rnn", step=step, input=[emb, data]) @@ -111,11 +118,9 @@ class RNNTest(unittest.TestCase): label = layer.data( name="label", type=data_type.dense_vector(label_dim)) emb = layer.embedding(input=data, size=word_dim) - boot_layer = layer.data( name="boot", type=data_type.dense_vector(10)) - - boot_layer = layer.fc(name='wtf', input=boot_layer, size=10) + boot_layer = layer.fc(name='boot_fc', input=boot_layer, size=10) def step(y, wid): z = layer.embedding(input=wid, size=word_dim) @@ -141,11 +146,9 @@ class RNNTest(unittest.TestCase): return str(layer.parse_network(cost)) - with open("/Users/baidu/old.out", 'w') as f: - print >> f, parse_old_rnn() - with open("/Users/baidu/new.out", "w") as f: - print >> f, parse_new_rnn() - # print ''.join(diff) + diff = difflib.unified_diff(parse_old_rnn().splitlines(1), + parse_new_rnn().splitlines(1)) + print ''.join(diff) if __name__ == '__main__':