diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
index d548d1adaafacdb097dbe476fdc76651c9f46b6b..8ab8cd2f85d5d7bcf86c2f57b350dfcd99177b69 100644
--- a/python/paddle/v2/__init__.py
+++ b/python/paddle/v2/__init__.py
@@ -20,6 +20,7 @@ import event
 import data_type
 import topology
 import data_feeder
+import networks
 from . import dataset
 from . import reader
 import attr
diff --git a/python/paddle/v2/config_base.py b/python/paddle/v2/config_base.py
index 035f96b0f2e978a413a1ebe0ec115f75ff07befc..fa2ccec6c3270541dd6b13fdfd2323d10ceac642 100644
--- a/python/paddle/v2/config_base.py
+++ b/python/paddle/v2/config_base.py
@@ -22,6 +22,7 @@ class Layer(object):
     def __init__(self, name=None, parent_layers=None):
         assert isinstance(parent_layers, dict)
         self.name = name
+        self.__contex__ = {}
         self.__parent_layers__ = parent_layers
 
     def to_proto(self, context):
@@ -39,16 +40,38 @@ class Layer(object):
                                self.__parent_layers__[layer_name])
             kwargs[layer_name] = v1_layer
 
-        if self.name is None:
+        if self.context_name() is None:
             return self.to_proto_impl(**kwargs)
-        elif self.name not in context:
-            context[self.name] = self.to_proto_impl(**kwargs)
-
-        return context[self.name]
+        elif self.context_name() not in context:
+            context[self.context_name()] = self.to_proto_impl(**kwargs)
+        self.__contex__ = context
+        if self.use_context_name():
+            return context[self.context_name()]
+        else:
+            return context[self.name]
 
     def to_proto_impl(self, **kwargs):
         raise NotImplementedError()
 
+    def context_name(self):
+        """
+        Context name means the context which stores `to_proto_impl` result.
+        If multiple layer share same context_name, the `to_proto_impl` of them
+        will be invoked only once.
+        """
+        return self.name
+
+    def use_context_name(self):
+        return False
+
+    def calculate_size(self):
+        """
+        lazy calculate size of the layer, should be called when to_proto_impl of
+        this layer is called.
+        :return:
+        """
+        return self.__contex__[self.context_name()].size
+
 
 def __convert_to_v2__(method_name, parent_names, is_default_name=True):
     if is_default_name:
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
index 67111f1315fbb0f55c1db0f6fe89fc988c8d83f6..2f55611aaa1d3ae22f5d7f184b38e622271881ea 100644
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
@@ -65,19 +65,24 @@ to be in a Python function but could be anywhere.
 
 Also, the creation of a protobuf message is hidden in the invocation of
 paddle.v2.parameters.create, no longer exposed to users.
""" + +import collections +import inspect from config_base import Layer, __convert_to_v2__ import paddle.trainer_config_helpers as conf_helps from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as __parse__ - -from paddle.trainer_config_helpers.default_decorators import wrap_name_default from paddle.trainer_config_helpers.default_decorators import wrap_act_default from paddle.trainer_config_helpers.default_decorators import \ wrap_bias_attr_default +from paddle.trainer_config_helpers.default_decorators import wrap_name_default from paddle.trainer_config_helpers.layers import layer_support +from paddle.trainer.config_parser import \ + RecurrentLayerGroupWithoutOutLinksBegin, RecurrentLayerGroupSetOutLink, \ + RecurrentLayerGroupEnd, model_type -import data_type import activation +import data_type __all__ = ['parse_network', 'data'] @@ -130,6 +135,137 @@ class DataLayerV2(Layer): return getattr(conf_helps, self.__method_name__)(name=self.name, **args) +class WithExtraParent(Layer): + def extra_parent(self): + return self.__extra_parent__ + + def __init__(self, name=None, parent_layers=None): + self.__extra_parent__ = [] + super(WithExtraParent, self).__init__( + name=name, parent_layers=parent_layers) + + def append_extra_parent(self, parent): + self.__extra_parent__.append(parent) + + def to_proto(self, context): + """ + function to set proto attribute + """ + kwargs = dict() + for p in self.__extra_parent__: + p.to_proto(context=context) + + for layer_name in self.__parent_layers__: + if not isinstance(self.__parent_layers__[layer_name], + collections.Sequence): + v1_layer = self.__parent_layers__[layer_name].to_proto( + context=context) + else: + v1_layer = map(lambda x: x.to_proto(context=context), + self.__parent_layers__[layer_name]) + kwargs[layer_name] = v1_layer + + if self.context_name() is None: + return self.to_proto_impl(context=context, **kwargs) + elif self.context_name() not in context: + context[self.context_name()] = self.to_proto_impl( + context=context, **kwargs) + + if self.use_context_name(): + return context[self.context_name()] + else: + return context[self.name] + + +class MemoryV2(WithExtraParent): + def __init__(self, name, **kwargs): + self.name = name + super(MemoryV2, self).__init__(name=name, parent_layers=dict()) + self.__kwargs__ = kwargs + self.__boot_layer_name__ = None + if 'boot_layer' in kwargs: + begin_of_current_rnn = [] + # TODO(yuyang18): Fix inspect, it could be wrong when user invoke a + # function inside step. 
+            st = inspect.stack()
+            for i in xrange(len(st)):
+                locs = inspect.stack()[i][0].f_locals
+                keys = locs.keys()
+                for key in keys:
+                    val = locs[key]
+                    if isinstance(val, RecurrentLayerInput):
+                        begin_of_current_rnn.append(val)
+                    elif isinstance(val, collections.Sequence):
+                        for v in val:
+                            if isinstance(v, RecurrentLayerInput):
+                                begin_of_current_rnn.append(v)
+
+                if begin_of_current_rnn:
+                    break
+            assert begin_of_current_rnn is not None
+            for extra in begin_of_current_rnn:
+                self.append_extra_parent(extra)
+                assert isinstance(extra, WithExtraParent)
+                extra.append_extra_parent(kwargs['boot_layer'])
+            self.__boot_layer_name__ = kwargs['boot_layer'].name
+
+    def to_proto_impl(self, context, **kwargs):
+        args = dict()
+        for each in kwargs:
+            args[each] = kwargs[each]
+        for each in self.__kwargs__:
+            args[each] = self.__kwargs__[each]
+
+        if self.__boot_layer_name__ is not None:
+            args['boot_layer'] = context[self.__boot_layer_name__]
+
+        size = args.get('size', None)
+        if size is not None:
+            if callable(size):
+                real_size = size()
+            else:
+                real_size = size
+            args['size'] = real_size
+        return conf_helps.memory(name=self.name, **args)
+
+    def context_name(self):
+        return self.name + "#memory"
+
+    def use_context_name(self):
+        """
+        memory layer will have the same name with some layer
+        :return:
+        """
+        return True
+
+
+class LayerOutputV2(Layer):
+    """
+    LayerOutputV2 is used to store the result of LayerOutput in v1 api.
+    It will not store it's parents because layer_output has been parsed already.
+    """
+
+    def __init__(self, layer_output):
+        assert isinstance(layer_output, conf_helps.LayerOutput)
+        self.layer_output = layer_output
+        super(LayerOutputV2, self).__init__(
+            name=layer_output.name, parent_layers=dict())
+
+    def to_proto_impl(self):
+        return self.layer_output
+
+
+class StaticInputV2(object):
+    def __init__(self, input, is_seq=False, size=None):
+        assert isinstance(input, LayerV2)
+        self.name = input.name
+        self.input = input
+        self.is_seq = is_seq
+        self.size = size
+        # TODO(qiaolongfei): add size
+        # assert input.size is not None or size is not None
+
+
 class MixedLayerV2(Layer):
     """
     This class is use to support `with` grammar. If not, the following code
@@ -161,7 +297,6 @@ class MixedLayerV2(Layer):
         other_kwargs['act'] = act
         other_kwargs['bias_attr'] = bias_attr
         other_kwargs['layer_attr'] = layer_attr
-
         parent_layers = {"input": self.__inputs__}
         super(MixedLayerV2, self).__init__(name, parent_layers)
         self.__other_kwargs__ = other_kwargs
@@ -171,7 +306,7 @@ class MixedLayerV2(Layer):
             self.__inputs__.append(other)
             return self
         else:
-            raise MixedLayerTypeV2.AddToSealedMixedLayerExceptionV2()
+            raise MixedLayerV2.AddToSealedMixedLayerExceptionV2()
 
     def __enter__(self):
         assert len(self.__inputs__) == 0
@@ -186,6 +321,13 @@ class MixedLayerV2(Layer):
             args[each] = kwargs[each]
         for each in self.__other_kwargs__:
             args[each] = self.__other_kwargs__[each]
+        size = args.get('size', None)
+        if size is not None:
+            if callable(size):
+                real_size = size()
+            else:
+                real_size = size
+            args['size'] = real_size
         return getattr(conf_helps, self.__method_name__)(**args)
 
 
@@ -202,14 +344,51 @@ def mixed(size=0,
     return MixedLayerV2(size, input, name, act, bias_attr, layer_attr)
 
 
+class RecurrentLayerInput(WithExtraParent):
+    def __init__(self, recurrent_name, index, parent_layers):
+        assert len(parent_layers) == 1
+        self.__parents__ = parent_layers.values()[0]
+        super(RecurrentLayerInput, self).__init__(
+            name=self.__parents__[index].name, parent_layers=parent_layers)
+        self.__recurrent_name__ = recurrent_name
+
+    def context_name(self):
+        return self.__recurrent_name__ + ".begin"
+
+    def to_proto_impl(self, context, **kwargs):
+        model_type('recurrent_nn')
+        RecurrentLayerGroupWithoutOutLinksBegin(
+            name=self.__recurrent_name__,
+            in_links=map(lambda x: x.name, self.__parents__))
+        return self
+
+
+class RecurrentLayerOutput(Layer):
+    def __init__(self, recurrent_name, index, parent_layers):
+        assert len(parent_layers) == 1
+        self.__parents__ = parent_layers.values()[0]
+        super(RecurrentLayerOutput, self).__init__(
+            name=self.__parents__[index].name, parent_layers=parent_layers)
+        self.__recurrent_name__ = recurrent_name
+
+    def context_name(self):
+        return self.__recurrent_name__ + ".end"
+
+    def to_proto_impl(self, **kwargs):
+        for l in self.__parents__:
+            RecurrentLayerGroupSetOutLink(l.name)
+        RecurrentLayerGroupEnd(name=self.__recurrent_name__)
+
+
 LayerV2 = Layer
 data = DataLayerV2
 AggregateLevel = conf_helps.layers.AggregateLevel
 ExpandLevel = conf_helps.layers.ExpandLevel
+memory = MemoryV2
 
 
 def __layer_name_mapping__(inname):
-    if inname in ['data_layer', 'memory', 'mixed_layer']:
+    if inname in ['data_layer', 'memory', 'mixed_layer', 'recurrent_group']:
         # Do Not handle these layers
         return
     elif inname == 'maxid_layer':
@@ -231,8 +410,10 @@ def __layer_name_mapping__(inname):
 def __layer_name_mapping_parent_names__(inname):
     all_args = getattr(conf_helps, inname).argspec.args
     return filter(
-        lambda x: x in ['input1', 'input2','label', 'input', 'a', 'b', 'expand_as',
-                        'weights', 'vectors', 'weight', 'score', 'left', 'right'],
+        lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b',
+                        'expand_as',
+                        'weights', 'vectors', 'weight', 'score', 'left',
+                        'right', 'output_mem'],
         all_args)
 
 
@@ -267,3 +448,54 @@ operator_list = [
 for op in operator_list:
     globals()[op[0]] = __convert_to_v2__(
         op[0], parent_names=op[1], is_default_name=False)
+
+
+@wrap_name_default()
+def recurrent_group(step, input, name=None):
+    if not isinstance(input, collections.Sequence):
+        input = [input]
+
+    non_static_inputs = filter(lambda x: not isinstance(x, StaticInputV2),
+                               input)
+    actual_input = [
+        RecurrentLayerInput(
+            recurrent_name=name,
+            index=i,
+            parent_layers={'recurrent_inputs': non_static_inputs})
+        for i in xrange(len(non_static_inputs))
+    ]
+
+    def __real_step__(*args):
+        rnn_input = list(args)
+        static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input)
+        for static_input in static_inputs:
+            mem_name = "__%s_memory__" % static_input.input.name
+            mem = memory(
+                name=mem_name,
+                is_seq=static_input.is_seq,
+                size=static_input.input.calculate_size,
+                boot_layer=static_input.input)
+            with mixed(
+                    name=mem_name,
+                    size=static_input.input.calculate_size,
+                    act=activation.Identity()) as mix:
+                mix += identity_projection(input=mem)
+            rnn_input.insert(input.index(static_input), mix)
+        return step(*rnn_input)
+
+    actual_output = __real_step__(*actual_input)
+
+    if not isinstance(actual_output, collections.Sequence):
+        actual_output = [actual_output]
+
+    retv = [
+        RecurrentLayerOutput(
+            recurrent_name=name,
+            index=i,
+            parent_layers={'recurrent_outputs': actual_output})
+        for i in xrange(len(actual_output))
+    ]
+    if len(retv) == 1:
+        return retv[0]
+    else:
+        return retv
diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt
index 46b5d08b8761ea58530f5aefe5d1947408727f85..572deaff356712cac23cd7911cdf289db100564c 100644
--- a/python/paddle/v2/tests/CMakeLists.txt
+++ b/python/paddle/v2/tests/CMakeLists.txt
@@ -1,12 +1,16 @@
+add_test(NAME test_v2_api
+  COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE})
+
 add_test(NAME test_v2_layer
     COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
         ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py
     WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
 
-add_test(NAME test_v2_api
-  COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE})
+add_test(NAME test_v2_rnn_layer
+  COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
+    ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_rnn_layer.py)
 
-add_test(NAME topology_test
+add_test(NAME test_topology
     COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
         ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_topology.py
     WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5fbbd20eb76bb9daab2bcf98c4adad989106a377
--- /dev/null
+++ b/python/paddle/v2/tests/test_rnn_layer.py
@@ -0,0 +1,155 @@
+# Copyright PaddlePaddle contributors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import difflib
+import unittest
+
+import paddle.trainer_config_helpers as conf_helps
+import paddle.v2.activation as activation
+import paddle.v2.data_type as data_type
+import paddle.v2.layer as layer
+from paddle.trainer_config_helpers.config_parser_utils import \
+    parse_network_config as parse_network
+
+
+class RNNTest(unittest.TestCase):
+    def test_simple_rnn(self):
+        dict_dim = 10
+        word_dim = 8
+        hidden_dim = 8
+
+        def parse_old_rnn():
+            def step(y):
+                mem = conf_helps.memory(name="rnn_state", size=hidden_dim)
+                out = conf_helps.fc_layer(
+                    input=[y, mem],
+                    size=hidden_dim,
+                    act=activation.Tanh(),
+                    bias_attr=True,
+                    name="rnn_state")
+                return out
+
+            def test():
+                data = conf_helps.data_layer(name="word", size=dict_dim)
+                embd = conf_helps.embedding_layer(input=data, size=word_dim)
+                conf_helps.recurrent_group(name="rnn", step=step, input=embd)
+
+            return str(parse_network(test))
+
+        def parse_new_rnn():
+            def new_step(y):
+                mem = layer.memory(name="rnn_state", size=hidden_dim)
+                out = layer.fc(input=[y, mem],
+                               size=hidden_dim,
+                               act=activation.Tanh(),
+                               bias_attr=True,
+                               name="rnn_state")
+                return out
+
+            data = layer.data(
+                name="word", type=data_type.integer_value(dict_dim))
+            embd = layer.embedding(input=data, size=word_dim)
+            rnn_layer = layer.recurrent_group(
+                name="rnn", step=new_step, input=embd)
+            return str(layer.parse_network(rnn_layer))
+
+        diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
+                                    parse_new_rnn().splitlines(1))
+        print ''.join(diff)
+
+    def test_sequence_rnn_multi_input(self):
+        dict_dim = 10
+        word_dim = 8
+        hidden_dim = 8
+        label_dim = 3
+
+        def parse_old_rnn():
+            def test():
+                data = conf_helps.data_layer(name="word", size=dict_dim)
+                label = conf_helps.data_layer(name="label", size=label_dim)
+                emb = conf_helps.embedding_layer(input=data, size=word_dim)
+                boot_layer = conf_helps.data_layer(name="boot", size=10)
+                boot_layer = conf_helps.fc_layer(
+                    name='boot_fc', input=boot_layer, size=10)
+
+                def step(y, wid):
+                    z = conf_helps.embedding_layer(input=wid, size=word_dim)
+                    mem = conf_helps.memory(
+                        name="rnn_state",
+                        size=hidden_dim,
+                        boot_layer=boot_layer)
+                    out = conf_helps.fc_layer(
+                        input=[y, z, mem],
+                        size=hidden_dim,
+                        act=conf_helps.TanhActivation(),
+                        bias_attr=True,
+                        name="rnn_state")
+                    return out
+
+                out = conf_helps.recurrent_group(
+                    name="rnn", step=step, input=[emb, data])
+
+                rep = conf_helps.last_seq(input=out)
+                prob = conf_helps.fc_layer(
+                    size=label_dim,
+                    input=rep,
+                    act=conf_helps.SoftmaxActivation(),
+                    bias_attr=True)
+
+                conf_helps.outputs(
+                    conf_helps.classification_cost(
+                        input=prob, label=label))
+
+            return str(parse_network(test))
+
+        def parse_new_rnn():
+            data = layer.data(
+                name="word", type=data_type.dense_vector(dict_dim))
+            label = layer.data(
+                name="label", type=data_type.dense_vector(label_dim))
+            emb = layer.embedding(input=data, size=word_dim)
+            boot_layer = layer.data(
+                name="boot", type=data_type.dense_vector(10))
+            boot_layer = layer.fc(name='boot_fc', input=boot_layer, size=10)
+
+            def step(y, wid):
+                z = layer.embedding(input=wid, size=word_dim)
+                mem = layer.memory(
+                    name="rnn_state", size=hidden_dim, boot_layer=boot_layer)
+                out = layer.fc(input=[y, z, mem],
+                               size=hidden_dim,
+                               act=activation.Tanh(),
+                               bias_attr=True,
+                               name="rnn_state")
+                return out
+
+            out = layer.recurrent_group(
+                name="rnn", step=step, input=[emb, data])
+
+            rep = layer.last_seq(input=out)
+            prob = layer.fc(size=label_dim,
+                            input=rep,
+                            act=activation.Softmax(),
+                            bias_attr=True)
+
+            cost = layer.classification_cost(input=prob, label=label)
+
+            return str(layer.parse_network(cost))
+
+        diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
+                                    parse_new_rnn().splitlines(1))
+        print ''.join(diff)
+
+
+if __name__ == '__main__':
+    unittest.main()
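
Usage note (not part of the patch): the sketch below illustrates how the new v2 recurrent_group is intended to be driven together with a StaticInputV2, a path the new tests above do not exercise. It is a minimal, hypothetical example: the layer names and sizes are made up, and StaticInputV2 is referenced directly from paddle.v2.layer because this patch does not register a shorter alias for it.

    # Illustrative sketch only; assumes this patch is applied.
    import paddle.v2.activation as activation
    import paddle.v2.data_type as data_type
    import paddle.v2.layer as layer

    dict_dim = 100   # hypothetical vocabulary size
    hidden_dim = 32

    # A sequence input plus one static input computed outside the group.
    word = layer.data(name="word", type=data_type.integer_value(dict_dim))
    emb = layer.embedding(input=word, size=hidden_dim)
    encoder = layer.fc(name="encoder", input=emb, size=hidden_dim)
    static_in = layer.StaticInputV2(input=encoder, is_seq=False)

    def step(y, encoder_proj):
        # recurrent_group replaces the StaticInputV2 argument with an identity
        # projection over a memory boot-strapped from `encoder`, so
        # `encoder_proj` is an ordinary layer inside the step function.
        mem = layer.memory(name="rnn_state", size=hidden_dim)
        return layer.fc(input=[y, encoder_proj, mem],
                        size=hidden_dim,
                        act=activation.Tanh(),
                        name="rnn_state")

    rnn = layer.recurrent_group(name="rnn", step=step, input=[emb, static_in])
    print layer.parse_network(rnn)

Internally, both memory and mixed now accept a callable for size (here encoder.calculate_size is passed by recurrent_group itself); the callable is resolved inside to_proto_impl, once the referenced layer's size is available from the parsed context.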