diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 00aef80691fba05be543beadf22acde7d28c5e8e..95f0915972cbacdf4ba860f893d1c47f175b8e8a 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -112,6 +112,7 @@ __all__ = [ 'priorbox_layer', 'spp_layer', 'pad_layer', + 'layer_support', ] @@ -708,6 +709,7 @@ class MixedLayerType(LayerOutput): # update the size which might be computed inside MixedLayer # according to the operator's output size self.size = ml.config.size + self.finalized = True @wrap_name_default("mixed") diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 3920d4a08fc6271816d35bbb234e47ab3b93d3c9..d5fe06542bc19effa130171e78c158f99ffa45a7 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -71,7 +71,11 @@ import collections import paddle.trainer_config_helpers as conf_helps from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as __parse__ + from paddle.trainer_config_helpers.default_decorators import wrap_name_default +from paddle.trainer_config_helpers.default_decorators import wrap_act_default +from paddle.trainer_config_helpers.default_decorators import wrap_bias_attr_default +from paddle.trainer_config_helpers.layers import layer_support import data_type import activation @@ -84,6 +88,13 @@ __all__ = [ 'sum_cost', 'huber_cost' ] +__projection_names__ = filter(lambda x: x.endswith('_projection'), + dir(conf_helps)) +__all__ += __projection_names__ + +__operator_names__ = filter(lambda x: x.endswith('_operator'), dir(conf_helps)) +__all__ += __operator_names__ + def parse_network(*outputs): """ @@ -101,9 +112,8 @@ def parse_network(*outputs): class Layer(object): - def __init__(self, name, parent_layers): + def __init__(self, name=None, parent_layers=None): assert isinstance(parent_layers, dict) - assert isinstance(name, basestring) self.name = name self.__parent_layers__ = parent_layers @@ -122,22 +132,25 @@ class Layer(object): self.__parent_layers__[layer_name]) kwargs[layer_name] = v1_layer - if self.name not in context: + if self.name is None: + return self.to_proto_impl(**kwargs) + elif self.name not in context: context[self.name] = self.to_proto_impl(**kwargs) + return context[self.name] def to_proto_impl(self, **kwargs): raise NotImplementedError() -def __convert_to_v2__(method_name, name_prefix, parent_names): +def __convert_to_v2__(method_name, name_prefix=None, parent_names=None): if name_prefix is not None: wrapper = wrap_name_default(name_prefix=name_prefix) else: wrapper = None class V2LayerImpl(Layer): - def __init__(self, name=None, **kwargs): + def __init__(self, **kwargs): parent_layers = dict() other_kwargs = dict() for pname in parent_names: @@ -148,6 +161,7 @@ def __convert_to_v2__(method_name, name_prefix, parent_names): if key not in parent_names: other_kwargs[key] = kwargs[key] + name = kwargs.get('name', None) super(V2LayerImpl, self).__init__(name, parent_layers) self.__other_kwargs__ = other_kwargs @@ -160,7 +174,7 @@ def __convert_to_v2__(method_name, name_prefix, parent_names): args[each] = kwargs[each] for each in self.__other_kwargs__: args[each] = self.__other_kwargs__[each] - return getattr(conf_helps, method_name)(name=self.name, **args) + return getattr(conf_helps, method_name)(**args) return V2LayerImpl @@ -191,6 +205,78 @@ class DataLayerV2(Layer): return getattr(conf_helps, self.__method_name__)(name=self.name, **args) +class MixedLayerV2(Layer): + """ + This class is use to support `with` grammar. If not, the following code + could convert mixed_layer simply. + + mixed = __convert_to_v2__( + 'mixed_layer', name_prefix='mixed', parent_names=['input']) + """ + + class AddToSealedMixedLayerExceptionV2(Exception): + pass + + def __init__(self, + size=0, + input=None, + name=None, + act=None, + bias_attr=None, + layer_attr=None): + self.__method_name__ = 'mixed_layer' + self.finalized = False + self.__inputs__ = [] + if input is not None: + self.__inputs__ = input + + other_kwargs = dict() + other_kwargs['name'] = name + other_kwargs['size'] = size + other_kwargs['act'] = act + other_kwargs['bias_attr'] = bias_attr + other_kwargs['layer_attr'] = layer_attr + + parent_layers = {"input": self.__inputs__} + super(MixedLayerV2, self).__init__(name, parent_layers) + self.__other_kwargs__ = other_kwargs + + def __iadd__(self, other): + if not self.finalized: + self.__inputs__.append(other) + return self + else: + raise MixedLayerTypeV2.AddToSealedMixedLayerExceptionV2() + + def __enter__(self): + assert len(self.__inputs__) == 0 + return self + + def __exit__(self, *args, **kwargs): + self.finalized = True + + def to_proto_impl(self, **kwargs): + args = dict() + for each in kwargs: + args[each] = kwargs[each] + for each in self.__other_kwargs__: + args[each] = self.__other_kwargs__[each] + return getattr(conf_helps, self.__method_name__)(**args) + + +@wrap_name_default("mixed") +@wrap_act_default(act=activation.Linear()) +@wrap_bias_attr_default(has_bias=False) +@layer_support(conf_helps.layers.ERROR_CLIPPING, conf_helps.layers.DROPOUT) +def mixed(size=0, + name=None, + input=None, + act=None, + bias_attr=False, + layer_attr=None): + return MixedLayerV2(size, input, name, act, bias_attr, layer_attr) + + data = DataLayerV2 fc = __convert_to_v2__('fc_layer', name_prefix='fc', parent_names=['input']) max_id = __convert_to_v2__( @@ -226,32 +312,15 @@ sum_cost = __convert_to_v2__( huber_cost = __convert_to_v2__( 'huber_cost', name_prefix='huber_cost', parent_names=['input', 'label']) -if __name__ == '__main__': - pixel = data(name='pixel', type=data_type.dense_vector(784)) - label = data(name='label', type=data_type.integer_value(10)) - weight = data(name='weight', type=data_type.dense_vector(10)) - score = data(name='score', type=data_type.dense_vector(1)) - - hidden = fc(input=pixel, - size=100, - act=activation.Sigmoid(), - param_attr=attr.Param(name='hidden')) - inference = fc(input=hidden, size=10, act=activation.Softmax()) - maxid = max_id(input=inference) - cost1 = classification_cost(input=inference, label=label) - cost2 = classification_cost(input=inference, label=label, weight=weight) - cost3 = cross_entropy_cost(input=inference, label=label) - cost4 = cross_entropy_with_selfnorm_cost(input=inference, label=label) - cost5 = regression_cost(input=inference, label=label) - cost6 = regression_cost(input=inference, label=label, weight=weight) - cost7 = multi_binary_label_cross_entropy_cost(input=inference, label=label) - cost8 = rank_cost(left=score, right=score, label=score) - cost9 = lambda_cost(input=inference, score=score) - cost10 = sum_cost(input=inference) - cost11 = huber_cost(input=score, label=label) - - print parse_network(cost1, cost2) - print parse_network(cost3, cost4) - print parse_network(cost5, cost6) - print parse_network(cost7, cost8, cost9, cost10, cost11) - print parse_network(inference, maxid) +# convert projection +for prj in __projection_names__: + globals()[prj] = __convert_to_v2__(prj, parent_names=['input']) + +# convert operator +operator_list = [ + # [V1_method_name, parent_names], + ['dotmul_operator', ['a', 'b']], + ['conv_operator', ['img', 'filter']] +] +for op in operator_list: + globals()[op[0]] = __convert_to_v2__(op[0], parent_names=op[1]) diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index b600e8cf765122ab6cfe8530465391c92be0590f..bf1c344202f457f246c4f5b4f2ea8b42e916499c 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -19,8 +19,6 @@ import paddle.v2.activation as activation import paddle.v2.attr as attr import paddle.v2.data_type as data_type import paddle.v2.layer as layer -from paddle.trainer_config_helpers.config_parser_utils import \ - parse_network_config as parse_network pixel = layer.data(name='pixel', type=data_type.dense_vector(784)) label = layer.data(name='label', type=data_type.integer_value(10)) @@ -58,6 +56,92 @@ class CostLayerTest(unittest.TestCase): #print layer.parse_network(cost5, cost6) #print layer.parse_network(cost7, cost8, cost9, cost10, cost11) + def test_projection(self): + input = layer.data(name='data', type=data_type.dense_vector(784)) + word = layer.data( + name='word', type=data_type.integer_value_sequence(10000)) + fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid()) + fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid()) + mixed0 = layer.mixed( + size=256, + input=[ + layer.full_matrix_projection(input=fc0), + layer.full_matrix_projection(input=fc1) + ]) + with layer.mixed(size=200) as mixed1: + mixed1 += layer.full_matrix_projection(input=fc0) + mixed1 += layer.identity_projection(input=fc1) + + table = layer.table_projection(input=word) + emb0 = layer.mixed(size=512, input=table) + with layer.mixed(size=512) as emb1: + emb1 += table + + scale = layer.scaling_projection(input=fc0) + scale0 = layer.mixed(size=100, input=scale) + with layer.mixed(size=100) as scale1: + scale1 += scale + + dotmul = layer.dotmul_projection(input=fc0) + dotmul0 = layer.mixed(size=100, input=dotmul) + with layer.mixed(size=100) as dotmul1: + dotmul1 += dotmul + + context = layer.context_projection(input=fc0, context_len=5) + context0 = layer.mixed(size=100, input=context) + with layer.mixed(size=100) as context1: + context1 += context + + conv = layer.conv_projection( + input=input, + filter_size=1, + num_channels=1, + num_filters=128, + stride=1, + padding=0) + conv0 = layer.mixed(input=conv, bias_attr=True) + with layer.mixed(bias_attr=True) as conv1: + conv1 += conv + + print layer.parse_network(mixed0) + print layer.parse_network(mixed1) + print layer.parse_network(emb0) + print layer.parse_network(emb1) + print layer.parse_network(scale0) + print layer.parse_network(scale1) + print layer.parse_network(dotmul0) + print layer.parse_network(dotmul1) + print layer.parse_network(conv0) + print layer.parse_network(conv1) + + def test_operator(self): + ipt0 = layer.data(name='data', type=data_type.dense_vector(784)) + ipt1 = layer.data(name='word', type=data_type.dense_vector(128)) + fc0 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid()) + fc1 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid()) + + dotmul_op = layer.dotmul_operator(a=fc0, b=fc1) + dotmul0 = layer.mixed(input=dotmul_op) + with layer.mixed() as dotmul1: + dotmul1 += dotmul_op + + conv = layer.conv_operator( + img=ipt0, + filter=ipt1, + filter_size=1, + num_channels=1, + num_filters=128, + stride=1, + padding=0) + conv0 = layer.mixed(input=conv) + with layer.mixed() as conv1: + conv1 += conv + + print layer.parse_network(dotmul0) + print layer.parse_network(dotmul1) + print layer.parse_network(conv0) + print layer.parse_network(conv1) + if __name__ == '__main__': unittest.main()