diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index de903f8c747c97a8e3830d090baa05bf27b23e24..1bb1a01d509e6412c254fce856101137e66b1e12 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -30,28 +30,90 @@ except ImportError:
 import copy
 
 __all__ = [
-    "full_matrix_projection", "AggregateLevel", "ExpandLevel",
-    "identity_projection", "dotmul_projection", "dotmul_operator",
-    "repeat_layer", "seq_reshape_layer", "table_projection", "mixed_layer",
-    "data_layer", "embedding_layer", "fc_layer", "grumemory", "pooling_layer",
-    "lstmemory", "last_seq", "first_seq", "cos_sim", "hsigmoid",
-    "conv_projection", "regression_cost", 'classification_cost', "LayerOutput",
-    'img_conv_layer', 'img_pool_layer', 'batch_norm_layer', 'img_cmrnorm_layer',
-    'addto_layer', 'concat_layer', 'seq_concat_layer', 'lstm_step_layer',
-    'recurrent_group', 'memory', 'StaticInput', 'expand_layer', 'scaling_layer',
-    'scaling_projection', 'power_layer', 'interpolation_layer',
-    'bilinear_interp_layer', 'trans_layer', 'rotate_layer',
-    'sum_to_one_norm_layer', 'get_output_layer', 'LayerType',
-    'context_projection', 'beam_search', 'maxid_layer', 'GeneratedInput',
-    'SubsequenceInput', 'gru_step_layer', 'recurrent_layer',
-    'BaseGeneratedInput', 'conv_operator', 'conv_shift_layer', 'tensor_layer',
-    'selective_fc_layer', 'sampling_id_layer', 'slope_intercept_layer',
-    'trans_full_matrix_projection', 'linear_comb_layer', 'convex_comb_layer',
-    'ctc_layer', 'warp_ctc_layer', 'crf_layer', 'crf_decoding_layer',
-    'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy',
-    'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', 'lambda_cost',
-    'huber_cost', 'block_expand_layer', 'maxout_layer', 'out_prod_layer',
-    'print_layer', 'priorbox_layer', 'spp_layer', 'pad_layer', 'eos_layer'
+    "full_matrix_projection",
+    "AggregateLevel",
+    "ExpandLevel",
+    "identity_projection",
+    "dotmul_projection",
+    "dotmul_operator",
+    "repeat_layer",
+    "seq_reshape_layer",
+    "table_projection",
+    "mixed_layer",
+    "data_layer",
+    "embedding_layer",
+    "fc_layer",
+    "grumemory",
+    "pooling_layer",
+    "lstmemory",
+    "last_seq",
+    "first_seq",
+    "cos_sim",
+    "hsigmoid",
+    "conv_projection",
+    "regression_cost",
+    'classification_cost',
+    "LayerOutput",
+    'img_conv_layer',
+    'img_pool_layer',
+    'batch_norm_layer',
+    'img_cmrnorm_layer',
+    'addto_layer',
+    'concat_layer',
+    'seq_concat_layer',
+    'lstm_step_layer',
+    'recurrent_group',
+    'memory',
+    'StaticInput',
+    'expand_layer',
+    'scaling_layer',
+    'scaling_projection',
+    'power_layer',
+    'interpolation_layer',
+    'bilinear_interp_layer',
+    'trans_layer',
+    'rotate_layer',
+    'sum_to_one_norm_layer',
+    'get_output_layer',
+    'LayerType',
+    'context_projection',
+    'beam_search',
+    'maxid_layer',
+    'GeneratedInput',
+    'SubsequenceInput',
+    'gru_step_layer',
+    'recurrent_layer',
+    'BaseGeneratedInput',
+    'conv_operator',
+    'conv_shift_layer',
+    'tensor_layer',
+    'selective_fc_layer',
+    'sampling_id_layer',
+    'slope_intercept_layer',
+    'trans_full_matrix_projection',
+    'linear_comb_layer',
+    'convex_comb_layer',
+    'ctc_layer',
+    'warp_ctc_layer',
+    'crf_layer',
+    'crf_decoding_layer',
+    'nce_layer',
+    'cross_entropy_with_selfnorm',
+    'cross_entropy',
+    'multi_binary_label_cross_entropy',
+    'sum_cost',
+    'rank_cost',
+    'lambda_cost',
+    'huber_cost',
+    'block_expand_layer',
+    'maxout_layer',
+    'out_prod_layer',
+    'print_layer',
+    'priorbox_layer',
+    'spp_layer',
+    'pad_layer',
+    'eos_layer',
+    'layer_support',
 ]
 
 
@@ -648,6 +710,7 @@ class MixedLayerType(LayerOutput):
         # update the size which might be computed inside MixedLayer
         # according to the operator's output size
         self.size = ml.config.size
+        self.finalized = True
 
 
 @wrap_name_default("mixed")
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
index f0e4f972feb2600ed097d7d0ce3c218acf26f0d6..d15e6398f51f43c1eeab67bba654f91cc56135a4 100644
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
@@ -71,9 +71,15 @@ import collections
 import paddle.trainer_config_helpers as conf_helps
 from paddle.trainer_config_helpers.config_parser_utils import \
     parse_network_config as __parse__
+
 from paddle.trainer_config_helpers.default_decorators import wrap_name_default
+from paddle.trainer_config_helpers.default_decorators import wrap_act_default
+from paddle.trainer_config_helpers.default_decorators import wrap_bias_attr_default
+from paddle.trainer_config_helpers.layers import layer_support
 
 import data_type
+import activation
+import attr
 
 __all__ = [
     'parse_network', 'data', 'fc', 'conv_shift', 'img_conv', 'img_pool', 'spp',
@@ -89,6 +95,13 @@ __all__ = [
     'hsigmoid', 'eos'
 ]
 
+__projection_names__ = filter(lambda x: x.endswith('_projection'),
+                              dir(conf_helps))
+__all__ += __projection_names__
+
+__operator_names__ = filter(lambda x: x.endswith('_operator'), dir(conf_helps))
+__all__ += __operator_names__
+
 
 def parse_network(*outputs):
     """
@@ -106,9 +119,8 @@ def parse_network(*outputs):
     """
 
 
 class Layer(object):
-    def __init__(self, name, parent_layers):
+    def __init__(self, name=None, parent_layers=None):
         assert isinstance(parent_layers, dict)
-        assert isinstance(name, basestring)
         self.name = name
         self.__parent_layers__ = parent_layers
@@ -127,19 +139,25 @@ class Layer(object):
                 self.__parent_layers__[layer_name])
             kwargs[layer_name] = v1_layer
 
-        if self.name not in context:
+        if self.name is None:
+            return self.to_proto_impl(**kwargs)
+        elif self.name not in context:
             context[self.name] = self.to_proto_impl(**kwargs)
+
         return context[self.name]
 
     def to_proto_impl(self, **kwargs):
         raise NotImplementedError()
 
 
-def __convert_to_v2__(method_name, parent_names):
-    wrapper = wrap_name_default(name_prefix=method_name)
+def __convert_to_v2__(method_name, parent_names, is_default_name=True):
+    if is_default_name:
+        wrapper = wrap_name_default(name_prefix=method_name)
+    else:
+        wrapper = None
 
     class V2LayerImpl(Layer):
-        def __init__(self, name=None, **kwargs):
+        def __init__(self, **kwargs):
             parent_layers = dict()
             other_kwargs = dict()
             for pname in parent_names:
@@ -150,6 +168,7 @@ def __convert_to_v2__(method_name, parent_names):
                 if key not in parent_names:
                     other_kwargs[key] = kwargs[key]
 
+            name = kwargs.get('name', None)
             super(V2LayerImpl, self).__init__(name, parent_layers)
             self.__other_kwargs__ = other_kwargs
 
@@ -162,7 +181,7 @@ def __convert_to_v2__(method_name, parent_names):
                 args[each] = kwargs[each]
             for each in self.__other_kwargs__:
                 args[each] = self.__other_kwargs__[each]
-            return getattr(conf_helps, method_name)(name=self.name, **args)
+            return getattr(conf_helps, method_name)(**args)
 
     return V2LayerImpl
 
@@ -193,6 +212,78 @@ class DataLayerV2(Layer):
         return getattr(conf_helps, self.__method_name__)(name=self.name, **args)
 
 
+class MixedLayerV2(Layer):
+    """
+    This class is used to support the `with` statement. Without it,
+    mixed_layer could be converted simply as follows:
+
+        mixed = __convert_to_v2__(
+            'mixed_layer', parent_names=['input'])
+    """
+
+    class AddToSealedMixedLayerExceptionV2(Exception):
+        pass
+
+    def __init__(self,
+                 size=0,
+                 input=None,
+                 name=None,
+                 act=None,
+                 bias_attr=None,
+                 layer_attr=None):
+        self.__method_name__ = 'mixed_layer'
+        self.finalized = False
+        self.__inputs__ = []
+        if input is not None:
+            self.__inputs__ = input
+
+        other_kwargs = dict()
+        other_kwargs['name'] = name
+        other_kwargs['size'] = size
+        other_kwargs['act'] = act
+        other_kwargs['bias_attr'] = bias_attr
+        other_kwargs['layer_attr'] = layer_attr
+
+        parent_layers = {"input": self.__inputs__}
+        super(MixedLayerV2, self).__init__(name, parent_layers)
+        self.__other_kwargs__ = other_kwargs
+
+    def __iadd__(self, other):
+        if not self.finalized:
+            self.__inputs__.append(other)
+            return self
+        else:
+            raise MixedLayerV2.AddToSealedMixedLayerExceptionV2()
+
+    def __enter__(self):
+        assert len(self.__inputs__) == 0
+        return self
+
+    def __exit__(self, *args, **kwargs):
+        self.finalized = True
+
+    def to_proto_impl(self, **kwargs):
+        args = dict()
+        for each in kwargs:
+            args[each] = kwargs[each]
+        for each in self.__other_kwargs__:
+            args[each] = self.__other_kwargs__[each]
+        return getattr(conf_helps, self.__method_name__)(**args)
+
+
+@wrap_name_default("mixed")
+@wrap_act_default(act=activation.Linear())
+@wrap_bias_attr_default(has_bias=False)
+@layer_support(conf_helps.layers.ERROR_CLIPPING, conf_helps.layers.DROPOUT)
+def mixed(size=0,
+          name=None,
+          input=None,
+          act=None,
+          bias_attr=False,
+          layer_attr=None):
+    return MixedLayerV2(size, input, name, act, bias_attr, layer_attr)
+
+
 data = DataLayerV2
 AggregateLevel = conf_helps.layers.AggregateLevel
 ExpandLevel = conf_helps.layers.ExpandLevel
@@ -274,3 +365,18 @@ layer_list = [
 ]
 for l in layer_list:
     globals()[l[0]] = __convert_to_v2__(l[1], l[2])
+
+# convert projection
+for prj in __projection_names__:
+    globals()[prj] = __convert_to_v2__(
+        prj, parent_names=['input'], is_default_name=False)
+
+# convert operator
+operator_list = [
+    # [V1_method_name, parent_names],
+    ['dotmul_operator', ['a', 'b']],
+    ['conv_operator', ['img', 'filter']]
+]
+for op in operator_list:
+    globals()[op[0]] = __convert_to_v2__(
+        op[0], parent_names=op[1], is_default_name=False)
diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py
index 2f139866e8096af4a2833acee95da858bf3f315f..bb0099ea2fbb78b0a05eedf23af95a02e8849015 100644
--- a/python/paddle/v2/tests/test_layer.py
+++ b/python/paddle/v2/tests/test_layer.py
@@ -167,5 +167,93 @@ class OtherLayerTest(unittest.TestCase):
         print layer.parse_network(pad)
 
 
+class ProjOpTest(unittest.TestCase):
+    def test_projection(self):
+        input = layer.data(name='data', type=data_type.dense_vector(784))
+        word = layer.data(
+            name='word', type=data_type.integer_value_sequence(10000))
+        fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid())
+        fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid())
+        mixed0 = layer.mixed(
+            size=256,
+            input=[
+                layer.full_matrix_projection(input=fc0),
+                layer.full_matrix_projection(input=fc1)
+            ])
+        with layer.mixed(size=200) as mixed1:
+            mixed1 += layer.full_matrix_projection(input=fc0)
+            mixed1 += layer.identity_projection(input=fc1)
+
+        table = layer.table_projection(input=word)
+        emb0 = layer.mixed(size=512, input=table)
+        with layer.mixed(size=512) as emb1:
+            emb1 += table
+
+        scale = layer.scaling_projection(input=fc0)
+        scale0 = layer.mixed(size=100, input=scale)
+        with layer.mixed(size=100) as scale1:
+            scale1 += scale
+
+        dotmul = layer.dotmul_projection(input=fc0)
+        dotmul0 = layer.mixed(size=100, input=dotmul)
+        with layer.mixed(size=100) as dotmul1:
+            dotmul1 += dotmul
+
+        context = layer.context_projection(input=fc0, context_len=5)
+        context0 = layer.mixed(size=100, input=context)
+        with layer.mixed(size=100) as context1:
+            context1 += context
+
+        conv = layer.conv_projection(
+            input=input,
+            filter_size=1,
+            num_channels=1,
+            num_filters=128,
+            stride=1,
+            padding=0)
+        conv0 = layer.mixed(input=conv, bias_attr=True)
+        with layer.mixed(bias_attr=True) as conv1:
+            conv1 += conv
+
+        print layer.parse_network(mixed0)
+        print layer.parse_network(mixed1)
+        print layer.parse_network(emb0)
+        print layer.parse_network(emb1)
+        print layer.parse_network(scale0)
+        print layer.parse_network(scale1)
+        print layer.parse_network(dotmul0)
+        print layer.parse_network(dotmul1)
+        print layer.parse_network(conv0)
+        print layer.parse_network(conv1)
+
+    def test_operator(self):
+        ipt0 = layer.data(name='data', type=data_type.dense_vector(784))
+        ipt1 = layer.data(name='word', type=data_type.dense_vector(128))
+        fc0 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid())
+        fc1 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid())
+
+        dotmul_op = layer.dotmul_operator(a=fc0, b=fc1)
+        dotmul0 = layer.mixed(input=dotmul_op)
+        with layer.mixed() as dotmul1:
+            dotmul1 += dotmul_op
+
+        conv = layer.conv_operator(
+            img=ipt0,
+            filter=ipt1,
+            filter_size=1,
+            num_channels=1,
+            num_filters=128,
+            stride=1,
+            padding=0)
+        conv0 = layer.mixed(input=conv)
+        with layer.mixed() as conv1:
+            conv1 += conv
+
+        print layer.parse_network(dotmul0)
+        print layer.parse_network(dotmul1)
+        print layer.parse_network(conv0)
+        print layer.parse_network(conv1)
+
+
 if __name__ == '__main__':
     unittest.main()
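
Usage sketch (not part of the patch): the snippet below shows the two construction styles MixedLayerV2 supports and the sealing behavior enforced by __enter__/__exit__ and __iadd__. It assumes the paddle.v2 package exposes layer, activation, and data_type as in the tests above; the variable names (pixel, hidden, m0, m1) are illustrative, and the code is Python 2 like the rest of this patch.

    # Minimal sketch of the v2 mixed-layer API added above.
    import paddle.v2.activation as activation
    import paddle.v2.data_type as data_type
    import paddle.v2.layer as layer

    pixel = layer.data(name='pixel', type=data_type.dense_vector(784))
    hidden = layer.fc(input=pixel, size=100, act=activation.Sigmoid())

    # Style 1: pass the projections directly to the constructor.
    m0 = layer.mixed(
        size=100, input=[layer.full_matrix_projection(input=hidden)])

    # Style 2: accumulate projections inside a `with` block; __exit__ then
    # seals the layer by setting finalized = True.
    with layer.mixed(size=100) as m1:
        m1 += layer.full_matrix_projection(input=hidden)

    # Any further `m1 += ...` now raises
    # MixedLayerV2.AddToSealedMixedLayerExceptionV2.

    print layer.parse_network(m0)
    print layer.parse_network(m1)

A note on the is_default_name=False flag used when converting projections and operators: the v1 projection and operator helpers take no name argument, so their v2 wrappers must not auto-generate one. __convert_to_v2__ therefore skips wrap_name_default for them, and Layer.to_proto handles the resulting unnamed layers by calling to_proto_impl directly instead of registering them in the context dict.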