diff --git a/demo/sentiment/train_v2.py b/demo/sentiment/train_v2.py index 0fa74948533b4362a7a9206e7a787cf217ca5ca2..3a266e74ea93068cad2757d0076a4ae664ad4cf8 100644 --- a/demo/sentiment/train_v2.py +++ b/demo/sentiment/train_v2.py @@ -1,126 +1,40 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys -from os.path import join as join_path import paddle.trainer_config_helpers.attrs as attrs from paddle.trainer_config_helpers.poolings import MaxPooling -import paddle.v2.layer as layer -import paddle.v2.activation as activation -import paddle.v2.data_type as data_type -import paddle.v2.dataset.imdb as imdb import paddle.v2 as paddle -def sequence_conv_pool(input, - input_size, - context_len, - hidden_size, - name=None, - context_start=None, - pool_type=None, - context_proj_layer_name=None, - context_proj_param_attr=False, - fc_layer_name=None, - fc_param_attr=None, - fc_bias_attr=None, - fc_act=None, - pool_bias_attr=None, - fc_attr=None, - context_attr=None, - pool_attr=None): - """ - Text convolution pooling layers helper. - - Text input => Context Projection => FC Layer => Pooling => Output. - - :param name: name of output layer(pooling layer name) - :type name: basestring - :param input: name of input layer - :type input: LayerOutput - :param context_len: context projection length. See - context_projection's document. - :type context_len: int - :param hidden_size: FC Layer size. - :type hidden_size: int - :param context_start: context projection length. See - context_projection's context_start. - :type context_start: int or None - :param pool_type: pooling layer type. See pooling_layer's document. - :type pool_type: BasePoolingType. - :param context_proj_layer_name: context projection layer name. - None if user don't care. - :type context_proj_layer_name: basestring - :param context_proj_param_attr: context projection parameter attribute. - None if user don't care. - :type context_proj_param_attr: ParameterAttribute or None. - :param fc_layer_name: fc layer name. None if user don't care. - :type fc_layer_name: basestring - :param fc_param_attr: fc layer parameter attribute. None if user don't care. - :type fc_param_attr: ParameterAttribute or None - :param fc_bias_attr: fc bias parameter attribute. False if no bias, - None if user don't care. - :type fc_bias_attr: ParameterAttribute or None - :param fc_act: fc layer activation type. None means tanh - :type fc_act: BaseActivation - :param pool_bias_attr: pooling layer bias attr. None if don't care. - False if no bias. - :type pool_bias_attr: ParameterAttribute or None. - :param fc_attr: fc layer extra attribute. - :type fc_attr: ExtraLayerAttribute - :param context_attr: context projection layer extra attribute. - :type context_attr: ExtraLayerAttribute - :param pool_attr: pooling layer extra attribute. - :type pool_attr: ExtraLayerAttribute - :return: output layer name. - :rtype: LayerOutput - """ - # Set Default Value to param - context_proj_layer_name = "%s_conv_proj" % name \ - if context_proj_layer_name is None else context_proj_layer_name - - with layer.mixed( - name=context_proj_layer_name, - size=input_size * context_len, - act=activation.Linear(), - layer_attr=context_attr) as m: - m += layer.context_projection( - input=input, - context_len=context_len, - context_start=context_start, - padding_attr=context_proj_param_attr) - - fc_layer_name = "%s_conv_fc" % name \ - if fc_layer_name is None else fc_layer_name - fl = layer.fc(name=fc_layer_name, - input=m, - size=hidden_size, - act=fc_act, - layer_attr=fc_attr, - param_attr=fc_param_attr, - bias_attr=fc_bias_attr) - - return layer.pooling( - name=name, - input=fl, - pooling_type=pool_type, - bias_attr=pool_bias_attr, - layer_attr=pool_attr) - - def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128, is_predict=False): - data = layer.data("word", data_type.integer_value_sequence(input_dim)) - emb = layer.embedding(input=data, size=emb_dim) - conv_3 = sequence_conv_pool( - input=emb, input_size=emb_dim, context_len=3, hidden_size=hid_dim) - conv_4 = sequence_conv_pool( - input=emb, input_size=emb_dim, context_len=4, hidden_size=hid_dim) - output = layer.fc(input=[conv_3, conv_4], - size=class_dim, - act=activation.Softmax()) - lbl = layer.data("label", data_type.integer_value(2)) - cost = layer.classification_cost(input=output, label=lbl) + data = paddle.layer.data("word", + paddle.data_type.integer_value_sequence(input_dim)) + emb = paddle.layer.embedding(input=data, size=emb_dim) + conv_3 = paddle.networks.sequence_conv_pool( + input=emb, context_len=3, hidden_size=hid_dim) + conv_4 = paddle.networks.sequence_conv_pool( + input=emb, context_len=4, hidden_size=hid_dim) + output = paddle.layer.fc(input=[conv_3, conv_4], + size=class_dim, + act=paddle.activation.Softmax()) + lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) + cost = paddle.layer.classification_cost(input=output, label=lbl) return cost @@ -152,24 +66,28 @@ def stacked_lstm_net(input_dim, lstm_para_attr = attrs.ParameterAttribute(initial_std=0., learning_rate=1.) para_attr = [fc_para_attr, lstm_para_attr] bias_attr = attrs.ParameterAttribute(initial_std=0., l2_rate=0.) - relu = activation.Relu() - linear = activation.Linear() - - data = layer.data("word", data_type.integer_value_sequence(input_dim)) - emb = layer.embedding(input=data, size=emb_dim) - - fc1 = layer.fc(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr) - lstm1 = layer.lstmemory( + relu = paddle.activation.Relu() + linear = paddle.activation.Linear() + + data = paddle.layer.data("word", + paddle.data_type.integer_value_sequence(input_dim)) + emb = paddle.layer.embedding(input=data, size=emb_dim) + + fc1 = paddle.layer.fc(input=emb, + size=hid_dim, + act=linear, + bias_attr=bias_attr) + lstm1 = paddle.layer.lstmemory( input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr) inputs = [fc1, lstm1] for i in range(2, stacked_num + 1): - fc = layer.fc(input=inputs, - size=hid_dim, - act=linear, - param_attr=para_attr, - bias_attr=bias_attr) - lstm = layer.lstmemory( + fc = paddle.layer.fc(input=inputs, + size=hid_dim, + act=linear, + param_attr=para_attr, + bias_attr=bias_attr) + lstm = paddle.layer.lstmemory( input=fc, reverse=(i % 2) == 0, act=relu, @@ -177,16 +95,16 @@ def stacked_lstm_net(input_dim, layer_attr=layer_attr) inputs = [fc, lstm] - fc_last = layer.pooling(input=inputs[0], pooling_type=MaxPooling()) - lstm_last = layer.pooling(input=inputs[1], pooling_type=MaxPooling()) - output = layer.fc(input=[fc_last, lstm_last], - size=class_dim, - act=activation.Softmax(), - bias_attr=bias_attr, - param_attr=para_attr) + fc_last = paddle.layer.pooling(input=inputs[0], pooling_type=MaxPooling()) + lstm_last = paddle.layer.pooling(input=inputs[1], pooling_type=MaxPooling()) + output = paddle.layer.fc(input=[fc_last, lstm_last], + size=class_dim, + act=paddle.activation.Softmax(), + bias_attr=bias_attr, + param_attr=para_attr) - lbl = layer.data("label", data_type.integer_value(2)) - cost = layer.classification_cost(input=output, label=lbl) + lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) + cost = paddle.layer.classification_cost(input=output, label=lbl) return cost @@ -196,7 +114,7 @@ if __name__ == '__main__': # network config print 'load dictionary...' - word_dict = imdb.word_dict() + word_dict = paddle.dataset.imdb.word_dict() dict_dim = len(word_dict) class_dim = 2 @@ -226,7 +144,8 @@ if __name__ == '__main__': if isinstance(event, paddle.event.EndPass): result = trainer.test( reader=paddle.reader.batched( - lambda: imdb.test(word_dict), batch_size=128), + lambda: paddle.dataset.imdb.test(word_dict), + batch_size=128), reader_dict={'word': 0, 'label': 1}) print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) @@ -239,7 +158,7 @@ if __name__ == '__main__': trainer.train( reader=paddle.reader.batched( paddle.reader.shuffle( - lambda: imdb.train(word_dict), buf_size=1000), + lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000), batch_size=100), event_handler=event_handler, reader_dict={'word': 0, diff --git a/python/paddle/v2/config_base.py b/python/paddle/v2/config_base.py index fa2ccec6c3270541dd6b13fdfd2323d10ceac642..0fc711e99a18a4ed2665315c22b78e96be942b89 100644 --- a/python/paddle/v2/config_base.py +++ b/python/paddle/v2/config_base.py @@ -93,6 +93,8 @@ def __convert_to_v2__(method_name, parent_names, is_default_name=True): name = kwargs.get('name', None) super(V2LayerImpl, self).__init__(name, parent_layers) + if kwargs.has_key('size'): + self.size = kwargs['size'] self.__other_kwargs__ = other_kwargs if wrapper is not None: