diff --git a/.travis.yml b/.travis.yml index 28d1f51be7107b57594eb8aacab2e82d2dec624a..5a7f45a748ac7e81f3f90c245bcf2cd84c4e9027 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,22 +4,14 @@ cache: - $HOME/third_party - $HOME/.ccache - $HOME/.cache/pip - - $HOME/Library/Caches/Homebrew sudo: required dist: trusty os: - linux - - osx env: - JOB=DOCS - JOB=BUILD_AND_TEST - JOB=PRE_COMMIT -matrix: - exclude: - - os: osx - env: JOB=DOCS # Only generate documentation in linux. - - os: osx - env: JOB=PRE_COMMIT # Only check pre-commit hook in linux addons: apt: @@ -53,7 +45,6 @@ before_install: fi fi fi - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # protobuf version. diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 84f459033f06f89d3b150317793c7e62274468b2..26da7e8e384bafdcbcd1a358c39cc6eb167b067e 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -14,46 +14,50 @@ INCLUDE(ExternalProject) -SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf) -SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf) -SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE) +FIND_PACKAGE(Protobuf) -INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) +IF(NOT PROTOBUF_FOUND) + SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf) + SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf) + SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE) + + IF(WIN32) + SET(PROTOBUF_LITE_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE) + SET(PROTOBUF_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE) + SET(PROTOBUF_PROTOC_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE) + SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE) + ELSE(WIN32) + SET(PROTOBUF_LITE_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE) + SET(PROTOBUF_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE) + SET(PROTOBUF_PROTOC_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE) + SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE) + ENDIF(WIN32) -IF(WIN32) - SET(PROTOBUF_LITE_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE) - SET(PROTOBUF_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE) - SET(PROTOBUF_PROTOC_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE) - SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE) -ELSE(WIN32) - SET(PROTOBUF_LITE_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE) - SET(PROTOBUF_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." 
FORCE) - SET(PROTOBUF_PROTOC_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE) - SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE) -ENDIF(WIN32) + ExternalProject_Add( + protobuf + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${PROTOBUF_SOURCES_DIR} + UPDATE_COMMAND "" + DEPENDS zlib + GIT_REPOSITORY "https://github.com/google/protobuf.git" + GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546" + CONFIGURE_COMMAND + ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake + -Dprotobuf_BUILD_TESTS=OFF + -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=lib + ) -ExternalProject_Add( - protobuf - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${PROTOBUF_SOURCES_DIR} - UPDATE_COMMAND "" - DEPENDS zlib - GIT_REPOSITORY "https://github.com/google/protobuf.git" - GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546" - CONFIGURE_COMMAND - ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake - -Dprotobuf_BUILD_TESTS=OFF - -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT} - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=Release - -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} - -DCMAKE_INSTALL_LIBDIR=lib -) + LIST(APPEND external_project_dependencies protobuf) +ENDIF(NOT PROTOBUF_FOUND) -LIST(APPEND external_project_dependencies protobuf) +INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake index 6372a9a768e580f74f837ccb6c57d4f4395eb779..0accf1a8dd83560324716f0f4936be56dd7a9f1b 100644 --- a/cmake/external/python.cmake +++ b/cmake/external/python.cmake @@ -221,7 +221,3 @@ ENDIF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR}) - -MESSAGE("[Paddle] Python Executable: ${PYTHON_EXECUTABLE}") -MESSAGE("[Paddle] Python Include: ${PYTHON_INCLUDE_DIRS}") -MESSAGE("[Paddle] Python Libraries: ${PYTHON_LIBRARIES}") diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py index 341a7165da4e2f6bce1d37bfecb9b58abedc44a1..9b7ebde5007047e34da9274bf8165cfa527e2cf1 100644 --- a/demo/mnist/api_train_v2.py +++ b/demo/mnist/api_train_v2.py @@ -55,7 +55,7 @@ def convolutional_neural_network(img): def main(): - paddle.init(use_gpu=True, trainer_count=1) + paddle.init(use_gpu=False, trainer_count=1) # define network topology images = paddle.layer.data( @@ -82,7 +82,7 @@ def main(): parameters=parameters, update_equation=optimizer) - list = [] + lists = [] def event_handler(event): if isinstance(event, paddle.event.EndIteration): @@ -93,9 +93,9 @@ def main(): result = trainer.test(reader=paddle.reader.batched( paddle.dataset.mnist.test(), batch_size=128)) print "Test with Pass %d, Cost %f, %s\n" % ( - event.pass_id, event.cost, result.metrics) - list.append((event.pass_id, event.cost, - result.metrics['classification_error_evaluator'])) + event.pass_id, result.cost, result.metrics) + lists.append((event.pass_id, result.cost, + result.metrics['classification_error_evaluator'])) trainer.train( reader=paddle.reader.batched( @@ -106,7 +106,7 @@ def main(): num_passes=100) # find the best pass - best = sorted(list, key=lambda list: float(list[1]))[0] + best = sorted(lists, key=lambda list: float(list[1]))[0] print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1]) print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100) diff --git 
a/demo/semantic_role_labeling/api_train_v2.py b/demo/semantic_role_labeling/api_train_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..e946a792b5f51ab54355aac0b6e9aef51ae815fb --- /dev/null +++ b/demo/semantic_role_labeling/api_train_v2.py @@ -0,0 +1,175 @@ +import sys +import math +import numpy as np +import paddle.v2 as paddle +import paddle.v2.dataset.conll05 as conll05 + + +def db_lstm(): + word_dict, verb_dict, label_dict = conll05.get_dict() + word_dict_len = len(word_dict) + label_dict_len = len(label_dict) + pred_len = len(verb_dict) + + mark_dict_len = 2 + word_dim = 32 + mark_dim = 5 + hidden_dim = 512 + depth = 8 + + #8 features + def d_type(size): + return paddle.data_type.integer_value_sequence(size) + + word = paddle.layer.data(name='word_data', type=d_type(word_dict_len)) + predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len)) + + ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len)) + ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len)) + ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len)) + ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len)) + ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len)) + mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len)) + + target = paddle.layer.data(name='target', type=d_type(label_dict_len)) + + default_std = 1 / math.sqrt(hidden_dim) / 3.0 + + emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.) + std_0 = paddle.attr.Param(initial_std=0.) + std_default = paddle.attr.Param(initial_std=default_std) + + predicate_embedding = paddle.layer.embedding( + size=word_dim, + input=predicate, + param_attr=paddle.attr.Param( + name='vemb', initial_std=default_std)) + mark_embedding = paddle.layer.embedding( + size=mark_dim, input=mark, param_attr=std_0) + + word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] + emb_layers = [ + paddle.layer.embedding( + size=word_dim, input=x, param_attr=emb_para) for x in word_input + ] + emb_layers.append(predicate_embedding) + emb_layers.append(mark_embedding) + + hidden_0 = paddle.layer.mixed( + size=hidden_dim, + bias_attr=std_default, + input=[ + paddle.layer.full_matrix_projection( + input=emb, param_attr=std_default) for emb in emb_layers + ]) + + mix_hidden_lr = 1e-3 + lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0) + hidden_para_attr = paddle.attr.Param( + initial_std=default_std, learning_rate=mix_hidden_lr) + + lstm_0 = paddle.layer.lstmemory( + input=hidden_0, + act=paddle.activation.Relu(), + gate_act=paddle.activation.Sigmoid(), + state_act=paddle.activation.Sigmoid(), + bias_attr=std_0, + param_attr=lstm_para_attr) + + #stack L-LSTM and R-LSTM with direct edges + input_tmp = [hidden_0, lstm_0] + + for i in range(1, depth): + mix_hidden = paddle.layer.mixed( + size=hidden_dim, + bias_attr=std_default, + input=[ + paddle.layer.full_matrix_projection( + input=input_tmp[0], param_attr=hidden_para_attr), + paddle.layer.full_matrix_projection( + input=input_tmp[1], param_attr=lstm_para_attr) + ]) + + lstm = paddle.layer.lstmemory( + input=mix_hidden, + act=paddle.activation.Relu(), + gate_act=paddle.activation.Sigmoid(), + state_act=paddle.activation.Sigmoid(), + reverse=((i % 2) == 1), + bias_attr=std_0, + param_attr=lstm_para_attr) + + input_tmp = [mix_hidden, lstm] + + feature_out = paddle.layer.mixed( + size=label_dict_len, + bias_attr=std_default, + input=[ + 
paddle.layer.full_matrix_projection( + input=input_tmp[0], param_attr=hidden_para_attr), + paddle.layer.full_matrix_projection( + input=input_tmp[1], param_attr=lstm_para_attr) + ], ) + + crf_cost = paddle.layer.crf(size=label_dict_len, + input=feature_out, + label=target, + param_attr=paddle.attr.Param( + name='crfw', + initial_std=default_std, + learning_rate=mix_hidden_lr)) + + crf_dec = paddle.layer.crf_decoding( + name='crf_dec_l', + size=label_dict_len, + input=feature_out, + label=target, + param_attr=paddle.attr.Param(name='crfw')) + + return crf_cost, crf_dec + + +def load_parameter(file_name, h, w): + with open(file_name, 'rb') as f: + f.read(16) # skip header. + return np.fromfile(f, dtype=np.float32).reshape(h, w) + + +def main(): + paddle.init(use_gpu=False, trainer_count=1) + + # define network topology + crf_cost, crf_dec = db_lstm() + + # create parameters + parameters = paddle.parameters.create([crf_cost, crf_dec]) + + # create optimizer + optimizer = paddle.optimizer.Momentum( + momentum=0, + learning_rate=2e-2, + regularization=paddle.optimizer.L2Regularization(rate=8e-4), + model_average=paddle.optimizer.ModelAverage( + average_window=0.5, max_average_window=10000), ) + + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "Pass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + + trainer = paddle.trainer.SGD(cost=crf_cost, + parameters=parameters, + update_equation=optimizer) + parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32)) + + trn_reader = paddle.reader.batched( + paddle.reader.shuffle( + conll05.test(), buf_size=8192), batch_size=10) + + trainer.train( + reader=trn_reader, event_handler=event_handler, num_passes=10000) + + +if __name__ == '__main__': + main() diff --git a/demo/sentiment/train_v2.py b/demo/sentiment/train_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..0fa74948533b4362a7a9206e7a787cf217ca5ca2 --- /dev/null +++ b/demo/sentiment/train_v2.py @@ -0,0 +1,247 @@ +import sys +from os.path import join as join_path +import paddle.trainer_config_helpers.attrs as attrs +from paddle.trainer_config_helpers.poolings import MaxPooling +import paddle.v2.layer as layer +import paddle.v2.activation as activation +import paddle.v2.data_type as data_type +import paddle.v2.dataset.imdb as imdb +import paddle.v2 as paddle + + +def sequence_conv_pool(input, + input_size, + context_len, + hidden_size, + name=None, + context_start=None, + pool_type=None, + context_proj_layer_name=None, + context_proj_param_attr=False, + fc_layer_name=None, + fc_param_attr=None, + fc_bias_attr=None, + fc_act=None, + pool_bias_attr=None, + fc_attr=None, + context_attr=None, + pool_attr=None): + """ + Text convolution pooling layers helper. + + Text input => Context Projection => FC Layer => Pooling => Output. + + :param name: name of output layer(pooling layer name) + :type name: basestring + :param input: name of input layer + :type input: LayerOutput + :param context_len: context projection length. See + context_projection's document. + :type context_len: int + :param hidden_size: FC Layer size. + :type hidden_size: int + :param context_start: context projection length. See + context_projection's context_start. + :type context_start: int or None + :param pool_type: pooling layer type. See pooling_layer's document. + :type pool_type: BasePoolingType. + :param context_proj_layer_name: context projection layer name. 
+ None if user don't care. + :type context_proj_layer_name: basestring + :param context_proj_param_attr: context projection parameter attribute. + None if user don't care. + :type context_proj_param_attr: ParameterAttribute or None. + :param fc_layer_name: fc layer name. None if user don't care. + :type fc_layer_name: basestring + :param fc_param_attr: fc layer parameter attribute. None if user don't care. + :type fc_param_attr: ParameterAttribute or None + :param fc_bias_attr: fc bias parameter attribute. False if no bias, + None if user don't care. + :type fc_bias_attr: ParameterAttribute or None + :param fc_act: fc layer activation type. None means tanh + :type fc_act: BaseActivation + :param pool_bias_attr: pooling layer bias attr. None if don't care. + False if no bias. + :type pool_bias_attr: ParameterAttribute or None. + :param fc_attr: fc layer extra attribute. + :type fc_attr: ExtraLayerAttribute + :param context_attr: context projection layer extra attribute. + :type context_attr: ExtraLayerAttribute + :param pool_attr: pooling layer extra attribute. + :type pool_attr: ExtraLayerAttribute + :return: output layer name. + :rtype: LayerOutput + """ + # Set Default Value to param + context_proj_layer_name = "%s_conv_proj" % name \ + if context_proj_layer_name is None else context_proj_layer_name + + with layer.mixed( + name=context_proj_layer_name, + size=input_size * context_len, + act=activation.Linear(), + layer_attr=context_attr) as m: + m += layer.context_projection( + input=input, + context_len=context_len, + context_start=context_start, + padding_attr=context_proj_param_attr) + + fc_layer_name = "%s_conv_fc" % name \ + if fc_layer_name is None else fc_layer_name + fl = layer.fc(name=fc_layer_name, + input=m, + size=hidden_size, + act=fc_act, + layer_attr=fc_attr, + param_attr=fc_param_attr, + bias_attr=fc_bias_attr) + + return layer.pooling( + name=name, + input=fl, + pooling_type=pool_type, + bias_attr=pool_bias_attr, + layer_attr=pool_attr) + + +def convolution_net(input_dim, + class_dim=2, + emb_dim=128, + hid_dim=128, + is_predict=False): + data = layer.data("word", data_type.integer_value_sequence(input_dim)) + emb = layer.embedding(input=data, size=emb_dim) + conv_3 = sequence_conv_pool( + input=emb, input_size=emb_dim, context_len=3, hidden_size=hid_dim) + conv_4 = sequence_conv_pool( + input=emb, input_size=emb_dim, context_len=4, hidden_size=hid_dim) + output = layer.fc(input=[conv_3, conv_4], + size=class_dim, + act=activation.Softmax()) + lbl = layer.data("label", data_type.integer_value(2)) + cost = layer.classification_cost(input=output, label=lbl) + return cost + + +def stacked_lstm_net(input_dim, + class_dim=2, + emb_dim=128, + hid_dim=512, + stacked_num=3, + is_predict=False): + """ + A Wrapper for sentiment classification task. + This network uses bi-directional recurrent network, + consisting three LSTM layers. This configure is referred to + the paper as following url, but use fewer layrs. + http://www.aclweb.org/anthology/P15-1109 + + input_dim: here is word dictionary dimension. + class_dim: number of categories. + emb_dim: dimension of word embedding. + hid_dim: dimension of hidden layer. + stacked_num: number of stacked lstm-hidden layer. + is_predict: is predicting or not. + Some layers is not needed in network when predicting. 
+ """ + assert stacked_num % 2 == 1 + + layer_attr = attrs.ExtraLayerAttribute(drop_rate=0.5) + fc_para_attr = attrs.ParameterAttribute(learning_rate=1e-3) + lstm_para_attr = attrs.ParameterAttribute(initial_std=0., learning_rate=1.) + para_attr = [fc_para_attr, lstm_para_attr] + bias_attr = attrs.ParameterAttribute(initial_std=0., l2_rate=0.) + relu = activation.Relu() + linear = activation.Linear() + + data = layer.data("word", data_type.integer_value_sequence(input_dim)) + emb = layer.embedding(input=data, size=emb_dim) + + fc1 = layer.fc(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr) + lstm1 = layer.lstmemory( + input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr) + + inputs = [fc1, lstm1] + for i in range(2, stacked_num + 1): + fc = layer.fc(input=inputs, + size=hid_dim, + act=linear, + param_attr=para_attr, + bias_attr=bias_attr) + lstm = layer.lstmemory( + input=fc, + reverse=(i % 2) == 0, + act=relu, + bias_attr=bias_attr, + layer_attr=layer_attr) + inputs = [fc, lstm] + + fc_last = layer.pooling(input=inputs[0], pooling_type=MaxPooling()) + lstm_last = layer.pooling(input=inputs[1], pooling_type=MaxPooling()) + output = layer.fc(input=[fc_last, lstm_last], + size=class_dim, + act=activation.Softmax(), + bias_attr=bias_attr, + param_attr=para_attr) + + lbl = layer.data("label", data_type.integer_value(2)) + cost = layer.classification_cost(input=output, label=lbl) + return cost + + +if __name__ == '__main__': + # init + paddle.init(use_gpu=True, trainer_count=4) + + # network config + print 'load dictionary...' + word_dict = imdb.word_dict() + dict_dim = len(word_dict) + class_dim = 2 + + # Please choose the way to build the network + # by uncommenting the corresponding line. + cost = convolution_net(dict_dim, class_dim=class_dim) + # cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3) + + # create parameters + parameters = paddle.parameters.create(cost) + + # create optimizer + adam_optimizer = paddle.optimizer.Adam( + learning_rate=2e-3, + regularization=paddle.optimizer.L2Regularization(rate=8e-4), + model_average=paddle.optimizer.ModelAverage(average_window=0.5)) + + # End batch and end pass event handler + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "\nPass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + else: + sys.stdout.write('.') + sys.stdout.flush() + if isinstance(event, paddle.event.EndPass): + result = trainer.test( + reader=paddle.reader.batched( + lambda: imdb.test(word_dict), batch_size=128), + reader_dict={'word': 0, + 'label': 1}) + print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) + + # create trainer + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=adam_optimizer) + + trainer.train( + reader=paddle.reader.batched( + paddle.reader.shuffle( + lambda: imdb.train(word_dict), buf_size=1000), + batch_size=100), + event_handler=event_handler, + reader_dict={'word': 0, + 'label': 1}, + num_passes=10) diff --git a/doc/design/reader/README.md b/doc/design/reader/README.md index 17d52b9e20b8130688028092421f4b33f44763ac..03119fdd74502a4534c2e6a576580ce96a721c7e 100644 --- a/doc/design/reader/README.md +++ b/doc/design/reader/README.md @@ -4,9 +4,10 @@ At training and testing time, PaddlePaddle programs need to read data. To ease t - A *reader* is a function that reads data (from file, network, random number generator, etc) and yields data items. 
- A *reader creator* is a function that returns a reader function. -- A *reader* decorator is a function, which accepts one or more readers, and returns a reader. +- A *reader decorator* is a function, which accepts one or more readers, and returns a reader. +- A *batch reader* is a function that reads data (from *reader*, file, network, random number generator, etc) and yields a batch of data items. -and provide frequently used reader creators and reader decorators. +and provide a function that converts a reader into a batch reader, as well as frequently used reader creators and reader decorators. ## Data Reader Interface @@ -37,9 +38,54 @@ def reader_creator_random_imageand_label(widht, height, label): return reader ``` +## Batch Reader Interface + +*batch reader* can be any function with no parameter that creates an iterable (anything can be used in `for x in iterable`). The output of the iterable should be a batch (list) of data items. Each item inside the list must be a tuple. + +Here are valid outputs: +```python +# a mini batch of three data items. Each data item consists of three columns of data. +[(1, 1, 1), +(2, 2, 2), +(3, 3, 3)] + +# a mini batch of three data items, each data item is a list (single column). +[([1,1,1],), +([2,2,2],), +([3,3,3],)] +``` + +Please note that each item inside the list must be a tuple; below is an invalid output: +```python + # wrong, [1,1,1] needs to be inside a tuple: ([1,1,1],). + # Otherwise it's ambiguous whether [1,1,1] means a single column of data [1, 1, 1], + # or three columns of data, each of which is 1. +[[1,1,1], +[2,2,2], +[3,3,3]] +``` + +It's easy to convert a reader into a batch reader: +```python +mnist_train = paddle.dataset.mnist.train() +mnist_train_batch_reader = paddle.batch(mnist_train, 128) +``` + +It's also easy to create a custom batch reader: +```python +def custom_batch_reader(): + while True: + batch = [] + for i in xrange(128): + batch.append((numpy.random.uniform(-1, 1, 28*28),)) # note that it's a tuple being appended. + yield batch + +mnist_random_image_batch_reader = custom_batch_reader +``` + ## Usage -data reader, mapping from item(s) read to data layer, batch size and number of total pass will be passed into `paddle.train`: +A batch reader, a mapping from item(s) read to data layer, the batch size, and the number of total passes will be passed into `paddle.train`: ```python # two data layer is created: @@ -47,8 +93,8 @@ image_layer = paddle.layer.data("image", ...) label_layer = paddle.layer.data("label", ...) # ... - -paddle.train(paddle.dataset.mnist, {"image":0, "label":1}, 128, 10, ...) +batch_reader = paddle.batch(paddle.dataset.mnist.train(), 128) +paddle.train(batch_reader, {"image":0, "label":1}, 128, 10, ...) ``` ## Data Reader Decorator @@ -64,7 +110,7 @@ Since reading data may take time and training can not proceed without data. It i Use `paddle.reader.buffered` to prefetch data: ```python -buffered_reader = paddle.reader.buffered(paddle.dataset.mnist, 100) +buffered_reader = paddle.reader.buffered(paddle.dataset.mnist.train(), 100) ``` `buffered_reader` will try to buffer (prefetch) `100` data entries. @@ -91,10 +137,10 @@ def reader_creator_bool(t): true_reader = reader_creator_bool(True) false_reader = reader_creator_bool(False) -reader = paddle.reader.compose(paddle.dataset.mnist, data_reader_creator_random_image(20, 20), true_reader, false_reader) -# Skipped 1 because paddle.dataset.mnist produces two items per data entry.
+reader = paddle.reader.compose(paddle.dataset.mnist.train(), data_reader_creator_random_image(20, 20), true_reader, false_reader) +# Skipped 1 because paddle.dataset.mnist.train() produces two items per data entry. # And we don't care second item at this time. -paddle.train(reader, {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...) +paddle.train(paddle.batch(reader, 128), {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...) ``` ### Shuffle @@ -103,16 +149,20 @@ Given shuffle buffer size `n`, `paddle.reader.shuffle` will return a data reader Example: ```python -reader = paddle.reader.shuffle(paddle.dataset.mnist, 512) +reader = paddle.reader.shuffle(paddle.dataset.mnist.train(), 512) ``` ## Q & A -### Why return only a single entry, but not a mini batch? +### Why does a reader return only a single entry, rather than a mini batch? + +Always returning a single entry makes it much easier to reuse existing data readers (e.g., if an existing reader returned 3 entries instead of a single entry, the training code would be more complex because it would need to handle cases like a batch size of 2). + +We provide the function `paddle.batch` to turn a (single entry) reader into a batch reader. -If a mini batch is returned, data reader need to take care of batch size. But batch size is a concept for training, it makes more sense for user to specify batch size as a parameter for `train`. +### Why do we need a batch reader? Isn't passing a reader and batch_size to train sufficient? -Practically, always return a single entry make reusing existing data readers much easier (e.g., if existing reader return not a single entry but 3 entries, training code will be more complex because it need to handle cases like batch size 2). +In most cases, passing a reader and batch_size as arguments to train would be sufficient. However, sometimes users want to customize the order of data entries inside a mini batch, or even change the batch size dynamically. ### Why use a dictionary but not a list to provide mapping? @@ -137,7 +187,7 @@ def image_reader_creator(image_path, label_path, n): # images_reader_creator creates a reader reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024) -paddle.train(reader, {"image":0, "label":1}, ...) +paddle.train(paddle.batch(reader, 128), {"image":0, "label":1}, ...) ``` ### How is `paddle.train` implemented @@ -145,17 +195,8 @@ paddle.train(reader, {"image":0, "label":1}, ...) An example implementation of paddle.train could be: ```python -def make_minibatch(reader, minibatch_size): - def ret(): - r = reader() - buf = [r.next() for x in xrange(minibatch_size)] - while len(buf) > 0: - yield buf - buf = [r.next() for x in xrange(minibatch_size)] - return ret - -def train(reader, mapping, batch_size, total_pass): +def train(batch_reader, mapping, batch_size, total_pass): for pass_idx in range(total_pass): - for mini_batch in make_minibatch(reader): # this loop will never end in online learning. + for mini_batch in batch_reader(): # this loop will never end in online learning.
do_forward_backward(mini_batch, mapping) ``` diff --git a/doc/howto/usage/k8s/src/k8s_train/start_paddle.py b/doc/howto/usage/k8s/src/k8s_train/start_paddle.py index f1a770ccb54fbd7d4c3cf6bf134d00d7bf5961ca..935c12bb67e1fe08bc135a7a2220fcd43c548482 100755 --- a/doc/howto/usage/k8s/src/k8s_train/start_paddle.py +++ b/doc/howto/usage/k8s/src/k8s_train/start_paddle.py @@ -132,7 +132,8 @@ def startPaddle(idMap={}, train_args_dict=None): logDir = JOB_PATH_OUTPUT + "/node_" + str(trainerId) if not os.path.exists(JOB_PATH_OUTPUT): os.makedirs(JOB_PATH_OUTPUT) - os.mkdir(logDir) + if not os.path.exists(logDir): + os.mkdir(logDir) copyCommand = 'cp -rf ' + JOB_PATH + \ "/" + str(trainerId) + "/data/*" + " ./data/" os.system(copyCommand) diff --git a/paddle/scripts/travis/before_install.osx.sh b/paddle/scripts/travis/before_install.osx.sh deleted file mode 100755 index 80f031a74e7052d183b5ef21d432476ff1cce722..0000000000000000000000000000000000000000 --- a/paddle/scripts/travis/before_install.osx.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -brew update -brew tap homebrew/science -brew install openblas swig md5sha1sum diff --git a/paddle/scripts/travis/build_and_test.sh b/paddle/scripts/travis/build_and_test.sh index 5e6350b57458594163f23cca41a546d7bd9b1eda..7deb3e62e88de7e1306fcbfc5a28aa4372d678e6 100755 --- a/paddle/scripts/travis/build_and_test.sh +++ b/paddle/scripts/travis/build_and_test.sh @@ -2,18 +2,11 @@ source ./common.sh NPROC=1 -if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then - export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages - export PYTHONHOME=/opt/python/2.7.12 - export PATH=/opt/python/2.7.12/bin:${PATH} - cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} - NRPOC=`nproc` - make -j $NPROC - make coveralls - sudo make install -elif [[ "$TRAVIS_OS_NAME" == "osx" ]]; then - export PYTHONPATH=/usr/local/lib/python2.7/site-packages - cmake .. -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} - NPROC=`sysctl -n hw.ncpu` - make -j $NPROC -fi +export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages +export PYTHONHOME=/opt/python/2.7.12 +export PATH=/opt/python/2.7.12/bin:${PATH} +cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} +NRPOC=`nproc` +make -j $NPROC +make coveralls +sudo make install diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index d548d1adaafacdb097dbe476fdc76651c9f46b6b..8ab8cd2f85d5d7bcf86c2f57b350dfcd99177b69 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -20,6 +20,7 @@ import event import data_type import topology import data_feeder +import networks from . import dataset from . 
import reader import attr diff --git a/python/paddle/v2/config_base.py b/python/paddle/v2/config_base.py index 035f96b0f2e978a413a1ebe0ec115f75ff07befc..fa2ccec6c3270541dd6b13fdfd2323d10ceac642 100644 --- a/python/paddle/v2/config_base.py +++ b/python/paddle/v2/config_base.py @@ -22,6 +22,7 @@ class Layer(object): def __init__(self, name=None, parent_layers=None): assert isinstance(parent_layers, dict) self.name = name + self.__contex__ = {} self.__parent_layers__ = parent_layers def to_proto(self, context): @@ -39,16 +40,38 @@ class Layer(object): self.__parent_layers__[layer_name]) kwargs[layer_name] = v1_layer - if self.name is None: + if self.context_name() is None: return self.to_proto_impl(**kwargs) - elif self.name not in context: - context[self.name] = self.to_proto_impl(**kwargs) - - return context[self.name] + elif self.context_name() not in context: + context[self.context_name()] = self.to_proto_impl(**kwargs) + self.__contex__ = context + if self.use_context_name(): + return context[self.context_name()] + else: + return context[self.name] def to_proto_impl(self, **kwargs): raise NotImplementedError() + def context_name(self): + """ + Context name means the context which stores `to_proto_impl` result. + If multiple layer share same context_name, the `to_proto_impl` of them + will be invoked only once. + """ + return self.name + + def use_context_name(self): + return False + + def calculate_size(self): + """ + lazy calculate size of the layer, should be called when to_proto_impl of + this layer is called. + :return: + """ + return self.__contex__[self.context_name()].size + def __convert_to_v2__(method_name, parent_names, is_default_name=True): if is_default_name: diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index d222739ba2c0fe1913d7625b209f8b7fbd9e4f39..a1b21bab3bac8b304abb4ae292b1c1e9f3e719de 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -1,7 +1,22 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import mnist import imikolov import imdb import cifar import movielens +import conll05 -__all__ = ['mnist', 'imikolov', 'imdb', 'cifar', 'movielens'] +__all__ = ['mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05'] diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py index 77c54bd268b5d988b0802a3edca91605e56f730e..5c6f5d85567fa19f2835ee4f3951531b6dfd3209 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/v2/dataset/cifar.py @@ -1,6 +1,20 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html """ + import cPickle import itertools import numpy diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index fcf4437ffaf329f52cc5bc997eff45dee200873c..397c9e66d495431f412c22b9b1d19ee32257b2dd 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import requests import hashlib import os diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py new file mode 100644 index 0000000000000000000000000000000000000000..e96a701c1a944e2d6d84f897157cb357c5aa0824 --- /dev/null +++ b/python/paddle/v2/dataset/conll05.py @@ -0,0 +1,196 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tarfile +import gzip +import itertools +from common import download + +__all__ = ['test', 'get_dict', 'get_embedding'] +""" +Conll 2005 dataset. Paddle semantic role labeling Book and demo use this +dataset as an example. Because Conll 2005 is not freely available, the default +download URL is the test set of Conll 2005 (which is public). Users can change +URL and MD5 to their Conll dataset.
+""" + +DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' +DATA_MD5 = '387719152ae52d60422c016e92a742fc' +WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt' +WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa' +VERBDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt' +VERBDICT_MD5 = '0d2977293bbb6cbefab5b0f97db1e77c' +TRGDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt' +TRGDICT_MD5 = 'd8c7f03ceb5fc2e5a0fa7503a4353751' +EMB_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb' +EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7' + +UNK_IDX = 0 + + +def load_dict(filename): + d = dict() + with open(filename, 'r') as f: + for i, line in enumerate(f): + d[line.strip()] = i + return d + + +def corpus_reader(data_path, words_name, props_name): + """ + Read one corpus. It returns an iterator. Each element of + this iterator is a tuple including sentence and labels. The sentence is + consist of a list of word IDs. The labels include a list of label IDs. + :return: a iterator of data. + :rtype: iterator + """ + + def reader(): + tf = tarfile.open(data_path) + wf = tf.extractfile(words_name) + pf = tf.extractfile(props_name) + with gzip.GzipFile(fileobj=wf) as words_file, gzip.GzipFile( + fileobj=pf) as props_file: + sentences = [] + labels = [] + one_seg = [] + for word, label in itertools.izip(words_file, props_file): + word = word.strip() + label = label.strip().split() + + if len(label) == 0: # end of sentence + for i in xrange(len(one_seg[0])): + a_kind_lable = [x[i] for x in one_seg] + labels.append(a_kind_lable) + + if len(labels) >= 1: + verb_list = [] + for x in labels[0]: + if x != '-': + verb_list.append(x) + + for i, lbl in enumerate(labels[1:]): + cur_tag = 'O' + is_in_bracket = False + lbl_seq = [] + verb_word = '' + for l in lbl: + if l == '*' and is_in_bracket == False: + lbl_seq.append('O') + elif l == '*' and is_in_bracket == True: + lbl_seq.append('I-' + cur_tag) + elif l == '*)': + lbl_seq.append('I-' + cur_tag) + is_in_bracket = False + elif l.find('(') != -1 and l.find(')') != -1: + cur_tag = l[1:l.find('*')] + lbl_seq.append('B-' + cur_tag) + is_in_bracket = False + elif l.find('(') != -1 and l.find(')') == -1: + cur_tag = l[1:l.find('*')] + lbl_seq.append('B-' + cur_tag) + is_in_bracket = True + else: + raise RuntimeError('Unexpected label: %s' % + l) + + yield sentences, verb_list[i], lbl_seq + + sentences = [] + labels = [] + one_seg = [] + else: + sentences.append(word) + one_seg.append(label) + + pf.close() + wf.close() + tf.close() + + return reader + + +def reader_creator(corpus_reader, + word_dict=None, + predicate_dict=None, + label_dict=None): + def reader(): + for sentence, predicate, labels in corpus_reader(): + + sen_len = len(sentence) + + verb_index = labels.index('B-V') + mark = [0] * len(labels) + if verb_index > 0: + mark[verb_index - 1] = 1 + ctx_n1 = sentence[verb_index - 1] + else: + ctx_n1 = 'bos' + + if verb_index > 1: + mark[verb_index - 2] = 1 + ctx_n2 = sentence[verb_index - 2] + else: + ctx_n2 = 'bos' + + mark[verb_index] = 1 + ctx_0 = sentence[verb_index] + + if verb_index < len(labels) - 1: + mark[verb_index + 1] = 1 + ctx_p1 = sentence[verb_index + 1] + else: + ctx_p1 = 'eos' + + if verb_index < len(labels) - 2: + mark[verb_index + 2] = 1 + ctx_p2 = sentence[verb_index + 2] + else: + ctx_p2 = 'eos' + + word_idx = [word_dict.get(w, UNK_IDX) for w in sentence] + + ctx_n2_idx = [word_dict.get(ctx_n2, 
UNK_IDX)] * sen_len + ctx_n1_idx = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len + ctx_0_idx = [word_dict.get(ctx_0, UNK_IDX)] * sen_len + ctx_p1_idx = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len + ctx_p2_idx = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len + + pred_idx = [predicate_dict.get(predicate)] * sen_len + label_idx = [label_dict.get(w) for w in labels] + + yield word_idx, ctx_n2_idx, ctx_n1_idx, \ + ctx_0_idx, ctx_p1_idx, ctx_p2_idx, pred_idx, mark, label_idx + + return reader + + +def get_dict(): + word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)) + verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)) + label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)) + return word_dict, verb_dict, label_dict + + +def get_embedding(): + return download(EMB_URL, 'conll05st', EMB_MD5) + + +def test(): + word_dict, verb_dict, label_dict = get_dict() + reader = corpus_reader( + download(DATA_URL, 'conll05st', DATA_MD5), + words_name='conll05st-release/test.wsj/words/test.wsj.words.gz', + props_name='conll05st-release/test.wsj/props/test.wsj.props.gz') + return reader_creator(reader, word_dict, verb_dict, label_dict) diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py index 433e37380f840f5b7ff619a5f64b99d2ad724b17..f27756a38a9cd809fdaaf92e7f8a72b681915fc8 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/v2/dataset/imdb.py @@ -1,6 +1,3 @@ -# /usr/bin/env python -# -*- coding:utf-8 -*- - # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,6 +14,7 @@ """ IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz """ + import paddle.v2.dataset.common import tarfile import Queue @@ -118,3 +116,8 @@ def test(word_idx): return reader_creator( re.compile("aclImdb/test/pos/.*\.txt$"), re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000) + + +def word_dict(): + return build_dict( + re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150) diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py index b3791ddad66e588356338150fccadbcc8fa113ca..285d3eaca8317c78dc84e99b4d524a0f4872c687 100644 --- a/python/paddle/v2/dataset/imikolov.py +++ b/python/paddle/v2/dataset/imikolov.py @@ -1,3 +1,16 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/ """ diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index ebcdff78b317ceb4811048ac78982e072962fa9c..6a621a2aaad14bf9598b838ce7c2ebf297bb0d30 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -1,3 +1,16 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ MNIST dataset. """ diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index dcffcff2f58c63d451761d37f14127d730faf621..c22bcfa38b5f501732768dd4f62d8e088d57a7ff 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import zipfile from common import download import re diff --git a/python/paddle/v2/dataset/tests/cifar_test.py b/python/paddle/v2/dataset/tests/cifar_test.py index a2af45ecf508462fe4b596b5d8d6401c5b974eff..e0e18229da7818be5752ee592e094a00da286ad9 100644 --- a/python/paddle/v2/dataset/tests/cifar_test.py +++ b/python/paddle/v2/dataset/tests/cifar_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.cifar import unittest diff --git a/python/paddle/v2/dataset/tests/common_test.py b/python/paddle/v2/dataset/tests/common_test.py index 7d8406171b8478e4a8331637c5e867c18d5eb3d8..5babcef0eb4345d243904877d323c37d4889a643 100644 --- a/python/paddle/v2/dataset/tests/common_test.py +++ b/python/paddle/v2/dataset/tests/common_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import paddle.v2.dataset.common import unittest import tempfile diff --git a/python/paddle/v2/dataset/tests/imdb_test.py b/python/paddle/v2/dataset/tests/imdb_test.py index e887af16634d2db04b8cf5fa0269a69991d8baac..c4d82f26895d77d05c6e936bd636b1239e1a0cd8 100644 --- a/python/paddle/v2/dataset/tests/imdb_test.py +++ b/python/paddle/v2/dataset/tests/imdb_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.imdb import unittest import re diff --git a/python/paddle/v2/dataset/tests/mnist_test.py b/python/paddle/v2/dataset/tests/mnist_test.py index b4408cc2f590d4d8da4ce5e98213cf7b208cfc15..1d344cac3e7483a351033570fbec75a4d19f4a55 100644 --- a/python/paddle/v2/dataset/tests/mnist_test.py +++ b/python/paddle/v2/dataset/tests/mnist_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.mnist import unittest diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py index 6a7bcb81879c0a364a1d896071055a6247e96c19..8cbf9b9b1faac0ee0875297756853e632e4f2498 100644 --- a/python/paddle/v2/event.py +++ b/python/paddle/v2/event.py @@ -34,8 +34,9 @@ class WithMetric(object): class TestResult(WithMetric): - def __init__(self, evaluator): + def __init__(self, evaluator, cost): super(TestResult, self).__init__(evaluator) + self.cost = cost class BeginPass(object): diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 67111f1315fbb0f55c1db0f6fe89fc988c8d83f6..2f55611aaa1d3ae22f5d7f184b38e622271881ea 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -65,19 +65,24 @@ to be in a Python function but could be anywhere. Also, the creation of a protobuf message is hidden in the invocation of paddle.v2.parameters.create, no longer exposed to users. 
""" + +import collections +import inspect from config_base import Layer, __convert_to_v2__ import paddle.trainer_config_helpers as conf_helps from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as __parse__ - -from paddle.trainer_config_helpers.default_decorators import wrap_name_default from paddle.trainer_config_helpers.default_decorators import wrap_act_default from paddle.trainer_config_helpers.default_decorators import \ wrap_bias_attr_default +from paddle.trainer_config_helpers.default_decorators import wrap_name_default from paddle.trainer_config_helpers.layers import layer_support +from paddle.trainer.config_parser import \ + RecurrentLayerGroupWithoutOutLinksBegin, RecurrentLayerGroupSetOutLink, \ + RecurrentLayerGroupEnd, model_type -import data_type import activation +import data_type __all__ = ['parse_network', 'data'] @@ -130,6 +135,137 @@ class DataLayerV2(Layer): return getattr(conf_helps, self.__method_name__)(name=self.name, **args) +class WithExtraParent(Layer): + def extra_parent(self): + return self.__extra_parent__ + + def __init__(self, name=None, parent_layers=None): + self.__extra_parent__ = [] + super(WithExtraParent, self).__init__( + name=name, parent_layers=parent_layers) + + def append_extra_parent(self, parent): + self.__extra_parent__.append(parent) + + def to_proto(self, context): + """ + function to set proto attribute + """ + kwargs = dict() + for p in self.__extra_parent__: + p.to_proto(context=context) + + for layer_name in self.__parent_layers__: + if not isinstance(self.__parent_layers__[layer_name], + collections.Sequence): + v1_layer = self.__parent_layers__[layer_name].to_proto( + context=context) + else: + v1_layer = map(lambda x: x.to_proto(context=context), + self.__parent_layers__[layer_name]) + kwargs[layer_name] = v1_layer + + if self.context_name() is None: + return self.to_proto_impl(context=context, **kwargs) + elif self.context_name() not in context: + context[self.context_name()] = self.to_proto_impl( + context=context, **kwargs) + + if self.use_context_name(): + return context[self.context_name()] + else: + return context[self.name] + + +class MemoryV2(WithExtraParent): + def __init__(self, name, **kwargs): + self.name = name + super(MemoryV2, self).__init__(name=name, parent_layers=dict()) + self.__kwargs__ = kwargs + self.__boot_layer_name__ = None + if 'boot_layer' in kwargs: + begin_of_current_rnn = [] + # TODO(yuyang18): Fix inspect, it could be wrong when user invoke a + # function inside step. 
+ st = inspect.stack() + for i in xrange(len(st)): + locs = inspect.stack()[i][0].f_locals + keys = locs.keys() + for key in keys: + val = locs[key] + if isinstance(val, RecurrentLayerInput): + begin_of_current_rnn.append(val) + elif isinstance(val, collections.Sequence): + for v in val: + if isinstance(v, RecurrentLayerInput): + begin_of_current_rnn.append(v) + + if begin_of_current_rnn: + break + assert begin_of_current_rnn is not None + for extra in begin_of_current_rnn: + self.append_extra_parent(extra) + assert isinstance(extra, WithExtraParent) + extra.append_extra_parent(kwargs['boot_layer']) + self.__boot_layer_name__ = kwargs['boot_layer'].name + + def to_proto_impl(self, context, **kwargs): + args = dict() + for each in kwargs: + args[each] = kwargs[each] + for each in self.__kwargs__: + args[each] = self.__kwargs__[each] + + if self.__boot_layer_name__ is not None: + args['boot_layer'] = context[self.__boot_layer_name__] + + size = args.get('size', None) + if size is not None: + if callable(size): + real_size = size() + else: + real_size = size + args['size'] = real_size + return conf_helps.memory(name=self.name, **args) + + def context_name(self): + return self.name + "#memory" + + def use_context_name(self): + """ + memory layer will have the same name with some layer + :return: + """ + return True + + +class LayerOutputV2(Layer): + """ + LayerOutputV2 is used to store the result of LayerOutput in v1 api. + It will not store it's parents because layer_output has been parsed already. + """ + + def __init__(self, layer_output): + assert isinstance(layer_output, conf_helps.LayerOutput) + self.layer_output = layer_output + super(LayerOutputV2, self).__init__( + name=layer_output.name, parent_layers=dict()) + + def to_proto_impl(self): + return self.layer_output + + +class StaticInputV2(object): + def __init__(self, input, is_seq=False, size=None): + assert isinstance(input, LayerV2) + self.name = input.name + self.input = input + self.is_seq = is_seq + self.size = size + # TODO(qiaolongfei): add size + # assert input.size is not None or size is not None + + class MixedLayerV2(Layer): """ This class is use to support `with` grammar. 
If not, the following code @@ -161,7 +297,6 @@ class MixedLayerV2(Layer): other_kwargs['act'] = act other_kwargs['bias_attr'] = bias_attr other_kwargs['layer_attr'] = layer_attr - parent_layers = {"input": self.__inputs__} super(MixedLayerV2, self).__init__(name, parent_layers) self.__other_kwargs__ = other_kwargs @@ -171,7 +306,7 @@ class MixedLayerV2(Layer): self.__inputs__.append(other) return self else: - raise MixedLayerTypeV2.AddToSealedMixedLayerExceptionV2() + raise MixedLayerV2.AddToSealedMixedLayerExceptionV2() def __enter__(self): assert len(self.__inputs__) == 0 @@ -186,6 +321,13 @@ class MixedLayerV2(Layer): args[each] = kwargs[each] for each in self.__other_kwargs__: args[each] = self.__other_kwargs__[each] + size = args.get('size', None) + if size is not None: + if callable(size): + real_size = size() + else: + real_size = size + args['size'] = real_size return getattr(conf_helps, self.__method_name__)(**args) @@ -202,14 +344,51 @@ def mixed(size=0, return MixedLayerV2(size, input, name, act, bias_attr, layer_attr) +class RecurrentLayerInput(WithExtraParent): + def __init__(self, recurrent_name, index, parent_layers): + assert len(parent_layers) == 1 + self.__parents__ = parent_layers.values()[0] + super(RecurrentLayerInput, self).__init__( + name=self.__parents__[index].name, parent_layers=parent_layers) + self.__recurrent_name__ = recurrent_name + + def context_name(self): + return self.__recurrent_name__ + ".begin" + + def to_proto_impl(self, context, **kwargs): + model_type('recurrent_nn') + RecurrentLayerGroupWithoutOutLinksBegin( + name=self.__recurrent_name__, + in_links=map(lambda x: x.name, self.__parents__)) + return self + + +class RecurrentLayerOutput(Layer): + def __init__(self, recurrent_name, index, parent_layers): + assert len(parent_layers) == 1 + self.__parents__ = parent_layers.values()[0] + super(RecurrentLayerOutput, self).__init__( + name=self.__parents__[index].name, parent_layers=parent_layers) + self.__recurrent_name__ = recurrent_name + + def context_name(self): + return self.__recurrent_name__ + ".end" + + def to_proto_impl(self, **kwargs): + for l in self.__parents__: + RecurrentLayerGroupSetOutLink(l.name) + RecurrentLayerGroupEnd(name=self.__recurrent_name__) + + LayerV2 = Layer data = DataLayerV2 AggregateLevel = conf_helps.layers.AggregateLevel ExpandLevel = conf_helps.layers.ExpandLevel +memory = MemoryV2 def __layer_name_mapping__(inname): - if inname in ['data_layer', 'memory', 'mixed_layer']: + if inname in ['data_layer', 'memory', 'mixed_layer', 'recurrent_group']: # Do Not handle these layers return elif inname == 'maxid_layer': @@ -231,8 +410,10 @@ def __layer_name_mapping__(inname): def __layer_name_mapping_parent_names__(inname): all_args = getattr(conf_helps, inname).argspec.args return filter( - lambda x: x in ['input1', 'input2','label', 'input', 'a', 'b', 'expand_as', - 'weights', 'vectors', 'weight', 'score', 'left', 'right'], + lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b', + 'expand_as', + 'weights', 'vectors', 'weight', 'score', 'left', + 'right', 'output_mem'], all_args) @@ -267,3 +448,54 @@ operator_list = [ for op in operator_list: globals()[op[0]] = __convert_to_v2__( op[0], parent_names=op[1], is_default_name=False) + + +@wrap_name_default() +def recurrent_group(step, input, name=None): + if not isinstance(input, collections.Sequence): + input = [input] + + non_static_inputs = filter(lambda x: not isinstance(x, StaticInputV2), + input) + actual_input = [ + RecurrentLayerInput( + recurrent_name=name, + 
+            index=i,
+            parent_layers={'recurrent_inputs': non_static_inputs})
+        for i in xrange(len(non_static_inputs))
+    ]
+
+    def __real_step__(*args):
+        rnn_input = list(args)
+        static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input)
+        for static_input in static_inputs:
+            mem_name = "__%s_memory__" % static_input.input.name
+            mem = memory(
+                name=mem_name,
+                is_seq=static_input.is_seq,
+                size=static_input.input.calculate_size,
+                boot_layer=static_input.input)
+            with mixed(
+                    name=mem_name,
+                    size=static_input.input.calculate_size,
+                    act=activation.Identity()) as mix:
+                mix += identity_projection(input=mem)
+            rnn_input.insert(input.index(static_input), mix)
+        return step(*rnn_input)
+
+    actual_output = __real_step__(*actual_input)
+
+    if not isinstance(actual_output, collections.Sequence):
+        actual_output = [actual_output]
+
+    retv = [
+        RecurrentLayerOutput(
+            recurrent_name=name,
+            index=i,
+            parent_layers={'recurrent_outputs': actual_output})
+        for i in xrange(len(actual_output))
+    ]
+    if len(retv) == 1:
+        return retv[0]
+    else:
+        return retv
diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt
index 46b5d08b8761ea58530f5aefe5d1947408727f85..572deaff356712cac23cd7911cdf289db100564c 100644
--- a/python/paddle/v2/tests/CMakeLists.txt
+++ b/python/paddle/v2/tests/CMakeLists.txt
@@ -1,12 +1,16 @@
+add_test(NAME test_v2_api
+  COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE})
+
 add_test(NAME test_v2_layer
   COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
     ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py
   WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
 
-add_test(NAME test_v2_api
-  COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE})
+add_test(NAME test_v2_rnn_layer
+  COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
+    ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_rnn_layer.py)
 
-add_test(NAME topology_test
+add_test(NAME test_topology
   COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
     ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_topology.py
   WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5fbbd20eb76bb9daab2bcf98c4adad989106a377
--- /dev/null
+++ b/python/paddle/v2/tests/test_rnn_layer.py
@@ -0,0 +1,155 @@
+# Copyright PaddlePaddle contributors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import difflib
+import unittest
+
+import paddle.trainer_config_helpers as conf_helps
+import paddle.v2.activation as activation
+import paddle.v2.data_type as data_type
+import paddle.v2.layer as layer
+from paddle.trainer_config_helpers.config_parser_utils import \
+    parse_network_config as parse_network
+
+
+class RNNTest(unittest.TestCase):
+    def test_simple_rnn(self):
+        dict_dim = 10
+        word_dim = 8
+        hidden_dim = 8
+
+        def parse_old_rnn():
+            def step(y):
+                mem = conf_helps.memory(name="rnn_state", size=hidden_dim)
+                out = conf_helps.fc_layer(
+                    input=[y, mem],
+                    size=hidden_dim,
+                    act=activation.Tanh(),
+                    bias_attr=True,
+                    name="rnn_state")
+                return out
+
+            def test():
+                data = conf_helps.data_layer(name="word", size=dict_dim)
+                embd = conf_helps.embedding_layer(input=data, size=word_dim)
+                conf_helps.recurrent_group(name="rnn", step=step, input=embd)
+
+            return str(parse_network(test))
+
+        def parse_new_rnn():
+            def new_step(y):
+                mem = layer.memory(name="rnn_state", size=hidden_dim)
+                out = layer.fc(input=[y, mem],
+                               size=hidden_dim,
+                               act=activation.Tanh(),
+                               bias_attr=True,
+                               name="rnn_state")
+                return out
+
+            data = layer.data(
+                name="word", type=data_type.integer_value(dict_dim))
+            embd = layer.embedding(input=data, size=word_dim)
+            rnn_layer = layer.recurrent_group(
+                name="rnn", step=new_step, input=embd)
+            return str(layer.parse_network(rnn_layer))
+
+        diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
+                                    parse_new_rnn().splitlines(1))
+        print ''.join(diff)
+
+    def test_sequence_rnn_multi_input(self):
+        dict_dim = 10
+        word_dim = 8
+        hidden_dim = 8
+        label_dim = 3
+
+        def parse_old_rnn():
+            def test():
+                data = conf_helps.data_layer(name="word", size=dict_dim)
+                label = conf_helps.data_layer(name="label", size=label_dim)
+                emb = conf_helps.embedding_layer(input=data, size=word_dim)
+                boot_layer = conf_helps.data_layer(name="boot", size=10)
+                boot_layer = conf_helps.fc_layer(
+                    name='boot_fc', input=boot_layer, size=10)
+
+                def step(y, wid):
+                    z = conf_helps.embedding_layer(input=wid, size=word_dim)
+                    mem = conf_helps.memory(
+                        name="rnn_state",
+                        size=hidden_dim,
+                        boot_layer=boot_layer)
+                    out = conf_helps.fc_layer(
+                        input=[y, z, mem],
+                        size=hidden_dim,
+                        act=conf_helps.TanhActivation(),
+                        bias_attr=True,
+                        name="rnn_state")
+                    return out
+
+                out = conf_helps.recurrent_group(
+                    name="rnn", step=step, input=[emb, data])
+
+                rep = conf_helps.last_seq(input=out)
+                prob = conf_helps.fc_layer(
+                    size=label_dim,
+                    input=rep,
+                    act=conf_helps.SoftmaxActivation(),
+                    bias_attr=True)
+
+                conf_helps.outputs(
+                    conf_helps.classification_cost(
+                        input=prob, label=label))
+
+            return str(parse_network(test))
+
+        def parse_new_rnn():
+            data = layer.data(
+                name="word", type=data_type.dense_vector(dict_dim))
+            label = layer.data(
+                name="label", type=data_type.dense_vector(label_dim))
+            emb = layer.embedding(input=data, size=word_dim)
+            boot_layer = layer.data(
+                name="boot", type=data_type.dense_vector(10))
+            boot_layer = layer.fc(name='boot_fc', input=boot_layer, size=10)
+
+            def step(y, wid):
+                z = layer.embedding(input=wid, size=word_dim)
+                mem = layer.memory(
+                    name="rnn_state", size=hidden_dim, boot_layer=boot_layer)
+                out = layer.fc(input=[y, z, mem],
+                               size=hidden_dim,
+                               act=activation.Tanh(),
+                               bias_attr=True,
+                               name="rnn_state")
+                return out
+
+            out = layer.recurrent_group(
+                name="rnn", step=step, input=[emb, data])
+
+            rep = layer.last_seq(input=out)
+            prob = layer.fc(size=label_dim,
+                            input=rep,
+                            act=activation.Softmax(),
+                            bias_attr=True)
+
+            cost = layer.classification_cost(input=prob, label=label)
+
+            return str(layer.parse_network(cost))
+
+        diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
+                                    parse_new_rnn().splitlines(1))
+        print ''.join(diff)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py
index a4ef0df597dd7f52833a0176333dabc93d02c18d..b4a713f7d53c2c5aef7e356906f88475037fa8d2 100644
--- a/python/paddle/v2/trainer.py
+++ b/python/paddle/v2/trainer.py
@@ -107,12 +107,7 @@ class SGD(ITrainer):
             event_handler(v2_event.BeginPass(pass_id))
             pass_evaluator.start()
             updater.startPass()
-            total_cost_sum = 0
-            total_batch = 0
             for batch_id, data_batch in enumerate(reader()):
-                pass_type = updater.startBatch(len(data_batch))
-                self.__gradient_machine__.forwardBackward(
-                    feeder(data_batch), out_args, pass_type)
                 batch_evaluator.start()
                 event_handler(
                     v2_event.BeginIteration(
@@ -125,12 +120,8 @@ class SGD(ITrainer):
                 for each_param in self.__gradient_machine__.getNonStaticParameters(
                 ):
                     updater.update(each_param)
-                # Get cost. We use numpy to calculate total cost for this batch.
-                cost_vec = out_args.getSlotValue(0)
-                cost_vec = cost_vec.copyToNumpyMat()
-                cost = cost_vec.sum() / len(data_batch)
-                total_cost_sum += cost_vec.sum()
-                total_batch += len(data_batch)
+                cost_sum = out_args.sumCosts()
+                cost = cost_sum / len(data_batch)
                 updater.finishBatch(cost)
                 batch_evaluator.finish()
                 event_handler(
@@ -142,11 +133,7 @@ class SGD(ITrainer):
             updater.finishPass()
             pass_evaluator.finish()
-            event_handler(
-                v2_event.EndPass(
-                    pass_id,
-                    cost=total_cost_sum / total_batch,
-                    evaluator=pass_evaluator))
+            event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator))
         self.__gradient_machine__.finish()
 
     def default_reader_dict(self):
@@ -163,13 +150,18 @@ class SGD(ITrainer):
         evaluator = self.__gradient_machine__.makeEvaluator()
         out_args = api.Arguments.createArguments(0)
         evaluator.start()
+        total_cost = 0
+        num_samples = 0.0
         for data_batch in reader():
+            num_samples += len(data_batch)
             self.__gradient_machine__.forward(
                 feeder(data_batch), out_args, api.PASS_TEST)
+            total_cost += out_args.sumCosts()
             self.__gradient_machine__.eval(evaluator)
         evaluator.finish()
-        return v2_event.TestResult(evaluator=evaluator)
+        return v2_event.TestResult(
+            evaluator=evaluator, cost=total_cost / num_samples)
 
 
 def __check_train_args__(reader, event_handler, **kwargs):
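
Editor's note (not part of the patch): for reviewers who want to see the pieces added above strung together, the following is a minimal sketch of the new v2 recurrent API (layer.memory, layer.recurrent_group, layer.parse_network). It mirrors the call pattern of test_rnn_layer.py; the dimensions are illustrative only, and it assumes a Python 2 environment with this patch applied.

    import paddle.v2.activation as activation
    import paddle.v2.data_type as data_type
    import paddle.v2.layer as layer

    dict_dim = 100   # vocabulary size; illustrative only
    word_dim = 32    # embedding width; illustrative only
    hidden_dim = 64  # recurrent state width; illustrative only

    def step(y):
        # "rnn_state" is produced by the fc layer below; the memory refers to
        # it by name and carries the previous time step's value.
        mem = layer.memory(name="rnn_state", size=hidden_dim)
        return layer.fc(input=[y, mem],
                        size=hidden_dim,
                        act=activation.Tanh(),
                        bias_attr=True,
                        name="rnn_state")

    word = layer.data(name="word", type=data_type.integer_value(dict_dim))
    emb = layer.embedding(input=word, size=word_dim)
    rnn = layer.recurrent_group(name="rnn", step=step, input=emb)
    print layer.parse_network(rnn)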