diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..c614602cb8eb8d00a69da64953de2b1b93d583b7 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "book"] + path = book + url = https://github.com/PaddlePaddle/book.git diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a6e45028ebc3f53ea20806f0dd2a7acc820607fe..3402223b044b8950e7772f4d87cc64e5772f8dcd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,12 +2,12 @@ sha: c25201a00e6b0514370501050cf2a8538ac12270 hooks: - id: remove-crlf - files: (?!.*third_party)^.*$ + files: (?!.*third_party)^.*$ | (?!.*book)^.*$ - repo: https://github.com/reyoung/mirrors-yapf.git sha: v0.13.2 hooks: - id: yapf - files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ # Bazel BUILD files follow Python syntax. + files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ - repo: https://github.com/pre-commit/pre-commit-hooks sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469 hooks: @@ -15,7 +15,7 @@ - id: check-merge-conflict - id: check-symlinks - id: detect-private-key - files: (?!.*third_party)^.*$ + files: (?!.*third_party)^.*$ | (?!.*book)^.*$ - id: end-of-file-fixer - repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29 diff --git a/authors b/authors index ab4d3118ff1f7e94677c89073c4ea05bf991165e..daac4ec5d8173cba95df9f9b3c69c02b5256f5b2 100644 --- a/authors +++ b/authors @@ -29,13 +29,16 @@ Luo, Tao Lyu, Qin Mao, Hongyue Qian, Xiaojun +Qiao, Longfei Qi, Jun Qin, Duohao Shen, Guolong Shi, Guangchuan Song, Xiang +Wang, Helin Wang, Jiang Wang, Yanfei +Wang, Yi Wang, Yong Weng, Renliang Xu, Tianbing diff --git a/book b/book new file mode 160000 index 0000000000000000000000000000000000000000..22ed2a01aee872f055b5f5f212428f481cefc10d --- /dev/null +++ b/book @@ -0,0 +1 @@ +Subproject commit 22ed2a01aee872f055b5f5f212428f481cefc10d diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index 
968d41801d73c4082d2673efe415c1cdd0305b5e..900f59d4cb83bc9ce1893b2d3bd95f5a08b164bb 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -1,9 +1,9 @@ # Use ccache if found ccache program -find_program(CCACHE_FOUND ccache) +find_program(CCACHE_PATH ccache) -if(CCACHE_FOUND) +if(CCACHE_PATH) message(STATUS "Ccache is founded, use ccache to speed up compile.") - set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) - set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) -endif(CCACHE_FOUND) \ No newline at end of file + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH}) + set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH}) +endif(CCACHE_PATH) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 26da7e8e384bafdcbcd1a358c39cc6eb167b067e..1575d8e9f5613e972df672b1daae145595676e8b 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -14,7 +14,7 @@ INCLUDE(ExternalProject) -FIND_PACKAGE(Protobuf) +FIND_PACKAGE(Protobuf 3.1) IF(NOT PROTOBUF_FOUND) SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf) diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py index e0fc0e04bbd21f691caa1ce3fb95c8a7065d1b3f..53cffa6fb4e8b2e19725f4f44bf7b9ffffb25232 100644 --- a/demo/image_classification/api_v2_train.py +++ b/demo/image_classification/api_v2_train.py @@ -13,9 +13,10 @@ # limitations under the License import sys + import paddle.v2 as paddle + from api_v2_vgg import vgg_bn_drop -from api_v2_resnet import resnet_cifar10 def main(): @@ -23,16 +24,16 @@ def main(): classdim = 10 # PaddlePaddle init - paddle.init(use_gpu=True, trainer_count=1) + paddle.init(use_gpu=False, trainer_count=1) image = paddle.layer.data( name="image", type=paddle.data_type.dense_vector(datadim)) # Add neural network config # option 1. resnet - net = resnet_cifar10(image, depth=32) + # net = resnet_cifar10(image, depth=32) # option 2. 
vgg - # net = vgg_bn_drop(image) + net = vgg_bn_drop(image) out = paddle.layer.fc(input=net, size=classdim, @@ -68,8 +69,8 @@ def main(): result = trainer.test( reader=paddle.batch( paddle.dataset.cifar.test10(), batch_size=128), - reader_dict={'image': 0, - 'label': 1}) + feeding={'image': 0, + 'label': 1}) print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) # Create trainer @@ -83,8 +84,8 @@ def main(): batch_size=128), num_passes=5, event_handler=event_handler, - reader_dict={'image': 0, - 'label': 1}) + feeding={'image': 0, + 'label': 1}) if __name__ == '__main__': diff --git a/demo/introduction/api_train_v2.py b/demo/introduction/api_train_v2.py index 75dd65f9fc8cd8e7fab5bf30a6337574a645e89f..84125c3b4b621a128fd488ff7fa374a75f620bf1 100644 --- a/demo/introduction/api_train_v2.py +++ b/demo/introduction/api_train_v2.py @@ -30,26 +30,26 @@ def main(): def event_handler(event): if isinstance(event, paddle.event.EndIteration): if event.batch_id % 100 == 0: - print "Pass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) + print "Pass %d, Batch %d, Cost %f" % ( + event.pass_id, event.batch_id, event.cost) if isinstance(event, paddle.event.EndPass): - result = trainer.test( - reader=paddle.reader.batched( - uci_housing.test(), batch_size=2), - reader_dict={'x': 0, + if (event.pass_id + 1) % 10 == 0: + result = trainer.test( + reader=paddle.batch( + uci_housing.test(), batch_size=2), + feeding={'x': 0, 'y': 1}) - if event.pass_id % 10 == 0: - print "Test %d, %s" % (event.pass_id, result.metrics) + print "Test %d, %.2f" % (event.pass_id, result.cost) # training trainer.train( - reader=paddle.reader.batched( + reader=paddle.batch( paddle.reader.shuffle( uci_housing.train(), buf_size=500), batch_size=2), - reader_dict={'x': 0, - 'y': 1}, + feeding={'x': 0, + 'y': 1}, event_handler=event_handler, num_passes=30) diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py index 
072b2a08da6db1f6ae7b84ee66dbc88aef487deb..6b95a88042a13a280bcb80f753b3887fcef37296 100644 --- a/demo/mnist/api_train_v2.py +++ b/demo/mnist/api_train_v2.py @@ -92,18 +92,14 @@ def main(): def event_handler(event): if isinstance(event, paddle.event.EndIteration): if event.batch_id % 1000 == 0: - result = trainer.test(reader=paddle.reader.batched( - paddle.dataset.mnist.test(), batch_size=256)) - - print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics, - result.metrics) + print "Pass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) with gzip.open('params.tar.gz', 'w') as f: parameters.to_tar(f) elif isinstance(event, paddle.event.EndPass): - result = trainer.test(reader=paddle.reader.batched( + result = trainer.test(reader=paddle.batch( paddle.dataset.mnist.test(), batch_size=128)) print "Test with Pass %d, Cost %f, %s\n" % ( event.pass_id, result.cost, result.metrics) @@ -123,17 +119,17 @@ def main(): print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1]) print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100) + test_creator = paddle.dataset.mnist.test() + test_data = [] + for item in test_creator(): + test_data.append((item[0], )) + if len(test_data) == 100: + break + # output is a softmax layer. It returns probabilities. 
# Shape should be (100, 10) probs = paddle.infer( - output=predict, - parameters=parameters, - reader=paddle.batch( - paddle.reader.firstn( - paddle.reader.map_readers(lambda item: (item[0], ), - paddle.dataset.mnist.test()), - n=100), - batch_size=32)) + output_layer=predict, parameters=parameters, input=test_data) print probs.shape diff --git a/demo/recommendation/api_train_v2.py b/demo/recommendation/api_train_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..9b254933a1de60bf8d74517f0d52401d334703b7 --- /dev/null +++ b/demo/recommendation/api_train_v2.py @@ -0,0 +1,125 @@ +import paddle.v2 as paddle +import cPickle +import copy + + +def main(): + paddle.init(use_gpu=False) + movie_title_dict = paddle.dataset.movielens.get_movie_title_dict() + uid = paddle.layer.data( + name='user_id', + type=paddle.data_type.integer_value( + paddle.dataset.movielens.max_user_id() + 1)) + usr_emb = paddle.layer.embedding(input=uid, size=32) + + usr_gender_id = paddle.layer.data( + name='gender_id', type=paddle.data_type.integer_value(2)) + usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16) + + usr_age_id = paddle.layer.data( + name='age_id', + type=paddle.data_type.integer_value( + len(paddle.dataset.movielens.age_table))) + usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16) + + usr_job_id = paddle.layer.data( + name='job_id', + type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id( + ) + 1)) + + usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16) + + usr_combined_features = paddle.layer.fc( + input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb], + size=200, + act=paddle.activation.Tanh()) + + mov_id = paddle.layer.data( + name='movie_id', + type=paddle.data_type.integer_value( + paddle.dataset.movielens.max_movie_id() + 1)) + mov_emb = paddle.layer.embedding(input=mov_id, size=32) + + mov_categories = paddle.layer.data( + name='category_id', + type=paddle.data_type.sparse_binary_vector( + 
len(paddle.dataset.movielens.movie_categories()))) + + mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32) + + mov_title_id = paddle.layer.data( + name='movie_title', + type=paddle.data_type.integer_value_sequence(len(movie_title_dict))) + mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32) + mov_title_conv = paddle.networks.sequence_conv_pool( + input=mov_title_emb, hidden_size=32, context_len=3) + + mov_combined_features = paddle.layer.fc( + input=[mov_emb, mov_categories_hidden, mov_title_conv], + size=200, + act=paddle.activation.Tanh()) + + inference = paddle.layer.cos_sim( + a=usr_combined_features, b=mov_combined_features, size=1, scale=5) + cost = paddle.layer.regression_cost( + input=inference, + label=paddle.layer.data( + name='score', type=paddle.data_type.dense_vector(1))) + + parameters = paddle.parameters.create(cost) + + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=paddle.optimizer.Adam( + learning_rate=1e-4)) + feeding = { + 'user_id': 0, + 'gender_id': 1, + 'age_id': 2, + 'job_id': 3, + 'movie_id': 4, + 'category_id': 5, + 'movie_title': 6, + 'score': 7 + } + + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "Pass %d Batch %d Cost %.2f" % ( + event.pass_id, event.batch_id, event.cost) + + trainer.train( + reader=paddle.batch( + paddle.reader.shuffle( + paddle.dataset.movielens.train(), buf_size=8192), + batch_size=256), + event_handler=event_handler, + feeding=feeding, + num_passes=1) + + user_id = 234 + movie_id = 345 + + user = paddle.dataset.movielens.user_info()[user_id] + movie = paddle.dataset.movielens.movie_info()[movie_id] + + feature = user.value() + movie.value() + + def reader(): + yield feature + + infer_dict = copy.copy(feeding) + del infer_dict['score'] + + prediction = paddle.infer( + output=inference, + parameters=parameters, + reader=paddle.batch( + reader, batch_size=32), + 
feeding=infer_dict) + print(prediction + 5) / 2 + + +if __name__ == '__main__': + main() diff --git a/demo/semantic_role_labeling/api_train_v2.py b/demo/semantic_role_labeling/api_train_v2.py index 15db922b97abc5ae79f095edfd632604eec8ab94..036cad4b0a32357bb42580ef577a1eba558be8fe 100644 --- a/demo/semantic_role_labeling/api_train_v2.py +++ b/demo/semantic_role_labeling/api_train_v2.py @@ -163,11 +163,11 @@ def main(): update_equation=optimizer) parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32)) - trn_reader = paddle.reader.batched( + trn_reader = paddle.batch( paddle.reader.shuffle( conll05.test(), buf_size=8192), batch_size=10) - reader_dict = { + feeding = { 'word_data': 0, 'ctx_n2_data': 1, 'ctx_n1_data': 2, @@ -183,7 +183,7 @@ def main(): reader=trn_reader, event_handler=event_handler, num_passes=10000, - reader_dict=reader_dict) + feeding=feeding) if __name__ == '__main__': diff --git a/demo/sentiment/train_v2.py b/demo/sentiment/train_v2.py index 3a266e74ea93068cad2757d0076a4ae664ad4cf8..1c856556bd0cb32f60eba322469b3621c37e1349 100644 --- a/demo/sentiment/train_v2.py +++ b/demo/sentiment/train_v2.py @@ -13,16 +13,10 @@ # limitations under the License. import sys -import paddle.trainer_config_helpers.attrs as attrs -from paddle.trainer_config_helpers.poolings import MaxPooling import paddle.v2 as paddle -def convolution_net(input_dim, - class_dim=2, - emb_dim=128, - hid_dim=128, - is_predict=False): +def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128): data = paddle.layer.data("word", paddle.data_type.integer_value_sequence(input_dim)) emb = paddle.layer.embedding(input=data, size=emb_dim) @@ -42,8 +36,7 @@ def stacked_lstm_net(input_dim, class_dim=2, emb_dim=128, hid_dim=512, - stacked_num=3, - is_predict=False): + stacked_num=3): """ A Wrapper for sentiment classification task. This network uses bi-directional recurrent network, @@ -56,16 +49,14 @@ def stacked_lstm_net(input_dim, emb_dim: dimension of word embedding. 
hid_dim: dimension of hidden layer. stacked_num: number of stacked lstm-hidden layer. - is_predict: is predicting or not. - Some layers is not needed in network when predicting. """ assert stacked_num % 2 == 1 - layer_attr = attrs.ExtraLayerAttribute(drop_rate=0.5) - fc_para_attr = attrs.ParameterAttribute(learning_rate=1e-3) - lstm_para_attr = attrs.ParameterAttribute(initial_std=0., learning_rate=1.) + layer_attr = paddle.attr.Extra(drop_rate=0.5) + fc_para_attr = paddle.attr.Param(learning_rate=1e-3) + lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.) para_attr = [fc_para_attr, lstm_para_attr] - bias_attr = attrs.ParameterAttribute(initial_std=0., l2_rate=0.) + bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.) relu = paddle.activation.Relu() linear = paddle.activation.Linear() @@ -95,8 +86,10 @@ def stacked_lstm_net(input_dim, layer_attr=layer_attr) inputs = [fc, lstm] - fc_last = paddle.layer.pooling(input=inputs[0], pooling_type=MaxPooling()) - lstm_last = paddle.layer.pooling(input=inputs[1], pooling_type=MaxPooling()) + fc_last = paddle.layer.pooling( + input=inputs[0], pooling_type=paddle.pooling.Max()) + lstm_last = paddle.layer.pooling( + input=inputs[1], pooling_type=paddle.pooling.Max()) output = paddle.layer.fc(input=[fc_last, lstm_last], size=class_dim, act=paddle.activation.Softmax(), @@ -110,14 +103,23 @@ def stacked_lstm_net(input_dim, if __name__ == '__main__': # init - paddle.init(use_gpu=True, trainer_count=4) + paddle.init(use_gpu=False) - # network config + #data print 'load dictionary...' 
word_dict = paddle.dataset.imdb.word_dict() dict_dim = len(word_dict) class_dim = 2 + train_reader = paddle.batch( + paddle.reader.shuffle( + lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000), + batch_size=100) + test_reader = paddle.batch( + lambda: paddle.dataset.imdb.test(word_dict), batch_size=100) + + feeding = {'word': 0, 'label': 1} + # network config # Please choose the way to build the network # by uncommenting the corresponding line. cost = convolution_net(dict_dim, class_dim=class_dim) @@ -142,12 +144,7 @@ if __name__ == '__main__': sys.stdout.write('.') sys.stdout.flush() if isinstance(event, paddle.event.EndPass): - result = trainer.test( - reader=paddle.reader.batched( - lambda: paddle.dataset.imdb.test(word_dict), - batch_size=128), - reader_dict={'word': 0, - 'label': 1}) + result = trainer.test(reader=test_reader, feeding=feeding) print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) # create trainer @@ -156,11 +153,7 @@ if __name__ == '__main__': update_equation=adam_optimizer) trainer.train( - reader=paddle.reader.batched( - paddle.reader.shuffle( - lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000), - batch_size=100), + reader=train_reader, event_handler=event_handler, - reader_dict={'word': 0, - 'label': 1}, - num_passes=10) + feeding=feeding, + num_passes=2) diff --git a/demo/seqToseq/api_train_v2.py b/demo/seqToseq/api_train_v2.py index a5f59ec379738eb5bed3e7559739cae38582ed06..5d138a8c4f91976d90b19441781248f7b67c854a 100644 --- a/demo/seqToseq/api_train_v2.py +++ b/demo/seqToseq/api_train_v2.py @@ -1,109 +1,145 @@ -import os - +import sys import paddle.v2 as paddle -from seqToseq_net_v2 import seqToseq_net_v2 - -# Data Definiation. -# TODO:This code should be merged to dataset package. 
-data_dir = "./data/pre-wmt14" -src_lang_dict = os.path.join(data_dir, 'src.dict') -trg_lang_dict = os.path.join(data_dir, 'trg.dict') - -source_dict_dim = len(open(src_lang_dict, "r").readlines()) -target_dict_dim = len(open(trg_lang_dict, "r").readlines()) - - -def read_to_dict(dict_path): - with open(dict_path, "r") as fin: - out_dict = { - line.strip(): line_count - for line_count, line in enumerate(fin) - } - return out_dict - - -src_dict = read_to_dict(src_lang_dict) -trg_dict = read_to_dict(trg_lang_dict) - -train_list = os.path.join(data_dir, 'train.list') -test_list = os.path.join(data_dir, 'test.list') - -UNK_IDX = 2 -START = "" -END = "" - -def _get_ids(s, dictionary): - words = s.strip().split() - return [dictionary[START]] + \ - [dictionary.get(w, UNK_IDX) for w in words] + \ - [dictionary[END]] - - -def train_reader(file_name): - def reader(): - with open(file_name, 'r') as f: - for line_count, line in enumerate(f): - line_split = line.strip().split('\t') - if len(line_split) != 2: - continue - src_seq = line_split[0] # one source sequence - src_ids = _get_ids(src_seq, src_dict) - - trg_seq = line_split[1] # one target sequence - trg_words = trg_seq.split() - trg_ids = [trg_dict.get(w, UNK_IDX) for w in trg_words] - - # remove sequence whose length > 80 in training mode - if len(src_ids) > 80 or len(trg_ids) > 80: - continue - trg_ids_next = trg_ids + [trg_dict[END]] - trg_ids = [trg_dict[START]] + trg_ids - - yield src_ids, trg_ids, trg_ids_next - - return reader +def seqToseq_net(source_dict_dim, target_dict_dim): + ### Network Architecture + word_vector_dim = 512 # dimension of word vector + decoder_size = 512 # dimension of hidden unit in GRU Decoder network + encoder_size = 512 # dimension of hidden unit in GRU Encoder network + + #### Encoder + src_word_id = paddle.layer.data( + name='source_language_word', + type=paddle.data_type.integer_value_sequence(source_dict_dim)) + src_embedding = paddle.layer.embedding( + input=src_word_id, + 
size=word_vector_dim, + param_attr=paddle.attr.ParamAttr(name='_source_language_embedding')) + src_forward = paddle.networks.simple_gru( + input=src_embedding, size=encoder_size) + src_backward = paddle.networks.simple_gru( + input=src_embedding, size=encoder_size, reverse=True) + encoded_vector = paddle.layer.concat(input=[src_forward, src_backward]) + + #### Decoder + with paddle.layer.mixed(size=decoder_size) as encoded_proj: + encoded_proj += paddle.layer.full_matrix_projection( + input=encoded_vector) + + backward_first = paddle.layer.first_seq(input=src_backward) + + with paddle.layer.mixed( + size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot: + decoder_boot += paddle.layer.full_matrix_projection( + input=backward_first) + + def gru_decoder_with_attention(enc_vec, enc_proj, current_word): + + decoder_mem = paddle.layer.memory( + name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) + + context = paddle.networks.simple_attention( + encoded_sequence=enc_vec, + encoded_proj=enc_proj, + decoder_state=decoder_mem) + + with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs: + decoder_inputs += paddle.layer.full_matrix_projection(input=context) + decoder_inputs += paddle.layer.full_matrix_projection( + input=current_word) + + gru_step = paddle.layer.gru_step( + name='gru_decoder', + input=decoder_inputs, + output_mem=decoder_mem, + size=decoder_size) + + with paddle.layer.mixed( + size=target_dict_dim, + bias_attr=True, + act=paddle.activation.Softmax()) as out: + out += paddle.layer.full_matrix_projection(input=gru_step) + return out + + decoder_group_name = "decoder_group" + group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True) + group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True) + group_inputs = [group_input1, group_input2] + + trg_embedding = paddle.layer.embedding( + input=paddle.layer.data( + name='target_language_word', + 
type=paddle.data_type.integer_value_sequence(target_dict_dim)), + size=word_vector_dim, + param_attr=paddle.attr.ParamAttr(name='_target_language_embedding')) + group_inputs.append(trg_embedding) + + # For decoder equipped with attention mechanism, in training, + # target embeding (the groudtruth) is the data input, + # while encoded source sequence is accessed to as an unbounded memory. + # Here, the StaticInput defines a read-only memory + # for the recurrent_group. + decoder = paddle.layer.recurrent_group( + name=decoder_group_name, + step=gru_decoder_with_attention, + input=group_inputs) + + lbl = paddle.layer.data( + name='target_language_next_word', + type=paddle.data_type.integer_value_sequence(target_dict_dim)) + cost = paddle.layer.classification_cost(input=decoder, label=lbl) + + return cost def main(): paddle.init(use_gpu=False, trainer_count=1) + # source and target dict dim. + dict_size = 30000 + source_dict_dim = target_dict_dim = dict_size + # define network topology - cost = seqToseq_net_v2(source_dict_dim, target_dict_dim) + cost = seqToseq_net(source_dict_dim, target_dict_dim) parameters = paddle.parameters.create(cost) # define optimize method and trainer - optimizer = paddle.optimizer.Adam(learning_rate=1e-4) + optimizer = paddle.optimizer.Adam( + learning_rate=5e-5, + regularization=paddle.optimizer.L2Regularization(rate=1e-3)) trainer = paddle.trainer.SGD(cost=cost, parameters=parameters, update_equation=optimizer) # define data reader - reader_dict = { + feeding = { 'source_language_word': 0, 'target_language_word': 1, 'target_language_next_word': 2 } - wmt14_reader = paddle.reader.batched( + wmt14_reader = paddle.batch( paddle.reader.shuffle( - train_reader("data/pre-wmt14/train/train"), buf_size=8192), + paddle.dataset.wmt14.train(dict_size=dict_size), buf_size=8192), batch_size=5) # define event_handler callback def event_handler(event): if isinstance(event, paddle.event.EndIteration): if event.batch_id % 10 == 0: - print "Pass %d, Batch 
%d, Cost %f, %s" % ( + print "\nPass %d, Batch %d, Cost %f, %s" % ( event.pass_id, event.batch_id, event.cost, event.metrics) + else: + sys.stdout.write('.') + sys.stdout.flush() # start to train trainer.train( reader=wmt14_reader, event_handler=event_handler, num_passes=10000, - reader_dict=reader_dict) + feeding=feeding) if __name__ == '__main__': diff --git a/demo/seqToseq/seqToseq_net_v2.py b/demo/seqToseq/seqToseq_net_v2.py deleted file mode 100644 index 058a6789d7094c71492ed9772ed5594c4c0c8f84..0000000000000000000000000000000000000000 --- a/demo/seqToseq/seqToseq_net_v2.py +++ /dev/null @@ -1,92 +0,0 @@ -import paddle.v2 as paddle - - -def seqToseq_net_v2(source_dict_dim, target_dict_dim): - ### Network Architecture - word_vector_dim = 512 # dimension of word vector - decoder_size = 512 # dimension of hidden unit in GRU Decoder network - encoder_size = 512 # dimension of hidden unit in GRU Encoder network - - #### Encoder - src_word_id = paddle.layer.data( - name='source_language_word', - type=paddle.data_type.integer_value_sequence(source_dict_dim)) - src_embedding = paddle.layer.embedding( - input=src_word_id, - size=word_vector_dim, - param_attr=paddle.attr.ParamAttr(name='_source_language_embedding')) - src_forward = paddle.networks.simple_gru( - input=src_embedding, size=encoder_size) - src_backward = paddle.networks.simple_gru( - input=src_embedding, size=encoder_size, reverse=True) - encoded_vector = paddle.layer.concat(input=[src_forward, src_backward]) - - #### Decoder - with paddle.layer.mixed(size=decoder_size) as encoded_proj: - encoded_proj += paddle.layer.full_matrix_projection( - input=encoded_vector) - - backward_first = paddle.layer.first_seq(input=src_backward) - - with paddle.layer.mixed( - size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot: - decoder_boot += paddle.layer.full_matrix_projection( - input=backward_first) - - def gru_decoder_with_attention(enc_vec, enc_proj, current_word): - - decoder_mem = paddle.layer.memory( - 
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) - - context = paddle.networks.simple_attention( - encoded_sequence=enc_vec, - encoded_proj=enc_proj, - decoder_state=decoder_mem) - - with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs: - decoder_inputs += paddle.layer.full_matrix_projection(input=context) - decoder_inputs += paddle.layer.full_matrix_projection( - input=current_word) - - gru_step = paddle.layer.gru_step( - name='gru_decoder', - input=decoder_inputs, - output_mem=decoder_mem, - size=decoder_size) - - with paddle.layer.mixed( - size=target_dict_dim, - bias_attr=True, - act=paddle.activation.Softmax()) as out: - out += paddle.layer.full_matrix_projection(input=gru_step) - return out - - decoder_group_name = "decoder_group" - group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True) - group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True) - group_inputs = [group_input1, group_input2] - - trg_embedding = paddle.layer.embedding( - input=paddle.layer.data( - name='target_language_word', - type=paddle.data_type.integer_value_sequence(target_dict_dim)), - size=word_vector_dim, - param_attr=paddle.attr.ParamAttr(name='_target_language_embedding')) - group_inputs.append(trg_embedding) - - # For decoder equipped with attention mechanism, in training, - # target embeding (the groudtruth) is the data input, - # while encoded source sequence is accessed to as an unbounded memory. - # Here, the StaticInput defines a read-only memory - # for the recurrent_group. 
- decoder = paddle.layer.recurrent_group( - name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs) - - lbl = paddle.layer.data( - name='target_language_next_word', - type=paddle.data_type.integer_value_sequence(target_dict_dim)) - cost = paddle.layer.classification_cost(input=decoder, label=lbl) - - return cost diff --git a/doc/api/v2/run_logic.rst b/doc/api/v2/run_logic.rst index 904d45966dfc16a474016ff48fd5a951988b0ab0..0f807873ff9a16263920fa73bf70316df3336d0b 100644 --- a/doc/api/v2/run_logic.rst +++ b/doc/api/v2/run_logic.rst @@ -2,6 +2,7 @@ Trainer API ########### + ========== Parameters ========== @@ -24,3 +25,10 @@ Event .. automodule:: paddle.v2.event :members: + + +========= +Inference +========= + +.. autofunction:: paddle.v2.infer \ No newline at end of file diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst index 6b132d2a4d31ab85347bd41d0243ffee858ac909..78f518cfe49d81470af018685620baf83f082fbb 100644 --- a/doc/getstarted/build_and_install/docker_install_cn.rst +++ b/doc/getstarted/build_and_install/docker_install_cn.rst @@ -1,158 +1,134 @@ -安装PaddlePaddle的Docker镜像 -============================ +PaddlePaddle的Docker容器使用方式 +================================ -PaddlePaddle项目提供官方 `Docker `_ 镜像。Docker镜像是我们目前唯一官方支持的部署和运行方式。 +PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行。 请注意,您需要更改 `Dockers设置 `_ 才能充分利用Mac OS X和Windows上的硬件资源。 -下述内容将分为如下几个类别描述。 -* PaddlePaddle提供的Docker镜像版本 -* 下载和运行Docker镜像 -* 注意事项 +通过Docker容器开发PaddlePaddle +------------------------------ -PaddlePaddle提供的Docker镜像版本 --------------------------------- +开发人员可以在Docker中开发PaddlePaddle。这样开发人员可以以一致的方式在不同的平台上工作 - Linux,Mac OS X和Windows。 -我们提供了12个 `Docker image `_ ,他们的image name都是 :code:`paddledev/paddle` ,tag分别为 +1. 将开发环境构建为Docker镜像 + + .. 
code-block:: bash -+-----------------+------------------+------------------------+-----------------------+ -| | normal | devel | demo | -+=================+==================+========================+=======================+ -| CPU | cpu-latest | cpu-devel-latest | cpu-demo-latest | -+-----------------+------------------+------------------------+-----------------------+ -| GPU | gpu-latest | gpu-devel-latest | gpu-demo-latest | -+-----------------+------------------+------------------------+-----------------------+ -| CPU WITHOUT AVX | cpu-noavx-latest | cpu-noavx-devel-latest | cpu-noavx-demo-latest | -+-----------------+------------------+------------------------+-----------------------+ -| GPU WITHOUT AVX | gpu-noavx-latest | gpu-noavx-devel-latest | gpu-noavx-demo-latest | -+-----------------+------------------+------------------------+-----------------------+ + git clone --recursive https://github.com/PaddlePaddle/Paddle + cd Paddle + docker build -t paddle:dev -f paddle/scripts/docker/Dockerfile . -其中,横向包括三个版本,normal,devel和demo。 -* Normal: 正常的Docker image,只包括paddle的二进制 -* Devel: 包括Paddle的二进制、编译环境和源代码 -* Demo: 包括Paddle运行demo所需要的依赖 + 请注意,默认情况下,:code:`docker build` 不会将源码导入到镜像中并编译它。如果我们想这样做,需要设置一个参数: -纵向包括四个版本,他们是。 + .. code-block:: bash -* CPU: CPU版本。需要支持AVX指令集的CPU -* GPU: GPU版本。需要支持AVX指令集的CPU -* CPU WITHOUT AVX: CPU版本,不支持AVX指令集的CPU也可以运行 -* GPU WITHOUT AVX: GPU版本,不需要AVX指令集的CPU也可以运行。 + docker build -t paddle:dev -f paddle/scripts/docker/Dockerfile --build-arg BUILD_AND_INSTALL=ON . -用户可以选择对应版本的docker image。使用如下脚本可以确定本机的CPU是否支持 :code:`AVX` 指令集\: -.. code-block:: bash +2. 运行开发环境 - if cat /proc/cpuinfo | grep -q avx ; then echo "Support AVX"; else echo "Not support AVX"; fi + 当我们编译好了 :code:`paddle:dev`, 我们可以在docker容器里做开发,源代码可以通过挂载本地文件来被载入Docker的开发环境里面: + + .. 
code-block:: bash -如果输出 :code:`Support AVX`,则可以选择上表中的AVX版本PaddlePaddle。否则需要选择非AVX的PaddlePaddle。选择普通CPU版本的devel版本的image,则可以使用 :code:`paddledev/paddle:cpu-devel-latest` 来引用这个image。 + docker run -d -p 2202:22 -v $PWD:/paddle paddle:dev -PaddlePaddle提供的镜像并不包含任何命令运行,想要运行PaddlePaddle,您需要进入镜像运行PaddlePaddle -程序或者自定义一个含有启动脚本的image。具体请参考注意事项中的 :code:`使用ssh访问PaddlePaddle镜像` + 以上代码会启动一个带有PaddlePaddle开发环境的docker容器,源代码会被挂载到 :code:`/paddle` 。 -下载和运行Docker镜像 --------------------- + 请注意, :code:`paddle:dev` 的默认入口是 :code:`sshd` 。以上的 :code:`docker run` 命令其实会启动一个在2202端口监听的SSHD服务器。这样,我们就能SSH进入我们的开发容器了: + + .. code-block:: bash -为了运行PaddlePaddle的docker镜像,您需要在机器中安装好Docker。安装Docker需要您的机器 -至少具有3.10以上的linux kernel。安装方法请参考 -`Docker的官方文档 `_ 。如果您使用 -mac osx或者是windows机器,请参考 -`mac osx的安装文档 `_ 和 -`windows 的安装文档 `_ 。 + ssh root@localhost -p 2202 -您可以使用 :code:`docker pull` 命令预先下载镜像,也可以直接执行 -:code:`docker run` 命令运行镜像。执行方法如下: +3. 在Docker开发环境中编译与安装PaddlPaddle代码 -.. code-block:: bash - - $ docker run -it paddledev/paddle:cpu-latest + 当在容器里面的时候,可以用脚本 :code:`paddle/scripts/docker/build.sh` 来编译、安装与测试PaddlePaddle: + + .. code-block:: bash + + /paddle/paddle/scripts/docker/build.sh -即可启动和进入PaddlePaddle的container。如果运行GPU版本的PaddlePaddle,则需要先将 -cuda相关的Driver和设备映射进container中,脚本类似于 + 以上指令会在 :code:`/paddle/build` 中编译PaddlePaddle。通过以下指令可以运行单元测试: + + .. code-block:: bash -.. 
code-block:: bash + cd /paddle/build + ctest - $ export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" - $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') - $ docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:gpu-latest -进入Docker container后,运行 :code:`paddle version` 即可打印出PaddlePaddle的版本和构建 -信息。安装完成的PaddlePaddle主体包括三个部分, :code:`paddle` 脚本, python的 -:code:`paddle` 包和 :code:`py_paddle` 包。其中\: +纯CPU和GPU的docker镜像 +---------------------- -* :code:`paddle` 脚本和 :code:`paddle` 的python包是PaddlePaddle的训练主要程序。使用 - :code:`paddle` 脚本可以启动PaddlePaddle的训练进程和pserver。而 :code:`paddle` 脚本 - 中的二进制使用了 :code:`paddle` 的python包来做配置文件解析等工作。 -* python包 :code:`py_paddle` 是一个swig封装的PaddlePaddle包,用来做预测和简单的定制化 - 训练。 +对于每一个PaddlePaddle版本,我们都会发布两个Docker镜像:纯CPU的和GPU的。我们通过设置 `dockerhub.com `_ 自动运行以下两个命令: -注意事项 --------- +.. code-block:: bash -性能问题 -++++++++ + docker build -t paddle:cpu -f paddle/scripts/docker/Dockerfile . + docker build -t paddle:gpu -f paddle/scripts/docker/Dockerfile.gpu . -由于Docker是基于容器的轻量化虚拟方案,所以在CPU的运算性能上并不会有严重的影响。 -而GPU的驱动和设备全部映射到了容器内,所以GPU在运算性能上也不会有严重的影响。 +以交互容器方式运行纯CPU的镜像: -但是如果使用了高性能的网卡,例如RDMA网卡(RoCE 40GbE 或者 IB 56GbE),或者高性能的 -以太网卡 (10GbE)。推荐使用将本地网卡,即 "--net=host" 来进行训练。而不使用docker -的网桥来进行网络通信。 +.. code-block:: bash -远程访问问题和二次开发 -++++++++++++++++++++++ + docker run -it --rm paddledev/paddle:cpu-latest /bin/bash -由于PaddlePaddle的Docker镜像并不包含任何预定义的运行命令。所以如果想要在后台启用ssh -远程访问,则需要进行一定的二次开发,将ssh装入系统内并开启远程访问。二次开发可以 -使用Dockerfile构建一个全新的docker image。需要参考 -`Dockerfile的文档 `_ 和 -`Dockerfile的最佳实践 `_ -两个文档。 +或者,可以以后台进程方式运行容器: -简单的含有ssh的Dockerfile如下: +.. code-block:: bash -.. code-block:: bash + docker run -d -p 2202:22 paddledev/paddle:cpu-latest - FROM paddledev/paddle:cpu-latest +然后用密码 :code:`root` SSH进入容器: - MAINTAINER PaddlePaddle dev team +.. 
code-block:: bash - RUN apt-get update - RUN apt-get install -y openssh-server - RUN mkdir /var/run/sshd - RUN echo 'root:root' | chpasswd + ssh -p 2202 root@localhost - RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config - RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config +SSH方式的一个优点是我们可以从多个终端进入容器。比如,一个终端运行vi,另一个终端运行Python。另一个好处是我们可以把PaddlePaddle容器运行在远程服务器上,并在笔记本上通过SSH与其连接。 - EXPOSE 22 - CMD ["/usr/sbin/sshd", "-D"] +以上方法在GPU镜像里也能用-只是请不要忘记安装CUDA驱动,以及告诉Docker: +.. code-block:: bash -使用该Dockerfile构建出镜像,然后运行这个container即可。相关命令为\: + export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" + export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') + docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:gpu-latest -.. code-block:: bash - # cd到含有Dockerfile的路径中 - $ docker build . -t paddle_ssh - # 运行这个container,将宿主机的8022端口映射到container的22端口上 - $ docker run -d -p 8022:22 --name paddle_ssh_machine paddle_ssh +非AVX镜像 +--------- -执行如下命令即可以关闭这个container,并且删除container中的数据\: +纯CPU镜像以及GPU镜像都会用到AVX指令集,但是2008年之前生产的旧电脑不支持AVX。以下指令能检查Linux电脑是否支持AVX: -.. code-block:: bash - - # 关闭container - $ docker stop paddle_ssh_machine - # 删除container - $ docker rm paddle_ssh_machine -如果想要在外部机器访问这个container,即可以使用ssh访问宿主机的8022端口。用户名为 -root,密码也是root。命令为\: +.. code-block:: bash -.. code-block:: bash + if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi - $ ssh -p 8022 root@YOUR_HOST_MACHINE +如果输出是No,我们就需要手动编译一个非AVX版本的镜像: -至此,您就可以远程的使用PaddlePaddle啦。 +.. code-block:: bash + + cd ~ + git clone https://github.com/PaddlePaddle/Paddle.git + cd Paddle + docker build --build-arg WITH_AVX=OFF -t paddle:cpu-noavx -f paddle/scripts/docker/Dockerfile . + docker build --build-arg WITH_AVX=OFF -t paddle:gpu-noavx -f paddle/scripts/docker/Dockerfile.gpu .
+ + +文档 +---- + +Paddle的Docker镜像带有一个通过 `woboq code browser +`_ 生成的HTML版本的C++源代码,便于用户浏览C++源码。 + +只要在Docker里启动PaddlePaddle的时候给它一个名字,就可以再运行另一个Nginx Docker镜像来服务HTML代码: + +.. code-block:: bash + + docker run -d --name paddle-cpu-doc paddle:cpu + docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx + +接着我们就能够打开浏览器在 http://localhost:8088/paddle/ 浏览代码。 diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp index 123273f916f5d33e2543d9f5f28573c3b5761e28..4654d0206413ec198da62af12e294cd5b442e735 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp @@ -346,7 +346,9 @@ Evaluator* MultiGradientMachine::makeEvaluator() const { void MultiGradientMachine::eval(Evaluator* evaluator) const { for (auto& thread : threads_) { SetDevice device(thread->getDeviceId()); - thread->getGradientMachine()->eval(evaluator); + if (thread->hasInputData()) { + thread->getGradientMachine()->eval(evaluator); + } } } @@ -356,14 +358,19 @@ void MultiGradientMachine::getOutArgs(std::vector* outArgs, REGISTER_TIMER("waitOutArgs"); thread->waitOutArgsReady(); } - outArgs_.resize(threads_[0]->getOutArgs().size()); + + outArgs_.resize(threads_[threads_.size() - 1]->getOutArgs().size()); REGISTER_TIMER("copyOutArgs"); for (size_t i = 0; i < outArgs_.size(); ++i) { std::vector args; args.reserve(threads_.size()); for (auto& thread : threads_) { - args.push_back(thread->getOutArgs()[i]); + // If the thread input is empty, then the output is empty. 
+ auto tmp = thread->getOutArgs(); + if (tmp.size() > 0) { + args.push_back(tmp[i]); + } } outArgs_[i].concat(args, useGpu_, outArgStream_, passType); } @@ -534,7 +541,7 @@ void TrainerThread::prefetch() { void TrainerThread::forward() { if (!inArgsCopied_) { REGISTER_TIMER("copyInArgs"); - copyInArgs(); + batchSize_ = copyInArgs(); } else { inArgsCopied_ = false; } @@ -564,7 +571,12 @@ void TrainerThread::forward() { { REGISTER_TIMER("thread_forward"); - gradientMachine_->forward(inArgs_, &outArgs_, multiMachine_->getPassType()); + if (batchSize_ > 0) { + gradientMachine_->forward( + inArgs_, &outArgs_, multiMachine_->getPassType()); + } else { + outArgs_.clear(); + } } outArgsReadySem_.post(); } @@ -574,7 +586,13 @@ void TrainerThread::backward() { if (multiMachine_->isPassGrad()) { copyOutputGrad(); } - gradientMachine_->backward(backwardCallback_); + if (batchSize_ > 0) { + gradientMachine_->backward(backwardCallback_); + } else { + for (size_t i = parameters_.size(); i > 0; i--) { + backwardCallback(parameters_[i - 1].get()); + } + } if (multiMachine_->hasNonstaticCpuParamters()) { mergeCpuGradients(); } @@ -732,7 +750,7 @@ void TrainerThread::notifyValueReady(int paramId) { notifyValueDispatch(paramId); } -void TrainerThread::copyInArgs() { +int TrainerThread::copyInArgs() { const std::vector& fullInArgs = multiMachine_->getInArgs(); int numThreads = multiMachine_->getAllThreads().size(); int32_t numSequences = fullInArgs[0].getNumSequences(); @@ -748,7 +766,7 @@ void TrainerThread::copyInArgs() { } if (copySize == 0) { - return; + return 0; } for (size_t i = 0; i < fullInArgs.size(); i++) { @@ -758,6 +776,7 @@ void TrainerThread::copyInArgs() { copySize, FLAGS_parallel_nn ? 
false : multiMachine_->useGpu()); } + return copySize; } void TrainerThread::mergeCpuGradients() { diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h index 838a52b5153af63adbce5788824b9f541f22517c..70203bbb97fe79d72fbc6bd2b5d427cb1de7b61f 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.h +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h @@ -387,6 +387,9 @@ public: /// copy the output gradient from the main GradientMachine. void copyOutputGrad(); + /// Whether the thread has input data. + bool hasInputData() { return batchSize_ != 0; } + protected: void mergeCpuGradients(); @@ -407,7 +410,7 @@ protected: void copyGradToBufferThread(); void gradCollectThread(); - void copyInArgs(); + int copyInArgs(); void forward(); void backward(); void backwardCallback(Parameter* para); @@ -467,6 +470,7 @@ protected: /// indicate whether inArgs is copied before forward() bool inArgsCopied_; + int batchSize_; }; } // namespace paddle diff --git a/python/paddle/trainer_config_helpers/layer_math.py b/python/paddle/trainer_config_helpers/layer_math.py index 2d9e36f2b0d379d907634208a45c69efa9dbba3d..544b443825393c9a31c0375724d4ca63dac5c5eb 100644 --- a/python/paddle/trainer_config_helpers/layer_math.py +++ b/python/paddle/trainer_config_helpers/layer_math.py @@ -39,6 +39,7 @@ register_unary_math_op('abs', act.AbsActivation()) register_unary_math_op('sigmoid', act.SigmoidActivation()) register_unary_math_op('tanh', act.TanhActivation()) register_unary_math_op('square', act.SquareActivation()) +register_unary_math_op('relu', act.ReluActivation()) def add(layeroutput, other): diff --git a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py index 3331c10d6497f58eb135208bd7abe48aacfb10ae..24c901c8ee3ab1c90fc14fbff761db06345a6313 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py +++ 
b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py @@ -7,8 +7,9 @@ x = layer_math.exp(x) x = layer_math.log(x) x = layer_math.abs(x) x = layer_math.sigmoid(x) +x = layer_math.tanh(x) x = layer_math.square(x) -x = layer_math.square(x) +x = layer_math.relu(x) y = 1 + x y = y + 1 y = x + y diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr index da8da1b541f37a09654202f68232b99e4dac9f61..9b8a2ad9687d313e6c5017c2d7331eddf539af92 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr @@ -65,13 +65,28 @@ layers { } } } +layers { + name: "__tanh_0__" + type: "mixed" + size: 100 + active_type: "tanh" + inputs { + input_layer_name: "__sigmoid_0__" + proj_conf { + type: "identity" + name: "___tanh_0__.w0" + input_size: 100 + output_size: 100 + } + } +} layers { name: "__square_0__" type: "mixed" size: 100 active_type: "square" inputs { - input_layer_name: "__sigmoid_0__" + input_layer_name: "__tanh_0__" proj_conf { type: "identity" name: "___square_0__.w0" @@ -81,15 +96,15 @@ layers { } } layers { - name: "__square_1__" + name: "__relu_0__" type: "mixed" size: 100 - active_type: "square" + active_type: "relu" inputs { input_layer_name: "__square_0__" proj_conf { type: "identity" - name: "___square_1__.w0" + name: "___relu_0__.w0" input_size: 100 output_size: 100 } @@ -101,7 +116,7 @@ layers { size: 100 active_type: "" inputs { - input_layer_name: "__square_1__" + input_layer_name: "__relu_0__" } slope: 1.0 intercept: 1 @@ -123,7 +138,7 @@ layers { size: 100 active_type: "" inputs { - input_layer_name: "__square_1__" + input_layer_name: "__relu_0__" proj_conf { type: "identity" name: "___mixed_0__.w0" @@ -147,7 +162,7 @@ layers { size: 100 active_type: "" inputs { - input_layer_name: "__square_1__" + input_layer_name: 
"__relu_0__" } slope: -1.0 intercept: 0.0 @@ -339,8 +354,9 @@ sub_models { layer_names: "__log_0__" layer_names: "__abs_0__" layer_names: "__sigmoid_0__" + layer_names: "__tanh_0__" layer_names: "__square_0__" - layer_names: "__square_1__" + layer_names: "__relu_0__" layer_names: "__slope_intercept_layer_0__" layer_names: "__slope_intercept_layer_1__" layer_names: "__mixed_0__" diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py index b7465238be8138e47913b7bd3f1c669ed9653958..bda8e22fd282f8ff4a820e4ecb6b3bb421d57890 100644 --- a/python/paddle/v2/data_feeder.py +++ b/python/paddle/v2/data_feeder.py @@ -14,11 +14,18 @@ from py_paddle import DataProviderConverter -import data_type +import paddle.trainer.PyDataProvider2 as pydp2 __all__ = ['DataFeeder'] +def default_feeding_map(data_types): + reader_dict = dict() + for i, tp in enumerate(data_types): + reader_dict[tp[0]] = i + return reader_dict + + class DataFeeder(DataProviderConverter): """ DataFeeder converts the data returned by paddle.reader into a data structure @@ -60,19 +67,27 @@ class DataFeeder(DataProviderConverter): :type data_types: list :param reader_dict: A dictionary to specify the position of each data in the input data. - :type reader_dict: dict + :type feeding: dict """ - def __init__(self, data_types, reader_dict): + def __init__(self, data_types, feeding=None): self.input_names = [] input_types = [] - self.reader_dict = reader_dict + if feeding is None: + feeding = default_feeding_map(data_types) + + self.feeding = feeding for each in data_types: self.input_names.append(each[0]) - assert isinstance(each[1], data_type.InputType) + if not isinstance(each[1], pydp2.InputType): + raise TypeError("second item in each data_type should be an " + "InputType") input_types.append(each[1]) DataProviderConverter.__init__(self, input_types) + def __len__(self): + return len(self.input_names) + def convert(self, dat, argument=None): """ :param dat: A list of mini-batch data. 
Each sample is a list or tuple @@ -90,7 +105,7 @@ class DataFeeder(DataProviderConverter): for each in data: reorder = [] for name in self.input_names: - reorder.append(each[self.reader_dict[name]]) + reorder.append(each[self.feeding[name]]) retv.append(reorder) return retv diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index dc65e8f8b6f04b078a3449c622478095086cecbe..25fd8227da2f219d75c6b830e65627ecf35be453 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -23,7 +23,12 @@ import re import random import functools -__all__ = ['train_creator', 'test_creator'] +__all__ = [ + 'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id', + 'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info' +] + +age_table = [1, 18, 25, 35, 45, 50, 56] class MovieInfo(object): @@ -38,17 +43,32 @@ class MovieInfo(object): [MOVIE_TITLE_DICT[w.lower()] for w in self.title.split()] ] + def __str__(self): + return "<MovieInfo id(%d), title(%s), categories(%s)>" % ( + self.index, self.title, self.categories) + + def __repr__(self): + return self.__str__() + class UserInfo(object): def __init__(self, index, gender, age, job_id): self.index = int(index) self.is_male = gender == 'M' - self.age = [1, 18, 25, 35, 45, 50, 56].index(int(age)) + self.age = age_table.index(int(age)) self.job_id = int(job_id) def value(self): return [self.index, 0 if self.is_male else 1, self.age, self.job_id] + def __str__(self): + return "<UserInfo id(%d), gender(%s), age(%d), job(%d)>" % ( + self.index, "M" + if self.is_male else "F", age_table[self.age], self.job_id) + + def __repr__(self): + return str(self) + MOVIE_INFO = None MOVIE_TITLE_DICT = None @@ -59,7 +79,8 @@ USER_INFO = None def __initialize_meta_info__(): fn = download( url='http://files.grouplens.org/datasets/movielens/ml-1m.zip', - md5='c4d9eecfca2ab87c1945afe126590906') + module_name='movielens', + md5sum='c4d9eecfca2ab87c1945afe126590906') global MOVIE_INFO if MOVIE_INFO is None: pattern =
re.compile(r'^(.*)\((\d+)\)$') @@ -122,14 +143,63 @@ def __reader_creator__(**kwargs): return lambda: __reader__(**kwargs) -train_creator = functools.partial(__reader_creator__, is_test=False) -test_creator = functools.partial(__reader_creator__, is_test=True) +train = functools.partial(__reader_creator__, is_test=False) +test = functools.partial(__reader_creator__, is_test=True) + + +def get_movie_title_dict(): + __initialize_meta_info__() + return MOVIE_TITLE_DICT + + +def __max_index_info__(a, b): + if a.index > b.index: + return a + else: + return b + + +def max_movie_id(): + __initialize_meta_info__() + return reduce(__max_index_info__, MOVIE_INFO.viewvalues()).index + + +def max_user_id(): + __initialize_meta_info__() + return reduce(__max_index_info__, USER_INFO.viewvalues()).index + + +def __max_job_id_impl__(a, b): + if a.job_id > b.job_id: + return a + else: + return b + + +def max_job_id(): + __initialize_meta_info__() + return reduce(__max_job_id_impl__, USER_INFO.viewvalues()).job_id + + +def movie_categories(): + __initialize_meta_info__() + return CATEGORIES_DICT + + +def user_info(): + __initialize_meta_info__() + return USER_INFO + + +def movie_info(): + __initialize_meta_info__() + return MOVIE_INFO def unittest(): - for train_count, _ in enumerate(train_creator()()): + for train_count, _ in enumerate(train()()): pass - for test_count, _ in enumerate(test_creator()()): + for test_count, _ in enumerate(test()()): pass print train_count, test_count diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py index 9904848b5d3ef95dc331fc0ba1a98f29f8b1dfeb..f5a16d51477f9cfbf0cd32af54098406fbbd2b41 100644 --- a/python/paddle/v2/dataset/wmt14.py +++ b/python/paddle/v2/dataset/wmt14.py @@ -14,129 +14,92 @@ """ wmt14 dataset """ -import paddle.v2.dataset.common import tarfile -import os.path -import itertools + +import paddle.v2.dataset.common __all__ = ['train', 'test', 'build_dict'] URL_DEV_TEST = 
'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz' MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5' -URL_TRAIN = 'http://localhost:8000/train.tgz' -MD5_TRAIN = '72de99da2830ea5a3a2c4eb36092bbc7' - - -def word_count(f, word_freq=None): - add = paddle.v2.dataset.common.dict_add - if word_freq == None: - word_freq = {} - - for l in f: - for w in l.strip().split(): - add(word_freq, w) - add(word_freq, '<s>') - add(word_freq, '<e>') - - return word_freq - - -def get_word_dix(word_freq): - TYPO_FREQ = 50 - word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items()) - word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0])) - words, _ = list(zip(*word_freq_sorted)) - word_idx = dict(zip(words, xrange(len(words)))) - word_idx['<unk>'] = len(words) - return word_idx - - -def get_word_freq(train, dev): - word_freq = word_count(train, word_count(dev)) - if '<unk>' in word_freq: - # remove <unk> for now, since we will set it as last index - del word_freq['<unk>'] - return word_freq - - -def build_dict(): - base_dir = './wmt14-data' - train_en_filename = base_dir + '/train/train.en' - train_fr_filename = base_dir + '/train/train.fr' - dev_en_filename = base_dir + '/dev/ntst1213.en' - dev_fr_filename = base_dir + '/dev/ntst1213.fr' - - if not os.path.exists(train_en_filename) or not os.path.exists( - train_fr_filename): - with tarfile.open( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', - MD5_TRAIN)) as tf: - tf.extractall(base_dir) - - if not os.path.exists(dev_en_filename) or not os.path.exists( - dev_fr_filename): - with tarfile.open( - paddle.v2.dataset.common.download(URL_DEV_TEST, 'wmt14', - MD5_DEV_TEST)) as tf: - tf.extractall(base_dir) - - f_en = open(train_en_filename) - f_fr = open(train_fr_filename) - f_en_dev = open(dev_en_filename) - f_fr_dev = open(dev_fr_filename) - - word_freq_en = get_word_freq(f_en, f_en_dev) - word_freq_fr = get_word_freq(f_fr, f_fr_dev) - - f_en.close() - f_fr.close() - f_en_dev.close() - f_fr_dev.close() - - return
get_word_dix(word_freq_en), get_word_dix(word_freq_fr) - - -def reader_creator(directory, path_en, path_fr, URL, MD5, dict_en, dict_fr): +# This is a small data set for testing. The original data is too large and will be added later. +URL_TRAIN = 'http://paddlepaddle.bj.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz' +MD5_TRAIN = 'a755315dd01c2c35bde29a744ede23a6' + +START = "<s>" +END = "<e>" +UNK = "<unk>" +UNK_IDX = 2 + + +def __read_to_dict__(tar_file, dict_size): + def __to_dict__(fd, size): + out_dict = dict() + for line_count, line in enumerate(fd): + if line_count < size: + out_dict[line.strip()] = line_count + else: + break + return out_dict + + with tarfile.open(tar_file, mode='r') as f: + names = [ + each_item.name for each_item in f + if each_item.name.endswith("src.dict") + ] + assert len(names) == 1 + src_dict = __to_dict__(f.extractfile(names[0]), dict_size) + names = [ + each_item.name for each_item in f + if each_item.name.endswith("trg.dict") + ] + assert len(names) == 1 + trg_dict = __to_dict__(f.extractfile(names[0]), dict_size) + return src_dict, trg_dict + + +def reader_creator(tar_file, file_name, dict_size): def reader(): - if not os.path.exists(path_en) or not os.path.exists(path_fr): - with tarfile.open( - paddle.v2.dataset.common.download(URL, 'wmt14', MD5)) as tf: - tf.extractall(directory) - - f_en = open(path_en) - f_fr = open(path_fr) - UNK_en = dict_en['<unk>'] - UNK_fr = dict_fr['<unk>'] - - for en, fr in itertools.izip(f_en, f_fr): - src_ids = [dict_en.get(w, UNK_en) for w in en.strip().split()] - tar_ids = [ - dict_fr.get(w, UNK_fr) - for w in ['<s>'] + fr.strip().split() + ['<e>'] + src_dict, trg_dict = __read_to_dict__(tar_file, dict_size) + with tarfile.open(tar_file, mode='r') as f: + names = [ + each_item.name for each_item in f + if each_item.name.endswith(file_name) ] - - # remove sequence whose length > 80 in training mode - if len(src_ids) == 0 or len(tar_ids) <= 1 or len( - src_ids) > 80 or len(tar_ids) > 80: - continue - - yield src_ids, tar_ids[:-1],
tar_ids[1:] - - f_en.close() - f_fr.close() + for name in names: + for line in f.extractfile(name): + line_split = line.strip().split('\t') + if len(line_split) != 2: + continue + src_seq = line_split[0] # one source sequence + src_words = src_seq.split() + src_ids = [ + src_dict.get(w, UNK_IDX) + for w in [START] + src_words + [END] + ] + + trg_seq = line_split[1] # one target sequence + trg_words = trg_seq.split() + trg_ids = [trg_dict.get(w, UNK_IDX) for w in trg_words] + + # remove sequence whose length > 80 in training mode + if len(src_ids) > 80 or len(trg_ids) > 80: + continue + trg_ids_next = trg_ids + [trg_dict[END]] + trg_ids = [trg_dict[START]] + trg_ids + + yield src_ids, trg_ids, trg_ids_next return reader -def train(dict_en, dict_fr): - directory = './wmt14-data' - return reader_creator(directory, directory + '/train/train.en', - directory + '/train/train.fr', URL_TRAIN, MD5_TRAIN, - dict_en, dict_fr) +def train(dict_size): + return reader_creator( + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + 'train/train', dict_size) -def test(dict_en, dict_fr): - directory = './wmt14-data' - return reader_creator(directory, directory + '/dev/ntst1213.en', - directory + '/dev/ntst1213.fr', URL_DEV_TEST, - MD5_DEV_TEST, dict_en, dict_fr) +def test(dict_size): + return reader_creator( + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + 'test/test', dict_size) diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py index 476fd3fa4523a77709f68c73c73e6851e04064aa..ec3c67d89548f68d705a9b5de80e28597e9829da 100644 --- a/python/paddle/v2/inference.py +++ b/python/paddle/v2/inference.py @@ -1,16 +1,16 @@ +import numpy import py_paddle.swig_paddle as api - +import collections import topology +import minibatch from data_feeder import DataFeeder -import itertools -import numpy __all__ = ['infer'] class Inference(object): - def __init__(self, output, parameters): - topo = topology.Topology(output) + def __init__(self, 
output_layer, parameters): + topo = topology.Topology(output_layer) gm = api.GradientMachine.createFromConfigProto( topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE]) for param in gm.getParameters(): @@ -21,10 +21,16 @@ class Inference(object): self.__gradient_machine__ = gm self.__data_types__ = topo.data_type() - def iter_infer(self, reader, reader_dict=None): - if reader_dict is None: - reader_dict = self.default_reader_dict() - feeder = DataFeeder(self.__data_types__, reader_dict) + def iter_infer(self, input, feeding=None): + feeder = DataFeeder(self.__data_types__, feeding) + batch_size = len(input) + + def __reader_impl__(): + for each_sample in input: + yield each_sample + + reader = minibatch.batch(__reader_impl__, batch_size=batch_size) + self.__gradient_machine__.start() for data_batch in reader(): yield self.__gradient_machine__.forwardTest(feeder(data_batch)) @@ -47,13 +53,36 @@ class Inference(object): else: return retv - def default_reader_dict(self): - reader_dict = dict() - for i, tp in enumerate(self.__data_types__): - reader_dict[tp[0]] = i - return reader_dict +def infer(output_layer, parameters, input, feeding=None, field='value'): + """ + Infer a neural network by given neural network output and parameters. The + user should pass either a batch of input data or reader method. + + Example usages: + + .. code-block:: python + + result = paddle.infer(prediction, parameters, input=SomeData, + batch_size=32) + print result + + :param output_layer: output of the neural network that would be inferred + :type output_layer: paddle.v2.config_base.Layer + :param parameters: parameters of the neural network. + :type parameters: paddle.v2.parameters.Parameters + :param input: input data batch. Should be a python iterable object, and each + element is the data batch. + :type input: collections.Iterable + :param feeding: Reader dictionary. Default could generate from input + value. + :param field: The prediction field. 
It should be in [`value`, `ids`]. `value` + means return the prediction probabilities, `ids` means return + the prediction labels. Default is `value` + :type field: str + :return: a numpy array + :rtype: numpy.ndarray + """ -def infer(output, parameters, reader, reader_dict=None, field='value'): - inferer = Inference(output=output, parameters=parameters) - return inferer.infer(field=field, reader=reader, reader_dict=reader_dict) + inferer = Inference(output_layer=output_layer, parameters=parameters) + return inferer.infer(field=field, input=input, feeding=feeding) diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 187abaf9a34f7b7901c5ff71b15a6db05756c7c4..7bd3e2c565ee00c91402e7dea36c7393fb1a9bdf 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -61,7 +61,7 @@ class SGD(object): self.__gradient_machine__.randParameters() parameters.append_gradient_machine(gm) - def train(self, reader, num_passes=1, event_handler=None, reader_dict=None): + def train(self, reader, num_passes=1, event_handler=None, feeding=None): """ Training method. Will train num_passes of input data. @@ -70,14 +70,13 @@ class SGD(object): :param event_handler: Event handler. A method will be invoked when event occurred. :type event_handler: (BaseEvent) => None + :param feeding: Feeding is a map of neural network input name and array + index that reader returns.
+ :type feeding: dict :return: """ if event_handler is None: event_handler = default_event_handler - - if reader_dict is None: - reader_dict = self.default_reader_dict() - __check_train_args__(**locals()) updater = self.__optimizer__.create_local_updater() @@ -89,9 +88,7 @@ class SGD(object): pass_evaluator = self.__gradient_machine__.makeEvaluator() assert isinstance(pass_evaluator, api.Evaluator) out_args = api.Arguments.createArguments(0) - - feeder = DataFeeder(self.__data_types__, reader_dict) - + feeder = DataFeeder(self.__data_types__, feeding) for pass_id in xrange(num_passes): event_handler(v2_event.BeginPass(pass_id)) pass_evaluator.start() @@ -125,17 +122,8 @@ class SGD(object): event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator)) self.__gradient_machine__.finish() - def default_reader_dict(self): - reader_dict = dict() - for i, tp in enumerate(self.__data_types__): - reader_dict[tp[0]] = i - return reader_dict - - def test(self, reader, reader_dict=None): - if reader_dict is None: - reader_dict = self.default_reader_dict() - - feeder = DataFeeder(self.__data_types__, reader_dict) + def test(self, reader, feeding=None): + feeder = DataFeeder(self.__data_types__, feeding) evaluator = self.__gradient_machine__.makeEvaluator() out_args = api.Arguments.createArguments(0) evaluator.start()