From bb036f52605fefa1364b7bb38d16237dc2d41ff7 Mon Sep 17 00:00:00 2001 From: ying Date: Fri, 19 Jan 2018 15:07:18 +0800 Subject: [PATCH] use the same version yapf with paddle main repo. --- .pre-commit-config.yaml | 4 +- conv_seq2seq/beamsearch.py | 28 +++--- conv_seq2seq/model.py | 39 ++++---- conv_seq2seq/train.py | 8 +- ctr/avazu_data_processer.py | 14 +-- ctr/network_conf.py | 23 ++--- ctr/train.py | 5 +- ctr/utils.py | 4 +- deep_fm/network_conf.py | 19 ++-- deep_fm/preprocess.py | 13 ++- deep_fm/train.py | 5 +- dssm/network_conf.py | 29 +++--- dssm/reader.py | 10 ++- dssm/train.py | 6 +- fluid/adversarial/advbox/models/paddle.py | 14 ++- fluid/adversarial/fluid_mnist.py | 10 +-- fluid/adversarial/mnist_tutorial_fgsm.py | 3 +- generate_chinese_poetry/network_conf.py | 18 ++-- generate_chinese_poetry/reader.py | 15 ++-- generate_chinese_poetry/train.py | 5 +- generate_sequence_by_rnn_lm/network_conf.py | 5 +- generate_sequence_by_rnn_lm/train.py | 9 +- globally_normalized_reader/basic_modules.py | 5 +- globally_normalized_reader/beam_decoding.py | 34 +++---- globally_normalized_reader/featurize.py | 3 +- globally_normalized_reader/model.py | 49 +++++----- globally_normalized_reader/train.py | 8 +- globally_normalized_reader/vocab.py | 8 +- hsigmoid/network_conf.py | 17 ++-- hsigmoid/train.py | 3 +- image_classification/alexnet.py | 23 +++-- .../caffe2paddle/caffe2paddle.py | 4 +- image_classification/googlenet.py | 37 ++++---- image_classification/inception_resnet_v2.py | 5 +- image_classification/inception_v4.py | 9 +- image_classification/resnet.py | 10 ++- image_classification/train.py | 9 +- image_classification/vgg.py | 23 +++-- image_classification/xception.py | 9 +- ltr/lambda_rank.py | 29 +++--- ltr/ranknet.py | 24 ++--- ltr/train.py | 6 +- mt_with_external_memory/external_memory.py | 48 +++++----- mt_with_external_memory/infer.py | 8 +- mt_with_external_memory/model.py | 18 ++-- mt_with_external_memory/train.py | 11 ++- nce_cost/network_conf.py | 15 ++-- nce_cost/train.py | 3 +- .../text_classification/network_conf.py | 11 ++- nested_sequence/text_classification/reader.py | 14 ++- neural_qa/network.py | 89 +++++++++---------- neural_qa/train.py | 8 +- neural_qa/utils.py | 10 ++- nmt_without_attention/network_conf.py | 18 ++-- nmt_without_attention/train.py | 5 +- scene_text_recognition/infer.py | 4 +- scene_text_recognition/network_conf.py | 7 +- scene_text_recognition/train.py | 9 +- scheduled_sampling/network_conf.py | 54 +++++------ scheduled_sampling/train.py | 12 ++- sequence_tagging_for_ner/network_conf.py | 28 +++--- sequence_tagging_for_ner/train.py | 9 +- ssd/config/__init__.py | 1 + ssd/eval.py | 9 +- ssd/train.py | 9 +- text_classification/network_conf.py | 5 +- 66 files changed, 505 insertions(+), 501 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ede1c53a..1584bc76 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,5 @@ -- repo: https://github.com/pre-commit/mirrors-yapf.git - sha: v0.16.0 +- repo: https://github.com/PaddlePaddle/mirrors-yapf.git + sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37 hooks: - id: yapf files: \.py$ diff --git a/conv_seq2seq/beamsearch.py b/conv_seq2seq/beamsearch.py index 72fb59d4..dd8562f0 100644 --- a/conv_seq2seq/beamsearch.py +++ b/conv_seq2seq/beamsearch.py @@ -42,10 +42,12 @@ class BeamSearch(object): for sample_id in sample_list: for path in self.candidate_path[sample_id]: if len(path['seq']) < self.win_len: - cur_trg = [self.word_padding] * (self.win_len - len( - 
path['seq']) - 1) + [self.trg_dict['']] + path['seq'] - cur_trg_pos = [self.pos_padding] * (self.win_len - len( - path['seq']) - 1) + [0] + range(1, len(path['seq']) + 1) + cur_trg = [self.word_padding] * ( + self.win_len - len(path['seq']) - 1 + ) + [self.trg_dict['']] + path['seq'] + cur_trg_pos = [self.pos_padding] * ( + self.win_len - len(path['seq']) - 1) + [0] + range( + 1, len(path['seq']) + 1) else: cur_trg = path['seq'][-self.win_len:] cur_trg_pos = range( @@ -84,13 +86,11 @@ class BeamSearch(object): for seq_id, path in enumerate(self.candidate_path[sample_id]): for w in top_words[idx, :]: score = path['score'] + math.log(prob[idx, w]) - candidate_words[sample_id] = candidate_words[sample_id] + [ - { - 'word': w, - 'score': score, - 'seq_id': seq_id - } - ] + candidate_words[sample_id] = candidate_words[sample_id] + [{ + 'word': w, + 'score': score, + 'seq_id': seq_id + }] idx = idx + 1 return candidate_words @@ -140,10 +140,8 @@ class BeamSearch(object): w['word'] ] new_path[sample_id] = new_path[sample_id] + [{ - 'seq': - seq, - 'score': - w['score'] + 'seq': seq, + 'score': w['score'] }] return new_path diff --git a/conv_seq2seq/model.py b/conv_seq2seq/model.py index 21813af4..c31238f8 100644 --- a/conv_seq2seq/model.py +++ b/conv_seq2seq/model.py @@ -193,22 +193,20 @@ def attention(decoder_state, cur_embedding, encoded_vec, encoded_sum): m = paddle.layer.dot_prod(input1=expanded, input2=encoded_vec) - attention_weight = paddle.layer.fc( - input=m, - size=1, - act=paddle.activation.SequenceSoftmax(), - bias_attr=False) + attention_weight = paddle.layer.fc(input=m, + size=1, + act=paddle.activation.SequenceSoftmax(), + bias_attr=False) scaled = paddle.layer.scaling(weight=attention_weight, input=encoded_sum) attended = paddle.layer.pooling( input=scaled, pooling_type=paddle.pooling.Sum()) - attended_proj = paddle.layer.fc( - input=attended, - size=state_size, - act=paddle.activation.Linear(), - bias_attr=True) + attended_proj = paddle.layer.fc(input=attended, + size=state_size, + act=paddle.activation.Linear(), + bias_attr=True) attention_result = paddle.layer.addto(input=[attended_proj, residual]) @@ -279,11 +277,10 @@ def decoder(token_emb, if block_input.size == size: residual = block_input else: - residual = paddle.layer.fc( - input=block_input, - size=size, - act=paddle.activation.Linear(), - bias_attr=True) + residual = paddle.layer.fc(input=block_input, + size=size, + act=paddle.activation.Linear(), + bias_attr=True) decoder_state = gated_conv_with_batchnorm( input=block_input, @@ -381,12 +378,14 @@ def conv_seq2seq(src_dict_size, input=src, size=emb_dim, name='src_word_emb', - param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1)) + param_attr=paddle.attr.Param( + initial_mean=0., initial_std=0.1)) src_pos_emb = paddle.layer.embedding( input=src_pos, size=emb_dim, name='src_pos_emb', - param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1)) + param_attr=paddle.attr.Param( + initial_mean=0., initial_std=0.1)) num_attention = len(dec_conv_blocks) encoded_vec, encoded_sum = encoder( @@ -410,12 +409,14 @@ def conv_seq2seq(src_dict_size, input=trg, size=emb_dim, name='trg_word_emb', - param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1)) + param_attr=paddle.attr.Param( + initial_mean=0., initial_std=0.1)) trg_pos_emb = paddle.layer.embedding( input=trg_pos, size=emb_dim, name='trg_pos_emb', - param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1)) + param_attr=paddle.attr.Param( + initial_mean=0., initial_std=0.1)) decoder_out, weight = 
decoder( token_emb=trg_emb, diff --git a/conv_seq2seq/train.py b/conv_seq2seq/train.py index e23d9625..4bd9a1af 100644 --- a/conv_seq2seq/train.py +++ b/conv_seq2seq/train.py @@ -166,8 +166,7 @@ def train(train_data_path, src_dict_size = src_dict.__len__() trg_dict_size = trg_dict.__len__() - optimizer = paddle.optimizer.Adam( - learning_rate=1e-3, ) + optimizer = paddle.optimizer.Adam(learning_rate=1e-3, ) cost = conv_seq2seq( src_dict_size=src_dict_size, @@ -182,8 +181,9 @@ def train(train_data_path, # create parameters and trainer parameters = paddle.parameters.create(cost) - trainer = paddle.trainer.SGD( - cost=cost, parameters=parameters, update_equation=optimizer) + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer) padding_list = [context_len - 1 for (size, context_len) in dec_conv_blocks] padding_num = reduce(lambda x, y: x + y, padding_list) diff --git a/ctr/avazu_data_processer.py b/ctr/avazu_data_processer.py index 18aa8533..dd3c1441 100644 --- a/ctr/avazu_data_processer.py +++ b/ctr/avazu_data_processer.py @@ -79,8 +79,9 @@ all the files are for demo. feature_dims = {} -categorial_features = ('C1 banner_pos site_category app_category ' + - 'device_type device_conn_type').split() +categorial_features = ( + 'C1 banner_pos site_category app_category ' + 'device_type device_conn_type' +).split() id_features = 'id site_id app_id device_id _device_id_cross_site_id'.split() @@ -335,8 +336,8 @@ class AvazuDataset(object): else: fea0 = self.fields[key].cross_fea0 fea1 = self.fields[key].cross_fea1 - record.append( - self.fields[key].gen_cross_fea(row[fea0], row[fea1])) + record.append(self.fields[key].gen_cross_fea(row[fea0], row[ + fea1])) sparse_input = concat_sparse_vectors(record, self.id_dims) @@ -396,8 +397,9 @@ with open(output_infer_path, 'w') as f: dnn_input, lr_input = record dnn_input = ids2dense(dnn_input, feature_dims['dnn_input']) lr_input = ids2sparse(lr_input) - line = "%s\t%s\n" % (' '.join(map(str, dnn_input)), - ' '.join(map(str, lr_input)), ) + line = "%s\t%s\n" % ( + ' '.join(map(str, dnn_input)), + ' '.join(map(str, lr_input)), ) f.write(line) if id > args.test_set_size: break diff --git a/ctr/network_conf.py b/ctr/network_conf.py index f6f4e4a5..bcff49ee 100644 --- a/ctr/network_conf.py +++ b/ctr/network_conf.py @@ -60,15 +60,14 @@ class CTRmodel(object): ''' build DNN submodel. 
''' - dnn_embedding = layer.fc( - input=self.dnn_merged_input, size=dnn_layer_dims[0]) + dnn_embedding = layer.fc(input=self.dnn_merged_input, + size=dnn_layer_dims[0]) _input_layer = dnn_embedding for i, dim in enumerate(dnn_layer_dims[1:]): - fc = layer.fc( - input=_input_layer, - size=dim, - act=paddle.activation.Relu(), - name='dnn-fc-%d' % i) + fc = layer.fc(input=_input_layer, + size=dim, + act=paddle.activation.Relu(), + name='dnn-fc-%d' % i) _input_layer = fc return _input_layer @@ -76,8 +75,9 @@ class CTRmodel(object): ''' config LR submodel ''' - fc = layer.fc( - input=self.lr_merged_input, size=1, act=paddle.activation.Relu()) + fc = layer.fc(input=self.lr_merged_input, + size=1, + act=paddle.activation.Relu()) return fc def _build_classification_model(self, dnn, lr): @@ -95,8 +95,9 @@ class CTRmodel(object): def _build_regression_model(self, dnn, lr): merge_layer = layer.concat(input=[dnn, lr]) - self.output = layer.fc( - input=merge_layer, size=1, act=paddle.activation.Sigmoid()) + self.output = layer.fc(input=merge_layer, + size=1, + act=paddle.activation.Sigmoid()) if not self.is_infer: self.train_cost = paddle.layer.square_error_cost( input=self.output, label=self.click) diff --git a/ctr/train.py b/ctr/train.py index 421425e2..de7add61 100644 --- a/ctr/train.py +++ b/ctr/train.py @@ -68,8 +68,9 @@ def train(): params = paddle.parameters.create(model.train_cost) optimizer = paddle.optimizer.AdaGrad() - trainer = paddle.trainer.SGD( - cost=model.train_cost, parameters=params, update_equation=optimizer) + trainer = paddle.trainer.SGD(cost=model.train_cost, + parameters=params, + update_equation=optimizer) dataset = reader.Dataset() diff --git a/ctr/utils.py b/ctr/utils.py index d8cf569c..437554c3 100644 --- a/ctr/utils.py +++ b/ctr/utils.py @@ -64,5 +64,7 @@ def load_dnn_input_record(sent): def load_lr_input_record(sent): res = [] for _ in [x.split(':') for x in sent.split()]: - res.append((int(_[0]), float(_[1]), )) + res.append(( + int(_[0]), + float(_[1]), )) return res diff --git a/deep_fm/network_conf.py b/deep_fm/network_conf.py index 1857c8f6..545fe07b 100644 --- a/deep_fm/network_conf.py +++ b/deep_fm/network_conf.py @@ -5,8 +5,9 @@ sparse_feature_dim = 117568 def fm_layer(input, factor_size, fm_param_attr): - first_order = paddle.layer.fc( - input=input, size=1, act=paddle.activation.Linear()) + first_order = paddle.layer.fc(input=input, + size=1, + act=paddle.activation.Linear()) second_order = paddle.layer.factorization_machine( input=input, factor_size=factor_size, @@ -51,17 +52,15 @@ def DeepFM(factor_size, infer=False): sparse_embed_seq = map(embedding_layer, sparse_input_ids) sparse_embed = paddle.layer.concat(sparse_embed_seq) - fc1 = paddle.layer.fc( - input=[sparse_embed, dense_input], - size=400, - act=paddle.activation.Relu()) + fc1 = paddle.layer.fc(input=[sparse_embed, dense_input], + size=400, + act=paddle.activation.Relu()) fc2 = paddle.layer.fc(input=fc1, size=400, act=paddle.activation.Relu()) fc3 = paddle.layer.fc(input=fc2, size=400, act=paddle.activation.Relu()) - predict = paddle.layer.fc( - input=[dense_fm, sparse_fm, fc3], - size=1, - act=paddle.activation.Sigmoid()) + predict = paddle.layer.fc(input=[dense_fm, sparse_fm, fc3], + size=1, + act=paddle.activation.Sigmoid()) if not infer: label = paddle.layer.data( diff --git a/deep_fm/preprocess.py b/deep_fm/preprocess.py index 4e6f8a6a..36ffea16 100755 --- a/deep_fm/preprocess.py +++ b/deep_fm/preprocess.py @@ -121,8 +121,8 @@ def preprocess(datadir, outdir): continous_vals = [] for i in range(0, 
len(continous_features)): val = dists.gen(i, features[continous_features[i]]) - continous_vals.append( - "{0:.6f}".format(val).rstrip('0').rstrip('.')) + continous_vals.append("{0:.6f}".format(val).rstrip('0') + .rstrip('.')) categorial_vals = [] for i in range(0, len(categorial_features)): val = dicts.gen(i, features[categorial_features[ @@ -147,13 +147,12 @@ def preprocess(datadir, outdir): continous_vals = [] for i in range(0, len(continous_features)): val = dists.gen(i, features[continous_features[i] - 1]) - continous_vals.append( - "{0:.6f}".format(val).rstrip('0').rstrip('.')) + continous_vals.append("{0:.6f}".format(val).rstrip('0') + .rstrip('.')) categorial_vals = [] for i in range(0, len(categorial_features)): - val = dicts.gen(i, - features[categorial_features[i] - - 1]) + categorial_feature_offset[i] + val = dicts.gen(i, features[categorial_features[ + i] - 1]) + categorial_feature_offset[i] categorial_vals.append(str(val)) continous_vals = ','.join(continous_vals) diff --git a/deep_fm/train.py b/deep_fm/train.py index 0d2e79d6..92d48696 100755 --- a/deep_fm/train.py +++ b/deep_fm/train.py @@ -63,8 +63,9 @@ def train(): params = paddle.parameters.create(model) - trainer = paddle.trainer.SGD( - cost=model, parameters=params, update_equation=optimizer) + trainer = paddle.trainer.SGD(cost=model, + parameters=params, + update_equation=optimizer) dataset = reader.Dataset() diff --git a/dssm/network_conf.py b/dssm/network_conf.py index ae7574e3..6758982d 100644 --- a/dssm/network_conf.py +++ b/dssm/network_conf.py @@ -102,11 +102,11 @@ class DSSM(object): """ _input_layer = paddle.layer.pooling( input=emb, pooling_type=paddle.pooling.Max()) - fc = paddle.layer.fc( - input=_input_layer, - size=self.dnn_dims[1], - param_attr=ParamAttr(name="%s_fc.w" % prefix), - bias_attr=ParamAttr(name="%s_fc.b" % prefix, initial_std=0.)) + fc = paddle.layer.fc(input=_input_layer, + size=self.dnn_dims[1], + param_attr=ParamAttr(name="%s_fc.w" % prefix), + bias_attr=ParamAttr( + name="%s_fc.b" % prefix, initial_std=0.)) return fc def create_rnn(self, emb, prefix=""): @@ -161,12 +161,12 @@ class DSSM(object): name = "%s_fc_%d_%d" % (prefix, id, dim) logger.info("create fc layer [%s] which dimention is %d" % (name, dim)) - fc = paddle.layer.fc( - input=_input_layer, - size=dim, - act=paddle.activation.Tanh(), - param_attr=ParamAttr(name="%s.w" % name), - bias_attr=ParamAttr(name="%s.b" % name, initial_std=0.)) + fc = paddle.layer.fc(input=_input_layer, + size=dim, + act=paddle.activation.Tanh(), + param_attr=ParamAttr(name="%s.w" % name), + bias_attr=ParamAttr( + name="%s.b" % name, initial_std=0.)) _input_layer = fc return _input_layer @@ -278,10 +278,9 @@ class DSSM(object): if is_classification: concated_vector = paddle.layer.concat(semantics) - prediction = paddle.layer.fc( - input=concated_vector, - size=self.class_num, - act=paddle.activation.Softmax()) + prediction = paddle.layer.fc(input=concated_vector, + size=self.class_num, + act=paddle.activation.Softmax()) cost = paddle.layer.classification_cost( input=prediction, label=label) else: diff --git a/dssm/reader.py b/dssm/reader.py index f39cd7f5..c0530c50 100644 --- a/dssm/reader.py +++ b/dssm/reader.py @@ -63,7 +63,10 @@ class Dataset(object): target = sent2ids(fs[1], self.target_dic) if not self.is_infer: label = int(fs[2]) - return (source, target, label, ) + return ( + source, + target, + label, ) return source, target def _read_regression_record(self, line): @@ -82,7 +85,10 @@ class Dataset(object): target = sent2ids(fs[1], self.target_dic) 
if not self.is_infer: label = float(fs[2]) - return (source, target, [label], ) + return ( + source, + target, + [label], ) return source, target def _read_rank_record(self, line): diff --git a/dssm/train.py b/dssm/train.py index 9d5b5782..d7ec8aa6 100644 --- a/dssm/train.py +++ b/dssm/train.py @@ -173,11 +173,13 @@ def train(train_data_path=None, model_type=model_type, ) train_reader = paddle.batch( - paddle.reader.shuffle(dataset.train, buf_size=1000), + paddle.reader.shuffle( + dataset.train, buf_size=1000), batch_size=batch_size) test_reader = paddle.batch( - paddle.reader.shuffle(dataset.test, buf_size=1000), + paddle.reader.shuffle( + dataset.test, buf_size=1000), batch_size=batch_size) paddle.init(use_gpu=use_gpu, trainer_count=num_workers) diff --git a/fluid/adversarial/advbox/models/paddle.py b/fluid/adversarial/advbox/models/paddle.py index 976a525c..33b2a3d5 100644 --- a/fluid/adversarial/advbox/models/paddle.py +++ b/fluid/adversarial/advbox/models/paddle.py @@ -65,10 +65,9 @@ class PaddleModel(Model): place=self._place, program=self._program) predict_var = self._program.block(0).var(self._predict_name) - predict = self._exe.run( - self._program, - feed=feeder.feed(image_batch), - fetch_list=[predict_var]) + predict = self._exe.run(self._program, + feed=feeder.feed(image_batch), + fetch_list=[predict_var]) return predict def num_classes(self): @@ -96,8 +95,7 @@ class PaddleModel(Model): place=self._place, program=self._program) - grad, = self._exe.run( - self._program, - feed=feeder.feed(image_batch), - fetch_list=[self._gradient]) + grad, = self._exe.run(self._program, + feed=feeder.feed(image_batch), + fetch_list=[self._gradient]) return grad diff --git a/fluid/adversarial/fluid_mnist.py b/fluid/adversarial/fluid_mnist.py index e2bfeeaf..db4d4b51 100644 --- a/fluid/adversarial/fluid_mnist.py +++ b/fluid/adversarial/fluid_mnist.py @@ -54,7 +54,8 @@ def main(): ACC_THRESHOLD = 0.98 LOSS_THRESHOLD = 10.0 train_reader = paddle.batch( - paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500), + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=500), batch_size=BATCH_SIZE) place = fluid.CPUPlace() @@ -65,10 +66,9 @@ def main(): for pass_id in range(PASS_NUM): accuracy.reset(exe) for data in train_reader(): - loss, acc = exe.run( - fluid.default_main_program(), - feed=feeder.feed(data), - fetch_list=[avg_cost] + accuracy.metrics) + loss, acc = exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[avg_cost] + accuracy.metrics) pass_acc = accuracy.eval(exe) print("pass_id=" + str(pass_id) + " acc=" + str(acc) + " pass_acc=" + str(pass_acc)) diff --git a/fluid/adversarial/mnist_tutorial_fgsm.py b/fluid/adversarial/mnist_tutorial_fgsm.py index 4b9a1de8..8b29346b 100644 --- a/fluid/adversarial/mnist_tutorial_fgsm.py +++ b/fluid/adversarial/mnist_tutorial_fgsm.py @@ -59,7 +59,8 @@ def main(): BATCH_SIZE = 1 train_reader = paddle.batch( - paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500), + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=500), batch_size=BATCH_SIZE) feeder = fluid.DataFeeder( feed_list=[IMG_NAME, LABEL_NAME], diff --git a/generate_chinese_poetry/network_conf.py b/generate_chinese_poetry/network_conf.py index 1aee1aa2..b1314bd6 100755 --- a/generate_chinese_poetry/network_conf.py +++ b/generate_chinese_poetry/network_conf.py @@ -59,11 +59,10 @@ def _attended_decoder_step(word_count, enc_out, enc_out_proj, gate_act=paddle.activation.Sigmoid(), state_act=paddle.activation.Tanh()) - next_word = 
paddle.layer.fc( - size=word_count, - bias_attr=True, - act=paddle.activation.Softmax(), - input=lstm) + next_word = paddle.layer.fc(size=word_count, + bias_attr=True, + act=paddle.activation.Softmax(), + input=lstm) return next_word @@ -86,11 +85,10 @@ def encoder_decoder_network(word_count, param_attr=paddle.attr.ParamAttr(name="__embedding__")) enc_out = _bidirect_lstm_encoder( input=src_emb, hidden_dim=encoder_hidden_dim, depth=encoder_depth) - enc_out_proj = paddle.layer.fc( - act=paddle.activation.Linear(), - size=encoder_hidden_dim, - bias_attr=False, - input=enc_out) + enc_out_proj = paddle.layer.fc(act=paddle.activation.Linear(), + size=encoder_hidden_dim, + bias_attr=False, + input=enc_out) decoder_group_name = "decoder_group" group_inputs = [ diff --git a/generate_chinese_poetry/reader.py b/generate_chinese_poetry/reader.py index 4ecdb041..480db9dd 100755 --- a/generate_chinese_poetry/reader.py +++ b/generate_chinese_poetry/reader.py @@ -26,9 +26,8 @@ def train_reader(data_file_path, word_dict_file): l = len(poetry_ids) if l < 2: continue for i in range(l - 1): - yield poetry_ids[i], poetry_ids[i + - 1][:-1], poetry_ids[i + - 1][1:] + yield poetry_ids[i], poetry_ids[i + 1][:-1], poetry_ids[ + i + 1][1:] return reader @@ -43,10 +42,10 @@ def gen_reader(data_file_path, word_dict_file): with open(data_file_path, "r") as f: for line in f: - input_line = "".join( - line.strip().decode("utf8", errors="ignore").split()) - yield [bos_id - ] + [word_dict.get(word, unk_id) - for word in input_line] + [eos_id] + input_line = "".join(line.strip().decode( + "utf8", errors="ignore").split()) + yield [bos_id] + [ + word_dict.get(word, unk_id) for word in input_line + ] + [eos_id] return reader diff --git a/generate_chinese_poetry/train.py b/generate_chinese_poetry/train.py index 18c9d79b..911c460a 100755 --- a/generate_chinese_poetry/train.py +++ b/generate_chinese_poetry/train.py @@ -96,8 +96,9 @@ def train(num_passes, if init_model_path: load_initial_model(init_model_path, parameters) - trainer = paddle.trainer.SGD( - cost=cost, parameters=parameters, update_equation=optimizer) + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer) # define data reader train_reader = paddle.batch( diff --git a/generate_sequence_by_rnn_lm/network_conf.py b/generate_sequence_by_rnn_lm/network_conf.py index f2e59a94..55e0d00e 100644 --- a/generate_sequence_by_rnn_lm/network_conf.py +++ b/generate_sequence_by_rnn_lm/network_conf.py @@ -48,8 +48,9 @@ def rnn_lm(vocab_dim, raise Exception("rnn_type error!") # fc(full connected) and output layer - output = paddle.layer.fc( - input=[rnn_cell], size=vocab_dim, act=paddle.activation.Softmax()) + output = paddle.layer.fc(input=[rnn_cell], + size=vocab_dim, + act=paddle.activation.Softmax()) if is_infer: last_word = paddle.layer.last_seq(input=output) diff --git a/generate_sequence_by_rnn_lm/train.py b/generate_sequence_by_rnn_lm/train.py index 19883582..852dd327 100644 --- a/generate_sequence_by_rnn_lm/train.py +++ b/generate_sequence_by_rnn_lm/train.py @@ -46,11 +46,10 @@ def train(topology, # create sum evaluator sum_eval = paddle.evaluator.sum(topology) # create trainer - trainer = paddle.trainer.SGD( - cost=topology, - parameters=parameters, - update_equation=adam_optimizer, - extra_layers=sum_eval) + trainer = paddle.trainer.SGD(cost=topology, + parameters=parameters, + update_equation=adam_optimizer, + extra_layers=sum_eval) # define the event_handler callback def event_handler(event): diff --git 
a/globally_normalized_reader/basic_modules.py b/globally_normalized_reader/basic_modules.py index bf40e716..a54b46b5 100644 --- a/globally_normalized_reader/basic_modules.py +++ b/globally_normalized_reader/basic_modules.py @@ -110,8 +110,9 @@ def lstm_by_nested_sequence(input_layer, hidden_dim, name="", reverse=False): name="__inner_state_%s__" % name, size=hidden_dim, boot_layer=outer_memory) - input_proj = paddle.layer.fc( - size=hidden_dim * 4, bias_attr=False, input=input_layer) + input_proj = paddle.layer.fc(size=hidden_dim * 4, + bias_attr=False, + input=input_layer) return paddle.networks.lstmemory_unit( input=input_proj, name="__inner_state_%s__" % name, diff --git a/globally_normalized_reader/beam_decoding.py b/globally_normalized_reader/beam_decoding.py index a2ce8520..5f7df266 100644 --- a/globally_normalized_reader/beam_decoding.py +++ b/globally_normalized_reader/beam_decoding.py @@ -88,9 +88,9 @@ class BeamDecoding(object): for j in range(beam_size): selected_id = int(self.selected_sentences[i][j]) if selected_id == -1: break - seq_len = self.beam1_seq_start_positions[ - i][selected_id + - 1] - self.beam1_seq_start_positions[i][selected_id] + seq_len = self.beam1_seq_start_positions[i][ + selected_id + 1] - self.beam1_seq_start_positions[i][ + selected_id] self.beam2_seq_start_positions[-1].append( self.beam2_seq_start_positions[-1][-1] + seq_len) @@ -113,9 +113,9 @@ class BeamDecoding(object): self.beam3_seq_start_positions.append([0]) sub_seq_num, beam_size = self.selected_starts.shape for i in range(sub_seq_num): - seq_len = self.beam2_seq_start_positions[ - seq_id][sub_seq_id + - 1] - self.beam2_seq_start_positions[seq_id][sub_seq_id] + seq_len = self.beam2_seq_start_positions[seq_id][ + sub_seq_id + 1] - self.beam2_seq_start_positions[seq_id][ + sub_seq_id] for j in range(beam_size): start_id = int(self.selected_starts[i][j]) if start_id == -1: break @@ -130,8 +130,8 @@ class BeamDecoding(object): [self.beam3_seq_start_positions[-1][-1]]) sub_seq_id = 0 seq_id += 1 - sub_seq_count = len( - self.beam2_seq_start_positions[seq_id]) - 1 + sub_seq_count = len(self.beam2_seq_start_positions[ + seq_id]) - 1 assert ( self.beam3_seq_start_positions[-1][-1] == self.end_scores.shape[0]) @@ -198,16 +198,11 @@ class BeamDecoding(object): if end_pos == -1: break self.all_searched_ans.append({ - "score": - self.end_scores[seq_offset_in_batch + end_pos], - "sentence_pos": - -1, - "start_span_pos": - -1, - "end_span_pos": - end_pos, - "parent_ids_in_prev_beam": - i + "score": self.end_scores[seq_offset_in_batch + end_pos], + "sentence_pos": -1, + "start_span_pos": -1, + "end_span_pos": end_pos, + "parent_ids_in_prev_beam": i }) sub_seq_id += 1 @@ -265,8 +260,7 @@ class BeamDecoding(object): key=lambda x: x["score"], reverse=True): self.final_ans[i].append({ - "score": - ans["score"], + "score": ans["score"], "label": [ ans["sentence_pos"], ans["start_span_pos"], ans["end_span_pos"] diff --git a/globally_normalized_reader/featurize.py b/globally_normalized_reader/featurize.py index d7dd09bc..9a5f3d26 100644 --- a/globally_normalized_reader/featurize.py +++ b/globally_normalized_reader/featurize.py @@ -235,7 +235,8 @@ def featurize_example(question, context, vocab): # Convert to indices question_idxs = [ vocab.word_to_idx(normalize(w)) - for w in ciseau.tokenize(question, normalize_ascii=False) + for w in ciseau.tokenize( + question, normalize_ascii=False) ] context_sents = ciseau.sent_tokenize( diff --git a/globally_normalized_reader/model.py b/globally_normalized_reader/model.py index 
e2ef2b23..5ca93808 100644 --- a/globally_normalized_reader/model.py +++ b/globally_normalized_reader/model.py @@ -19,9 +19,11 @@ def build_pretrained_embedding(name, data_type, emb_dim, emb_drop=0.): """ return paddle.layer.embedding( - input=paddle.layer.data(name=name, type=data_type), + input=paddle.layer.data( + name=name, type=data_type), size=emb_dim, - param_attr=paddle.attr.Param(name="GloveVectors", is_static=True), + param_attr=paddle.attr.Param( + name="GloveVectors", is_static=True), layer_attr=paddle.attr.ExtraLayerAttribute(drop_rate=emb_drop), ) @@ -56,16 +58,14 @@ def encode_question(input_embedding, input_embedding, lstm_hidden_dim, depth, 0., prefix) # compute passage-independent embeddings. - candidates = paddle.layer.fc( - input=lstm_outs, - bias_attr=False, - size=passage_indep_embedding_dim, - act=paddle.activation.Linear()) - weights = paddle.layer.fc( - input=lstm_outs, - size=1, - bias_attr=False, - act=paddle.activation.SequenceSoftmax()) + candidates = paddle.layer.fc(input=lstm_outs, + bias_attr=False, + size=passage_indep_embedding_dim, + act=paddle.activation.Linear()) + weights = paddle.layer.fc(input=lstm_outs, + size=1, + bias_attr=False, + act=paddle.activation.SequenceSoftmax()) weighted_candidates = paddle.layer.scaling(input=candidates, weight=weights) passage_indep_embedding = paddle.layer.pooling( input=weighted_candidates, pooling_type=paddle.pooling.Sum()) @@ -134,10 +134,9 @@ def question_aligned_passage_embedding(question_lstm_outs, document_embeddings, return paddle.layer.pooling( input=weighted_candidates, pooling_type=paddle.pooling.Sum()) - question_outs_proj = paddle.layer.fc( - input=question_lstm_outs, - bias_attr=False, - size=passage_aligned_embedding_dim) + question_outs_proj = paddle.layer.fc(input=question_lstm_outs, + bias_attr=False, + size=passage_aligned_embedding_dim) return paddle.layer.recurrent_group( input=[ paddle.layer.SubsequenceInput(document_embeddings), @@ -228,11 +227,10 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config, last_state_of_sentence = paddle.layer.last_seq( input=doc_lstm_outs, agg_level=paddle.layer.AggregateLevel.TO_SEQUENCE) - sentence_scores = paddle.layer.fc( - input=last_state_of_sentence, - size=1, - bias_attr=False, - act=paddle.activation.Linear()) + sentence_scores = paddle.layer.fc(input=last_state_of_sentence, + size=1, + bias_attr=False, + act=paddle.activation.Linear()) topk_sentence_ids = paddle.layer.kmax_seq_score( input=sentence_scores, beam_size=config.beam_size) topk_sen = paddle.layer.sub_nested_seq( @@ -255,11 +253,10 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config, _, end_span_embedding = basic_modules.stacked_bidirectional_lstm( topk_start_spans, config.lstm_hidden_dim, config.lstm_depth, config.lstm_hidden_droprate, "__end_span_embeddings__") - end_pos_scores = paddle.layer.fc( - input=end_span_embedding, - size=1, - bias_attr=False, - act=paddle.activation.Linear()) + end_pos_scores = paddle.layer.fc(input=end_span_embedding, + size=1, + bias_attr=False, + act=paddle.activation.Linear()) topk_end_pos_ids = paddle.layer.kmax_seq_score( input=end_pos_scores, beam_size=config.beam_size) diff --git a/globally_normalized_reader/train.py b/globally_normalized_reader/train.py index b0e6ca6d..19c7d0d2 100644 --- a/globally_normalized_reader/train.py +++ b/globally_normalized_reader/train.py @@ -129,7 +129,8 @@ def build_reader(data_dir, batch_size): # testing data is not shuffled test_reader = paddle.batch( - 
reader.data_reader(valid_samples, is_train=False), + reader.data_reader( + valid_samples, is_train=False), batch_size=batch_size) return train_reader, test_reader, len(train_samples) @@ -222,8 +223,9 @@ def train(model_config, trainer_config): "GloveVectors", load_pretrained_parameters(ModelConfig.pretrained_emb_path)) - trainer = paddle.trainer.SGD( - cost=loss, parameters=parameters, update_equation=optimizer) + trainer = paddle.trainer.SGD(cost=loss, + parameters=parameters, + update_equation=optimizer) event_handler = build_event_handler(trainer_config, parameters, trainer) trainer.train( diff --git a/globally_normalized_reader/vocab.py b/globally_normalized_reader/vocab.py index 874d01c9..345ec4fc 100644 --- a/globally_normalized_reader/vocab.py +++ b/globally_normalized_reader/vocab.py @@ -153,8 +153,8 @@ class Word2Vec(object): line = fin.readline() parts = line.rstrip("\n").rstrip().split(" ") if len(parts) != self.vector_size + 1: - raise ValueError( - "invalid vector on line {}".format(word_id)) + raise ValueError("invalid vector on line {}".format( + word_id)) word, weights = parts[0], [np.float32(x) for x in parts[1:]] self.syn0[word_id] = weights self.index2word.append(word) @@ -280,5 +280,5 @@ def load_word_vectors(param, param[idx, :] = missing_word_value() missing += 1 if verbose: - print( - "Loaded {} words, {} missing".format(len(vocab) - missing, missing)) + print("Loaded {} words, {} missing".format( + len(vocab) - missing, missing)) diff --git a/hsigmoid/network_conf.py b/hsigmoid/network_conf.py index 721db84f..aa1126c7 100644 --- a/hsigmoid/network_conf.py +++ b/hsigmoid/network_conf.py @@ -21,14 +21,15 @@ def ngram_lm(hidden_size, embed_size, dict_size, gram_num=4, is_train=True): embed_context = paddle.layer.concat(input=emb_layers) - hidden_layer = paddle.layer.fc( - input=embed_context, - size=hidden_size, - act=paddle.activation.Sigmoid(), - layer_attr=paddle.attr.Extra(drop_rate=0.5), - bias_attr=paddle.attr.Param(learning_rate=2), - param_attr=paddle.attr.Param( - initial_std=1. / math.sqrt(embed_size * 8), learning_rate=1)) + hidden_layer = paddle.layer.fc(input=embed_context, + size=hidden_size, + act=paddle.activation.Sigmoid(), + layer_attr=paddle.attr.Extra(drop_rate=0.5), + bias_attr=paddle.attr.Param(learning_rate=2), + param_attr=paddle.attr.Param( + initial_std=1. 
/ + math.sqrt(embed_size * 8), + learning_rate=1)) if is_train == True: return paddle.layer.hsigmoid( diff --git a/hsigmoid/train.py b/hsigmoid/train.py index 75769c30..8ee0717e 100644 --- a/hsigmoid/train.py +++ b/hsigmoid/train.py @@ -50,7 +50,8 @@ def main(save_dir="models"): paddle.batch( paddle.reader.shuffle( lambda: paddle.dataset.imikolov.train(word_dict, 5)(), - buf_size=1000), 64), + buf_size=1000), + 64), num_passes=30, event_handler=event_handler) diff --git a/image_classification/alexnet.py b/image_classification/alexnet.py index 5262a97f..1fbd05c0 100644 --- a/image_classification/alexnet.py +++ b/image_classification/alexnet.py @@ -34,17 +34,16 @@ def alexnet(input, class_dim): conv_filter_size=3, pool_type=paddle.pooling.Max()) - fc1 = paddle.layer.fc( - input=pool3, - size=4096, - act=paddle.activation.Relu(), - layer_attr=paddle.attr.Extra(drop_rate=0.5)) - fc2 = paddle.layer.fc( - input=fc1, - size=4096, - act=paddle.activation.Relu(), - layer_attr=paddle.attr.Extra(drop_rate=0.5)) + fc1 = paddle.layer.fc(input=pool3, + size=4096, + act=paddle.activation.Relu(), + layer_attr=paddle.attr.Extra(drop_rate=0.5)) + fc2 = paddle.layer.fc(input=fc1, + size=4096, + act=paddle.activation.Relu(), + layer_attr=paddle.attr.Extra(drop_rate=0.5)) - out = paddle.layer.fc( - input=fc2, size=class_dim, act=paddle.activation.Softmax()) + out = paddle.layer.fc(input=fc2, + size=class_dim, + act=paddle.activation.Softmax()) return out diff --git a/image_classification/caffe2paddle/caffe2paddle.py b/image_classification/caffe2paddle/caffe2paddle.py index a142e6c0..b25c70bb 100644 --- a/image_classification/caffe2paddle/caffe2paddle.py +++ b/image_classification/caffe2paddle/caffe2paddle.py @@ -103,8 +103,8 @@ class ModelConverter(object): @wrap_name_default("batch_norm") def convert_BatchNorm_layer(self, params, name=None): - scale = 1 / np.array(params[-1].data)[0] if np.array( - params[-1].data)[0] != 0 else 0 + scale = 1 / np.array(params[-1].data)[0] if np.array(params[-1].data)[ + 0] != 0 else 0 for i in range(2): data = np.array(params[i].data) * scale file_name = "_%s.w%s" % (name, str(i + 1)) diff --git a/image_classification/googlenet.py b/image_classification/googlenet.py index 474f948f..a60c01db 100644 --- a/image_classification/googlenet.py +++ b/image_classification/googlenet.py @@ -126,8 +126,9 @@ def googlenet(input, class_dim): layer_attr=paddle.attr.Extra(drop_rate=0.4), act=paddle.activation.Linear()) - out = paddle.layer.fc( - input=dropout, size=class_dim, act=paddle.activation.Softmax()) + out = paddle.layer.fc(input=dropout, + size=class_dim, + act=paddle.activation.Softmax()) # fc for output 1 pool_o1 = paddle.layer.img_pool( @@ -144,14 +145,14 @@ def googlenet(input, class_dim): num_filters=128, stride=1, padding=0) - fc_o1 = paddle.layer.fc( - name="fc_o1", - input=conv_o1, - size=1024, - layer_attr=paddle.attr.Extra(drop_rate=0.7), - act=paddle.activation.Relu()) - out1 = paddle.layer.fc( - input=fc_o1, size=class_dim, act=paddle.activation.Softmax()) + fc_o1 = paddle.layer.fc(name="fc_o1", + input=conv_o1, + size=1024, + layer_attr=paddle.attr.Extra(drop_rate=0.7), + act=paddle.activation.Relu()) + out1 = paddle.layer.fc(input=fc_o1, + size=class_dim, + act=paddle.activation.Softmax()) # fc for output 2 pool_o2 = paddle.layer.img_pool( @@ -168,13 +169,13 @@ def googlenet(input, class_dim): num_filters=128, stride=1, padding=0) - fc_o2 = paddle.layer.fc( - name="fc_o2", - input=conv_o2, - size=1024, - layer_attr=paddle.attr.Extra(drop_rate=0.7), - 
act=paddle.activation.Relu()) - out2 = paddle.layer.fc( - input=fc_o2, size=class_dim, act=paddle.activation.Softmax()) + fc_o2 = paddle.layer.fc(name="fc_o2", + input=conv_o2, + size=1024, + layer_attr=paddle.attr.Extra(drop_rate=0.7), + act=paddle.activation.Relu()) + out2 = paddle.layer.fc(input=fc_o2, + size=class_dim, + act=paddle.activation.Softmax()) return out, out1, out2 diff --git a/image_classification/inception_resnet_v2.py b/image_classification/inception_resnet_v2.py index cddd59ce..06e9eb60 100644 --- a/image_classification/inception_resnet_v2.py +++ b/image_classification/inception_resnet_v2.py @@ -323,6 +323,7 @@ def inception_resnet_v2(input, pool_type=paddle.pooling.Avg(), exclude_mode=False) drop_out = paddle.layer.dropout(input=avgpool_1a, dropout_rate=dropout_rate) - out = paddle.layer.fc( - input=drop_out, size=class_dim, act=paddle.activation.Softmax()) + out = paddle.layer.fc(input=drop_out, + size=class_dim, + act=paddle.activation.Softmax()) return out diff --git a/image_classification/inception_v4.py b/image_classification/inception_v4.py index 9a8c5fa8..e1713926 100644 --- a/image_classification/inception_v4.py +++ b/image_classification/inception_v4.py @@ -518,9 +518,8 @@ def inception_v4(input, class_dim): stride=1, pool_type=paddle.pooling.Avg()) drop = paddle.layer.dropout(input=pool, dropout_rate=0.2) - out = paddle.layer.fc( - name='incept_fc', - input=drop, - size=class_dim, - act=paddle.activation.Softmax()) + out = paddle.layer.fc(name='incept_fc', + input=drop, + size=class_dim, + act=paddle.activation.Softmax()) return out diff --git a/image_classification/resnet.py b/image_classification/resnet.py index 0d8a4a8d..5c884117 100644 --- a/image_classification/resnet.py +++ b/image_classification/resnet.py @@ -73,8 +73,9 @@ def resnet_imagenet(input, class_dim, depth=50): res4 = layer_warp(block_func, res3, 512, stages[3], 2) pool2 = paddle.layer.img_pool( input=res4, pool_size=7, stride=1, pool_type=paddle.pooling.Avg()) - out = paddle.layer.fc( - input=pool2, size=class_dim, act=paddle.activation.Softmax()) + out = paddle.layer.fc(input=pool2, + size=class_dim, + act=paddle.activation.Softmax()) return out @@ -90,6 +91,7 @@ def resnet_cifar10(input, class_dim, depth=32): res3 = layer_warp(basicblock, res2, 64, n, 2) pool = paddle.layer.img_pool( input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) - out = paddle.layer.fc( - input=pool, size=class_dim, act=paddle.activation.Softmax()) + out = paddle.layer.fc(input=pool, + size=class_dim, + act=paddle.activation.Softmax()) return out diff --git a/image_classification/train.py b/image_classification/train.py index 23720462..d824e10d 100644 --- a/image_classification/train.py +++ b/image_classification/train.py @@ -97,11 +97,10 @@ def main(): batch_size=BATCH_SIZE) # Create trainer - trainer = paddle.trainer.SGD( - cost=cost, - parameters=parameters, - update_equation=optimizer, - extra_layers=extra_layers) + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer, + extra_layers=extra_layers) # End batch and end pass event handler def event_handler(event): diff --git a/image_classification/vgg.py b/image_classification/vgg.py index c6ec79a8..4abfb4bf 100644 --- a/image_classification/vgg.py +++ b/image_classification/vgg.py @@ -24,18 +24,17 @@ def vgg(input, nums, class_dim): conv5 = conv_block(conv4, 512, nums[4]) fc_dim = 4096 - fc1 = paddle.layer.fc( - input=conv5, - size=fc_dim, - act=paddle.activation.Relu(), - 
layer_attr=paddle.attr.Extra(drop_rate=0.5)) - fc2 = paddle.layer.fc( - input=fc1, - size=fc_dim, - act=paddle.activation.Relu(), - layer_attr=paddle.attr.Extra(drop_rate=0.5)) - out = paddle.layer.fc( - input=fc2, size=class_dim, act=paddle.activation.Softmax()) + fc1 = paddle.layer.fc(input=conv5, + size=fc_dim, + act=paddle.activation.Relu(), + layer_attr=paddle.attr.Extra(drop_rate=0.5)) + fc2 = paddle.layer.fc(input=fc1, + size=fc_dim, + act=paddle.activation.Relu(), + layer_attr=paddle.attr.Extra(drop_rate=0.5)) + out = paddle.layer.fc(input=fc2, + size=class_dim, + act=paddle.activation.Softmax()) return out diff --git a/image_classification/xception.py b/image_classification/xception.py index 41c11b83..fbe8f4ed 100644 --- a/image_classification/xception.py +++ b/image_classification/xception.py @@ -185,9 +185,8 @@ def xception(input, class_dim): stride=1, num_channels=2048, pool_type=paddle.pooling.CudnnAvg()) - out = paddle.layer.fc( - name='xception_fc', - input=pool, - size=class_dim, - act=paddle.activation.Softmax()) + out = paddle.layer.fc(name='xception_fc', + input=pool, + size=class_dim, + act=paddle.activation.Softmax()) return out diff --git a/ltr/lambda_rank.py b/ltr/lambda_rank.py index aae008ba..cb96a2d7 100644 --- a/ltr/lambda_rank.py +++ b/ltr/lambda_rank.py @@ -19,22 +19,19 @@ def lambda_rank(input_dim, is_infer=False): paddle.data_type.dense_vector_sequence(input_dim)) # Define the hidden layer. - hd1 = paddle.layer.fc( - input=data, - size=128, - act=paddle.activation.Tanh(), - param_attr=paddle.attr.Param(initial_std=0.01)) - - hd2 = paddle.layer.fc( - input=hd1, - size=10, - act=paddle.activation.Tanh(), - param_attr=paddle.attr.Param(initial_std=0.01)) - output = paddle.layer.fc( - input=hd2, - size=1, - act=paddle.activation.Linear(), - param_attr=paddle.attr.Param(initial_std=0.01)) + hd1 = paddle.layer.fc(input=data, + size=128, + act=paddle.activation.Tanh(), + param_attr=paddle.attr.Param(initial_std=0.01)) + + hd2 = paddle.layer.fc(input=hd1, + size=10, + act=paddle.activation.Tanh(), + param_attr=paddle.attr.Param(initial_std=0.01)) + output = paddle.layer.fc(input=hd2, + size=1, + act=paddle.activation.Linear(), + param_attr=paddle.attr.Param(initial_std=0.01)) if not is_infer: label = paddle.layer.data("label", diff --git a/ltr/ranknet.py b/ltr/ranknet.py index 4f39ca93..8484f435 100644 --- a/ltr/ranknet.py +++ b/ltr/ranknet.py @@ -17,20 +17,20 @@ def half_ranknet(name_prefix, input_dim): paddle.data_type.dense_vector(input_dim)) # hidden layer - hd1 = paddle.layer.fc( - input=data, - name=name_prefix + "_hidden", - size=10, - act=paddle.activation.Tanh(), - param_attr=paddle.attr.Param(initial_std=0.01, name="hidden_w1")) + hd1 = paddle.layer.fc(input=data, + name=name_prefix + "_hidden", + size=10, + act=paddle.activation.Tanh(), + param_attr=paddle.attr.Param( + initial_std=0.01, name="hidden_w1")) # fully connected layer and output layer - output = paddle.layer.fc( - input=hd1, - name=name_prefix + "_score", - size=1, - act=paddle.activation.Linear(), - param_attr=paddle.attr.Param(initial_std=0.01, name="output")) + output = paddle.layer.fc(input=hd1, + name=name_prefix + "_score", + size=1, + act=paddle.activation.Linear(), + param_attr=paddle.attr.Param( + initial_std=0.01, name="output")) return output diff --git a/ltr/train.py b/ltr/train.py index 1820ce12..2a4d16c7 100644 --- a/ltr/train.py +++ b/ltr/train.py @@ -16,7 +16,8 @@ logger.setLevel(logging.INFO) def ranknet_train(input_dim, num_passes, model_save_dir): train_reader = paddle.batch( 
- paddle.reader.shuffle(paddle.dataset.mq2007.train, buf_size=100), + paddle.reader.shuffle( + paddle.dataset.mq2007.train, buf_size=100), batch_size=100) test_reader = paddle.batch(paddle.dataset.mq2007.test, batch_size=100) @@ -70,7 +71,8 @@ def lambda_rank_train(input_dim, num_passes, model_save_dir): paddle.dataset.mq2007.test, format="listwise") train_reader = paddle.batch( - paddle.reader.shuffle(fill_default_train, buf_size=100), batch_size=32) + paddle.reader.shuffle( + fill_default_train, buf_size=100), batch_size=32) test_reader = paddle.batch(fill_default_test, batch_size=32) cost = lambda_rank(input_dim) diff --git a/mt_with_external_memory/external_memory.py b/mt_with_external_memory/external_memory.py index d5df173d..5f26e8d7 100644 --- a/mt_with_external_memory/external_memory.py +++ b/mt_with_external_memory/external_memory.py @@ -74,18 +74,16 @@ class ExternalMemory(object): """Get write/read head's addressing weights via content-based addressing. """ # content-based addressing: a=tanh(W*M + U*key) - key_projection = paddle.layer.fc( - input=key_vector, - size=self.mem_slot_size, - act=paddle.activation.Linear(), - bias_attr=False) + key_projection = paddle.layer.fc(input=key_vector, + size=self.mem_slot_size, + act=paddle.activation.Linear(), + bias_attr=False) key_proj_expanded = paddle.layer.expand( input=key_projection, expand_as=self.external_memory) - memory_projection = paddle.layer.fc( - input=self.external_memory, - size=self.mem_slot_size, - act=paddle.activation.Linear(), - bias_attr=False) + memory_projection = paddle.layer.fc(input=self.external_memory, + size=self.mem_slot_size, + act=paddle.activation.Linear(), + bias_attr=False) merged_projection = paddle.layer.addto( input=[key_proj_expanded, memory_projection], act=paddle.activation.Tanh()) @@ -101,11 +99,10 @@ class ExternalMemory(object): """Interpolate between previous and current addressing weights. 
""" # prepare interpolation scalar gate: g=sigmoid(W*key) - gate = paddle.layer.fc( - input=key_vector, - size=1, - act=paddle.activation.Sigmoid(), - bias_attr=False) + gate = paddle.layer.fc(input=key_vector, + size=1, + act=paddle.activation.Sigmoid(), + bias_attr=False) # interpolation: w_t = g*w_t+(1-g)*w_{t-1} last_addressing_weight = paddle.layer.memory( name=self.name + "_addressing_weight_" + head_name, @@ -114,7 +111,8 @@ class ExternalMemory(object): interpolated_weight = paddle.layer.interpolation( name=self.name + "_addressing_weight_" + head_name, input=[last_addressing_weight, addressing_weight], - weight=paddle.layer.expand(input=gate, expand_as=addressing_weight)) + weight=paddle.layer.expand( + input=gate, expand_as=addressing_weight)) return interpolated_weight def _get_addressing_weight(self, head_name, key_vector): @@ -143,16 +141,14 @@ class ExternalMemory(object): # get addressing weight for write head write_weight = self._get_addressing_weight("write_head", write_key) # prepare add_vector and erase_vector - erase_vector = paddle.layer.fc( - input=write_key, - size=self.mem_slot_size, - act=paddle.activation.Sigmoid(), - bias_attr=False) - add_vector = paddle.layer.fc( - input=write_key, - size=self.mem_slot_size, - act=paddle.activation.Sigmoid(), - bias_attr=False) + erase_vector = paddle.layer.fc(input=write_key, + size=self.mem_slot_size, + act=paddle.activation.Sigmoid(), + bias_attr=False) + add_vector = paddle.layer.fc(input=write_key, + size=self.mem_slot_size, + act=paddle.activation.Sigmoid(), + bias_attr=False) erase_vector_expand = paddle.layer.expand( input=erase_vector, expand_as=self.external_memory) add_vector_expand = paddle.layer.expand( diff --git a/mt_with_external_memory/infer.py b/mt_with_external_memory/infer.py index 05375129..55ab0917 100644 --- a/mt_with_external_memory/infer.py +++ b/mt_with_external_memory/infer.py @@ -81,8 +81,8 @@ def parse_beam_search_result(beam_result, dictionary): if word != -1: sentence.append(word) else: - sentence_list.append( - ' '.join([dictionary.get(word) for word in sentence[1:]])) + sentence_list.append(' '.join( + [dictionary.get(word) for word in sentence[1:]])) sentence = [] beam_probs = beam_result[0] beam_size = len(beam_probs[0]) @@ -127,7 +127,9 @@ def infer(): append_tuple=(bounded_memory_perturbation, )) for i, item in enumerate(test_append_reader()): if i < args.infer_data_num: - infer_data.append((item[0], item[3], )) + infer_data.append(( + item[0], + item[3], )) # run inference beam_result = paddle.infer( diff --git a/mt_with_external_memory/model.py b/mt_with_external_memory/model.py index 527c9ff6..7342ce14 100644 --- a/mt_with_external_memory/model.py +++ b/mt_with_external_memory/model.py @@ -109,11 +109,10 @@ def memory_enhanced_decoder(input, target, initial_state, source_context, size, :rtype: LayerOutput """ # prepare initial bounded and unbounded memory - bounded_memory_slot_init = paddle.layer.fc( - input=paddle.layer.pooling( - input=source_context, pooling_type=paddle.pooling.Avg()), - size=size, - act=paddle.activation.Sigmoid()) + bounded_memory_slot_init = paddle.layer.fc(input=paddle.layer.pooling( + input=source_context, pooling_type=paddle.pooling.Avg()), + size=size, + act=paddle.activation.Sigmoid()) bounded_memory_perturbation = paddle.layer.data( name='bounded_memory_perturbation', type=paddle.data_type.dense_vector_sequence(size)) @@ -175,11 +174,10 @@ def memory_enhanced_decoder(input, target, initial_state, source_context, size, gru_output = paddle.layer.gru_step( 
name="gru_decoder", input=gru_inputs, output_mem=state, size=size) # step output - return paddle.layer.fc( - input=[gru_output, context, cur_embedding], - size=dict_size, - act=paddle.activation.Softmax(), - bias_attr=True) + return paddle.layer.fc(input=[gru_output, context, cur_embedding], + size=dict_size, + act=paddle.activation.Softmax(), + bias_attr=True) if not is_generating: target_embeddings = paddle.layer.embedding( diff --git a/mt_with_external_memory/train.py b/mt_with_external_memory/train.py index 4f413564..38d1970c 100644 --- a/mt_with_external_memory/train.py +++ b/mt_with_external_memory/train.py @@ -94,8 +94,9 @@ def train(): # create parameters and trainer parameters = paddle.parameters.create(cost) - trainer = paddle.trainer.SGD( - cost=cost, parameters=parameters, update_equation=optimizer) + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer) # create data readers feeding = { @@ -113,13 +114,15 @@ def train(): reader=paddle.dataset.wmt14.train(args.dict_size), append_tuple=(bounded_memory_perturbation, )) train_batch_reader = paddle.batch( - reader=paddle.reader.shuffle(reader=train_append_reader, buf_size=8192), + reader=paddle.reader.shuffle( + reader=train_append_reader, buf_size=8192), batch_size=args.batch_size) test_append_reader = reader_append_wrapper( reader=paddle.dataset.wmt14.test(args.dict_size), append_tuple=(bounded_memory_perturbation, )) test_batch_reader = paddle.batch( - reader=paddle.reader.shuffle(reader=test_append_reader, buf_size=8192), + reader=paddle.reader.shuffle( + reader=test_append_reader, buf_size=8192), batch_size=args.batch_size) # create event handler diff --git a/nce_cost/network_conf.py b/nce_cost/network_conf.py index e7bb1796..bdec1b78 100644 --- a/nce_cost/network_conf.py +++ b/nce_cost/network_conf.py @@ -27,14 +27,13 @@ def ngram_lm(hidden_size, emb_size, dict_size, gram_num=4, is_train=True): param_attr=paddle.attr.Param(initial_std=1. 
/ math.sqrt(emb_size * 8))) if is_train: - return paddle.layer.nce( - input=hidden_layer, - label=next_word, - num_classes=dict_size, - param_attr=paddle.attr.Param(name="nce_w"), - bias_attr=paddle.attr.Param(name="nce_b"), - num_neg_samples=25, - neg_distribution=None) + return paddle.layer.nce(input=hidden_layer, + label=next_word, + num_classes=dict_size, + param_attr=paddle.attr.Param(name="nce_w"), + bias_attr=paddle.attr.Param(name="nce_b"), + num_neg_samples=25, + neg_distribution=None) else: return paddle.layer.mixed( size=dict_size, diff --git a/nce_cost/train.py b/nce_cost/train.py index e76df867..11ba1e16 100644 --- a/nce_cost/train.py +++ b/nce_cost/train.py @@ -43,7 +43,8 @@ def train(model_save_dir): paddle.batch( paddle.reader.shuffle( lambda: paddle.dataset.imikolov.train(word_dict, 5)(), - buf_size=1000), 64), + buf_size=1000), + 64), num_passes=1000, event_handler=event_handler) diff --git a/nested_sequence/text_classification/network_conf.py b/nested_sequence/text_classification/network_conf.py index cc7b3983..06cbe3cb 100644 --- a/nested_sequence/text_classification/network_conf.py +++ b/nested_sequence/text_classification/network_conf.py @@ -17,12 +17,11 @@ def cnn_cov_group(group_input, hidden_size): fc_param_attr = paddle.attr.ParamAttr(name='_cov_value_weight') fc_bias_attr = paddle.attr.ParamAttr(name='_cov_value_bias') - linear_proj = paddle.layer.fc( - input=[conv3, conv4], - size=hidden_size, - param_attr=[fc_param_attr, fc_param_attr], - bias_attr=fc_bias_attr, - act=paddle.activation.Linear()) + linear_proj = paddle.layer.fc(input=[conv3, conv4], + size=hidden_size, + param_attr=[fc_param_attr, fc_param_attr], + bias_attr=fc_bias_attr, + act=paddle.activation.Linear()) return linear_proj diff --git a/nested_sequence/text_classification/reader.py b/nested_sequence/text_classification/reader.py index 5202942c..a437422d 100644 --- a/nested_sequence/text_classification/reader.py +++ b/nested_sequence/text_classification/reader.py @@ -24,8 +24,8 @@ def tokenize(pattern): """ Read files that match the given pattern. Tokenize and yield each file. """ - with tarfile.open( - paddle.v2.dataset.common.download(URL, 'imdb', MD5)) as tarf: + with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb', + MD5)) as tarf: tf = tarf.next() while tf != None: if bool(pattern.match(tf.name)): @@ -77,11 +77,17 @@ def reader_creator(pos_pattern, neg_pattern, word_idx, buffer_size): def reader(): # Creates two threads that loads positive and negative samples # into qs. 
- t0 = threading.Thread(target=load, args=(pos_pattern, qs[0], )) + t0 = threading.Thread( + target=load, args=( + pos_pattern, + qs[0], )) t0.daemon = True t0.start() - t1 = threading.Thread(target=load, args=(neg_pattern, qs[1], )) + t1 = threading.Thread( + target=load, args=( + neg_pattern, + qs[1], )) t1.daemon = True t1.start() diff --git a/neural_qa/network.py b/neural_qa/network.py index 0fb19022..1fba80f4 100644 --- a/neural_qa/network.py +++ b/neural_qa/network.py @@ -62,16 +62,17 @@ def encoding_question(question, q_lstm_dim, latent_chain_dim, word_vec_dim, emb = get_embedding(question, word_vec_dim, wordvecs) # question LSTM - wx = paddle.layer.fc( - act=paddle.activation.Linear(), - size=q_lstm_dim * 4, - input=emb, - param_attr=paddle.attr.ParamAttr( - name="_q_hidden1.w0", - initial_std=default_init_std, - l2_rate=default_l2_rate), - bias_attr=paddle.attr.ParamAttr( - name="_q_hidden1.wbias", initial_std=0, l2_rate=default_l2_rate)) + wx = paddle.layer.fc(act=paddle.activation.Linear(), + size=q_lstm_dim * 4, + input=emb, + param_attr=paddle.attr.ParamAttr( + name="_q_hidden1.w0", + initial_std=default_init_std, + l2_rate=default_l2_rate), + bias_attr=paddle.attr.ParamAttr( + name="_q_hidden1.wbias", + initial_std=0, + l2_rate=default_l2_rate)) q_rnn = paddle.layer.lstmemory( input=wx, bias_attr=paddle.attr.ParamAttr( @@ -83,24 +84,22 @@ def encoding_question(question, q_lstm_dim, latent_chain_dim, word_vec_dim, q_rnn = paddle.layer.dropout(q_rnn, drop_rate) # self attention - fc = paddle.layer.fc( - act=paddle.activation.Tanh(), - size=latent_chain_dim, - input=q_rnn, - param_attr=paddle.attr.ParamAttr( - name="_attention_layer1.w0", - initial_std=default_init_std, - l2_rate=default_l2_rate), - bias_attr=False) - weight = paddle.layer.fc( - size=1, - act=paddle.activation.SequenceSoftmax(), - input=fc, - param_attr=paddle.attr.ParamAttr( - name="_attention_weight.w0", - initial_std=default_init_std, - l2_rate=default_l2_rate), - bias_attr=False) + fc = paddle.layer.fc(act=paddle.activation.Tanh(), + size=latent_chain_dim, + input=q_rnn, + param_attr=paddle.attr.ParamAttr( + name="_attention_layer1.w0", + initial_std=default_init_std, + l2_rate=default_l2_rate), + bias_attr=False) + weight = paddle.layer.fc(size=1, + act=paddle.activation.SequenceSoftmax(), + input=fc, + param_attr=paddle.attr.ParamAttr( + name="_attention_weight.w0", + initial_std=default_init_std, + l2_rate=default_l2_rate), + bias_attr=False) scaled_q_rnn = paddle.layer.scaling(input=q_rnn, weight=weight) @@ -262,15 +261,14 @@ def define_common_network(conf): conf.default_init_std, conf.default_l2_rate) # pre-compute CRF features - crf_feats = paddle.layer.fc( - act=paddle.activation.Linear(), - input=e_encoding, - size=conf.label_num, - param_attr=paddle.attr.ParamAttr( - name="_output.w0", - initial_std=conf.default_init_std, - l2_rate=conf.default_l2_rate), - bias_attr=False) + crf_feats = paddle.layer.fc(act=paddle.activation.Linear(), + input=e_encoding, + size=conf.label_num, + param_attr=paddle.attr.ParamAttr( + name="_output.w0", + initial_std=conf.default_init_std, + l2_rate=conf.default_l2_rate), + bias_attr=False) return crf_feats, label @@ -283,15 +281,14 @@ def training_net(conf): :rtype: LayerOutput """ e_encoding, label = define_common_network(conf) - crf = paddle.layer.crf( - input=e_encoding, - label=label, - size=conf.label_num, - param_attr=paddle.attr.ParamAttr( - name="_crf.w0", - initial_std=conf.default_init_std, - l2_rate=conf.default_l2_rate), - 
layer_attr=paddle.attr.ExtraAttr(device=-1)) + crf = paddle.layer.crf(input=e_encoding, + label=label, + size=conf.label_num, + param_attr=paddle.attr.ParamAttr( + name="_crf.w0", + initial_std=conf.default_init_std, + l2_rate=conf.default_l2_rate), + layer_attr=paddle.attr.ExtraAttr(device=-1)) return crf diff --git a/neural_qa/train.py b/neural_qa/train.py index fb717857..e09a1b73 100644 --- a/neural_qa/train.py +++ b/neural_qa/train.py @@ -21,7 +21,8 @@ def save_model(trainer, model_save_dir, parameters, pass_id): def show_parameter_init_info(parameters): """ - Print the information of initialization mean and standard deviation of parameters + Print the information of initialization mean and standard deviation of + parameters :param parameters: the parameters created in a model """ @@ -98,8 +99,9 @@ def train(conf): max_average_window=conf.max_average_window)) # create trainer - trainer = paddle.trainer.SGD( - cost=cost, parameters=parameters, update_equation=rmsprop_optimizer) + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=rmsprop_optimizer) # begin training network def _event_handler(event): diff --git a/neural_qa/utils.py b/neural_qa/utils.py index 28527ded..4096e61b 100644 --- a/neural_qa/utils.py +++ b/neural_qa/utils.py @@ -5,7 +5,12 @@ import sys import numpy __all__ = [ - "open_file", "cumsum", "logger", "DotBar", "load_dict", "load_wordvecs" + "open_file", + "cumsum", + "logger", + "DotBar", + "load_dict", + "load_wordvecs", ] logger = logging.getLogger("paddle") @@ -64,8 +69,7 @@ class DotBar(object): self.dots_per_line = dots_per_line self.f = f - def __enter__( - self, ): + def __enter__(self, ): self.obj.__enter__() self.idx = 0 return self diff --git a/nmt_without_attention/network_conf.py b/nmt_without_attention/network_conf.py index 3f19ed12..595df349 100644 --- a/nmt_without_attention/network_conf.py +++ b/nmt_without_attention/network_conf.py @@ -48,8 +48,9 @@ def seq2seq_net(source_dict_dim, return_seq=True) #### Decoder encoder_last = paddle.layer.last_seq(input=encoded_vector) - encoder_last_projected = paddle.layer.fc( - size=decoder_size, act=paddle.activation.Tanh(), input=encoder_last) + encoder_last_projected = paddle.layer.fc(size=decoder_size, + act=paddle.activation.Tanh(), + input=encoder_last) # gru step def gru_decoder_without_attention(enc_vec, current_word): @@ -68,8 +69,8 @@ def seq2seq_net(source_dict_dim, context = paddle.layer.last_seq(input=enc_vec) - decoder_inputs = paddle.layer.fc( - size=decoder_size * 3, input=[context, current_word]) + decoder_inputs = paddle.layer.fc(size=decoder_size * 3, + input=[context, current_word]) gru_step = paddle.layer.gru_step( name="gru_decoder", @@ -79,11 +80,10 @@ def seq2seq_net(source_dict_dim, output_mem=decoder_mem, size=decoder_size) - out = paddle.layer.fc( - size=target_dict_dim, - bias_attr=True, - act=paddle.activation.Softmax(), - input=gru_step) + out = paddle.layer.fc(size=target_dict_dim, + bias_attr=True, + act=paddle.activation.Softmax(), + input=gru_step) return out group_input1 = paddle.layer.StaticInput(input=encoded_vector) diff --git a/nmt_without_attention/train.py b/nmt_without_attention/train.py index 144d61fa..373e4b8d 100644 --- a/nmt_without_attention/train.py +++ b/nmt_without_attention/train.py @@ -33,8 +33,9 @@ def train(save_dir_path, source_dict_dim, target_dict_dim): learning_rate=1e-3, gradient_clipping_threshold=10.0, regularization=paddle.optimizer.L2Regularization(rate=8e-4)) - trainer = paddle.trainer.SGD( - cost=cost, 
parameters=parameters, update_equation=optimizer) + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer) # define data reader wmt14_reader = paddle.batch( diff --git a/scene_text_recognition/infer.py b/scene_text_recognition/infer.py index 64bc4dde..dfcf32ac 100644 --- a/scene_text_recognition/infer.py +++ b/scene_text_recognition/infer.py @@ -70,8 +70,8 @@ def infer(model_path, image_shape, batch_size, label_dict_path, test_batch = [] labels = [] - for i, (image, - label) in enumerate(data_generator.infer_reader(infer_file_list)()): + for i, (image, label + ) in enumerate(data_generator.infer_reader(infer_file_list)()): test_batch.append([image]) labels.append(label) if len(test_batch) == batch_size: diff --git a/scene_text_recognition/network_conf.py b/scene_text_recognition/network_conf.py index bd92bae8..c525c131 100644 --- a/scene_text_recognition/network_conf.py +++ b/scene_text_recognition/network_conf.py @@ -69,10 +69,9 @@ class Model(object): reverse=True) # Map the output of RNN to character distribution. - self.output = layer.fc( - input=[gru_forward, gru_backward], - size=self.num_classes + 1, - act=Linear()) + self.output = layer.fc(input=[gru_forward, gru_backward], + size=self.num_classes + 1, + act=Linear()) self.log_probs = paddle.layer.mixed( input=paddle.layer.identity_projection(input=self.output), diff --git a/scene_text_recognition/train.py b/scene_text_recognition/train.py index 79eccc0b..0fac5c6e 100644 --- a/scene_text_recognition/train.py +++ b/scene_text_recognition/train.py @@ -62,11 +62,10 @@ def train(train_file_list_path, test_file_list_path, label_dict_path, # Create all the trainable parameters. params = paddle.parameters.create(model.cost) - trainer = paddle.trainer.SGD( - cost=model.cost, - parameters=params, - update_equation=optimizer, - extra_layers=model.eval) + trainer = paddle.trainer.SGD(cost=model.cost, + parameters=params, + update_equation=optimizer, + extra_layers=model.eval) # Feeding dictionary. 
feeding = {'image': 0, 'label': 1} diff --git a/scheduled_sampling/network_conf.py b/scheduled_sampling/network_conf.py index 29560331..f331c15b 100644 --- a/scheduled_sampling/network_conf.py +++ b/scheduled_sampling/network_conf.py @@ -41,19 +41,17 @@ def seqToseq_net(source_dict_dim, encoded_vector = paddle.layer.concat(input=[src_forward, src_reverse]) #### Decoder - encoded_proj = paddle.layer.fc( - input=encoded_vector, - size=decoder_size, - act=paddle.activation.Linear(), - bias_attr=False) + encoded_proj = paddle.layer.fc(input=encoded_vector, + size=decoder_size, + act=paddle.activation.Linear(), + bias_attr=False) reverse_first = paddle.layer.first_seq(input=src_reverse) - decoder_boot = paddle.layer.fc( - input=reverse_first, - size=decoder_size, - act=paddle.activation.Tanh(), - bias_attr=False) + decoder_boot = paddle.layer.fc(input=reverse_first, + size=decoder_size, + act=paddle.activation.Tanh(), + bias_attr=False) def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word, true_token_flag): @@ -92,11 +90,10 @@ def seqToseq_net(source_dict_dim, current_word = paddle.layer.multiplex( input=[true_token_flag, true_word, generated_word_emb]) - decoder_inputs = paddle.layer.fc( - input=[context, current_word], - size=decoder_size * 3, - act=paddle.activation.Linear(), - bias_attr=False) + decoder_inputs = paddle.layer.fc(input=[context, current_word], + size=decoder_size * 3, + act=paddle.activation.Linear(), + bias_attr=False) gru_step = paddle.layer.gru_step( name='gru_decoder', @@ -104,11 +101,10 @@ def seqToseq_net(source_dict_dim, output_mem=decoder_mem, size=decoder_size) - out = paddle.layer.fc( - name='gru_out', - input=gru_step, - size=target_dict_dim, - act=paddle.activation.Softmax()) + out = paddle.layer.fc(name='gru_out', + input=gru_step, + size=target_dict_dim, + act=paddle.activation.Softmax()) return out def gru_decoder_with_attention_gen(enc_vec, enc_proj, current_word): @@ -132,11 +128,10 @@ def seqToseq_net(source_dict_dim, encoded_proj=enc_proj, decoder_state=decoder_mem) - decoder_inputs = paddle.layer.fc( - input=[context, current_word], - size=decoder_size * 3, - act=paddle.activation.Linear(), - bias_attr=False) + decoder_inputs = paddle.layer.fc(input=[context, current_word], + size=decoder_size * 3, + act=paddle.activation.Linear(), + bias_attr=False) gru_step = paddle.layer.gru_step( name='gru_decoder', @@ -144,11 +139,10 @@ def seqToseq_net(source_dict_dim, output_mem=decoder_mem, size=decoder_size) - out = paddle.layer.fc( - name='gru_out', - input=gru_step, - size=target_dict_dim, - act=paddle.activation.Softmax()) + out = paddle.layer.fc(name='gru_out', + input=gru_step, + size=target_dict_dim, + act=paddle.activation.Softmax()) return out decoder_group_name = "decoder_group" diff --git a/scheduled_sampling/train.py b/scheduled_sampling/train.py index 34fc20d2..3c8532f1 100644 --- a/scheduled_sampling/train.py +++ b/scheduled_sampling/train.py @@ -72,13 +72,16 @@ def train(dict_size, batch_size, num_passes, beam_size, schedule_type, decay_a, parameters = paddle.parameters.create(cost) - trainer = paddle.trainer.SGD( - cost=cost, parameters=parameters, update_equation=optimizer) + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer) wmt14_reader = reader.gen_schedule_data( paddle.reader.shuffle( paddle.dataset.wmt14.train(dict_size), buf_size=8192), - schedule_type, decay_a, decay_b) + schedule_type, + decay_a, + decay_b) # define event_handler callback def event_handler(event): @@ -98,7 +101,8 @@ 
def train(dict_size, batch_size, num_passes, beam_size, schedule_type, decay_a, # start to train trainer.train( - reader=paddle.batch(wmt14_reader, batch_size=batch_size), + reader=paddle.batch( + wmt14_reader, batch_size=batch_size), event_handler=event_handler, feeding=reader.feeding, num_passes=num_passes) diff --git a/sequence_tagging_for_ner/network_conf.py b/sequence_tagging_for_ner/network_conf.py index 6a0a810c..b8bc8da0 100644 --- a/sequence_tagging_for_ner/network_conf.py +++ b/sequence_tagging_for_ner/network_conf.py @@ -79,27 +79,25 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True): # Please do not add any nonlinear activation to this fully connected layer. # The default activation for paddle.layer.fc is the tanh, here needs to set # it to linear explictly. - emission = paddle.layer.fc( - size=label_dict_len, - bias_attr=False, - input=rnn_fea, - act=paddle.activation.Linear(), - param_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) / - 3)) + emission = paddle.layer.fc(size=label_dict_len, + bias_attr=False, + input=rnn_fea, + act=paddle.activation.Linear(), + param_attr=paddle.attr.Param( + initial_std=1. / math.sqrt(hidden_dim) / 3)) if is_train: target = paddle.layer.data( name="target", type=paddle.data_type.integer_value_sequence(label_dict_len)) - crf = paddle.layer.crf( - size=label_dict_len, - input=emission, - label=target, - param_attr=paddle.attr.Param( - name="crfw", - initial_std=1. / math.sqrt(hidden_dim) / 3, - learning_rate=mix_hidden_lr)) + crf = paddle.layer.crf(size=label_dict_len, + input=emission, + label=target, + param_attr=paddle.attr.Param( + name="crfw", + initial_std=1. / math.sqrt(hidden_dim) / 3, + learning_rate=mix_hidden_lr)) crf_dec = paddle.layer.crf_decoding( size=label_dict_len, diff --git a/sequence_tagging_for_ner/train.py b/sequence_tagging_for_ner/train.py index 8d235817..04b748f5 100644 --- a/sequence_tagging_for_ner/train.py +++ b/sequence_tagging_for_ner/train.py @@ -54,11 +54,10 @@ def main(train_data_file, model_average=paddle.optimizer.ModelAverage( average_window=0.5, max_average_window=10000), ) - trainer = paddle.trainer.SGD( - cost=crf_cost, - parameters=parameters, - update_equation=optimizer, - extra_layers=crf_dec) + trainer = paddle.trainer.SGD(cost=crf_cost, + parameters=parameters, + update_equation=optimizer, + extra_layers=crf_dec) train_reader = paddle.batch( paddle.reader.shuffle( diff --git a/ssd/config/__init__.py b/ssd/config/__init__.py index e69de29b..8b137891 100644 --- a/ssd/config/__init__.py +++ b/ssd/config/__init__.py @@ -0,0 +1 @@ + diff --git a/ssd/eval.py b/ssd/eval.py index 345e46f9..4f585eae 100644 --- a/ssd/eval.py +++ b/ssd/eval.py @@ -14,11 +14,10 @@ def eval(eval_file_list, batch_size, data_args, model_path): optimizer = paddle.optimizer.Momentum() - trainer = paddle.trainer.SGD( - cost=cost, - parameters=parameters, - extra_layers=[detect_out], - update_equation=optimizer) + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + extra_layers=[detect_out], + update_equation=optimizer) feeding = {'image': 0, 'bbox': 1} diff --git a/ssd/train.py b/ssd/train.py index 602742b5..38b27f7b 100644 --- a/ssd/train.py +++ b/ssd/train.py @@ -24,11 +24,10 @@ def train(train_file_list, dev_file_list, data_args, init_model_path): assert os.path.isfile(init_model_path), 'Invalid model.' 
parameters.init_from_tar(gzip.open(init_model_path)) - trainer = paddle.trainer.SGD( - cost=cost, - parameters=parameters, - extra_layers=[detect_out], - update_equation=optimizer) + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + extra_layers=[detect_out], + update_equation=optimizer) feeding = {'image': 0, 'bbox': 1} diff --git a/text_classification/network_conf.py b/text_classification/network_conf.py index 8f1207ec..bed3da75 100644 --- a/text_classification/network_conf.py +++ b/text_classification/network_conf.py @@ -91,8 +91,9 @@ def convolution_net(dict_dim, input=emb, context_len=4, hidden_size=hid_dim) # fc and output layer - prob = paddle.layer.fc( - input=[conv_3, conv_4], size=class_dim, act=paddle.activation.Softmax()) + prob = paddle.layer.fc(input=[conv_3, conv_4], + size=class_dim, + act=paddle.activation.Softmax()) if is_infer: return prob -- GitLab
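
Note: every hunk above is the same kind of mechanical re-wrap. Calls such as paddle.layer.fc and paddle.trainer.SGD no longer break immediately after the opening parenthesis with all keyword arguments indented by four spaces; instead the first keyword argument stays on the call line and the remaining arguments are aligned under it. A minimal sketch of the two layouts (the data layer, sizes, and variable names below are made up for illustration and are not taken from any file touched by this patch):

    import paddle.v2 as paddle

    paddle.init(use_gpu=False, trainer_count=1)
    x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(8))

    # layout removed by the hunks above: break after '(' and indent every argument
    prob_old = paddle.layer.fc(
        input=x,
        size=2,
        act=paddle.activation.Softmax())

    # layout introduced by the hunks above: keep the first argument on the call
    # line and align the remaining arguments with it
    prob_new = paddle.layer.fc(input=x,
                               size=2,
                               act=paddle.activation.Softmax())

Both layouts parse to the identical call, so none of the files in this patch change behavior; only the source formatting differs.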