Commit bb036f52 authored by ying

Use the same yapf version as the Paddle main repo.

Parent 0e844a1c
-- repo: https://github.com/pre-commit/mirrors-yapf.git
-  sha: v0.16.0
+- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
+  sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
   hooks:
   - id: yapf
     files: \.py$
......
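The updated hook pins yapf to the same revision the main Paddle repo uses, so pre-commit reformats every staged .py file with that exact version, producing the whitespace-only reflows shown in the hunks below. A minimal sketch of reproducing the reflow locally is given here, assuming yapf is installed at the pinned revision and available on PATH; the invocation is illustrative and not part of this commit.

import subprocess

# Re-run yapf in place over all Python sources in the working tree.
# yapf picks up a .style.yapf or setup.cfg [yapf] section if one exists,
# otherwise it falls back to its default PEP 8-based style.
subprocess.check_call(["yapf", "--in-place", "--recursive", "."])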
......@@ -42,10 +42,12 @@ class BeamSearch(object):
for sample_id in sample_list:
for path in self.candidate_path[sample_id]:
if len(path['seq']) < self.win_len:
cur_trg = [self.word_padding] * (self.win_len - len(
path['seq']) - 1) + [self.trg_dict['<s>']] + path['seq']
cur_trg_pos = [self.pos_padding] * (self.win_len - len(
path['seq']) - 1) + [0] + range(1, len(path['seq']) + 1)
cur_trg = [self.word_padding] * (
self.win_len - len(path['seq']) - 1
) + [self.trg_dict['<s>']] + path['seq']
cur_trg_pos = [self.pos_padding] * (
self.win_len - len(path['seq']) - 1) + [0] + range(
1, len(path['seq']) + 1)
else:
cur_trg = path['seq'][-self.win_len:]
cur_trg_pos = range(
......@@ -84,13 +86,11 @@ class BeamSearch(object):
for seq_id, path in enumerate(self.candidate_path[sample_id]):
for w in top_words[idx, :]:
score = path['score'] + math.log(prob[idx, w])
candidate_words[sample_id] = candidate_words[sample_id] + [
{
candidate_words[sample_id] = candidate_words[sample_id] + [{
'word': w,
'score': score,
'seq_id': seq_id
}
]
}]
idx = idx + 1
return candidate_words
......@@ -140,10 +140,8 @@ class BeamSearch(object):
w['word']
]
new_path[sample_id] = new_path[sample_id] + [{
'seq':
seq,
'score':
w['score']
'seq': seq,
'score': w['score']
}]
return new_path
......
......@@ -193,8 +193,7 @@ def attention(decoder_state, cur_embedding, encoded_vec, encoded_sum):
m = paddle.layer.dot_prod(input1=expanded, input2=encoded_vec)
attention_weight = paddle.layer.fc(
input=m,
attention_weight = paddle.layer.fc(input=m,
size=1,
act=paddle.activation.SequenceSoftmax(),
bias_attr=False)
......@@ -204,8 +203,7 @@ def attention(decoder_state, cur_embedding, encoded_vec, encoded_sum):
attended = paddle.layer.pooling(
input=scaled, pooling_type=paddle.pooling.Sum())
attended_proj = paddle.layer.fc(
input=attended,
attended_proj = paddle.layer.fc(input=attended,
size=state_size,
act=paddle.activation.Linear(),
bias_attr=True)
......@@ -279,8 +277,7 @@ def decoder(token_emb,
if block_input.size == size:
residual = block_input
else:
residual = paddle.layer.fc(
input=block_input,
residual = paddle.layer.fc(input=block_input,
size=size,
act=paddle.activation.Linear(),
bias_attr=True)
......@@ -381,12 +378,14 @@ def conv_seq2seq(src_dict_size,
input=src,
size=emb_dim,
name='src_word_emb',
param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1))
param_attr=paddle.attr.Param(
initial_mean=0., initial_std=0.1))
src_pos_emb = paddle.layer.embedding(
input=src_pos,
size=emb_dim,
name='src_pos_emb',
param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1))
param_attr=paddle.attr.Param(
initial_mean=0., initial_std=0.1))
num_attention = len(dec_conv_blocks)
encoded_vec, encoded_sum = encoder(
......@@ -410,12 +409,14 @@ def conv_seq2seq(src_dict_size,
input=trg,
size=emb_dim,
name='trg_word_emb',
param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1))
param_attr=paddle.attr.Param(
initial_mean=0., initial_std=0.1))
trg_pos_emb = paddle.layer.embedding(
input=trg_pos,
size=emb_dim,
name='trg_pos_emb',
param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1))
param_attr=paddle.attr.Param(
initial_mean=0., initial_std=0.1))
decoder_out, weight = decoder(
token_emb=trg_emb,
......
......@@ -166,8 +166,7 @@ def train(train_data_path,
src_dict_size = src_dict.__len__()
trg_dict_size = trg_dict.__len__()
optimizer = paddle.optimizer.Adam(
learning_rate=1e-3, )
optimizer = paddle.optimizer.Adam(learning_rate=1e-3, )
cost = conv_seq2seq(
src_dict_size=src_dict_size,
......@@ -182,8 +181,9 @@ def train(train_data_path,
# create parameters and trainer
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(
cost=cost, parameters=parameters, update_equation=optimizer)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
padding_list = [context_len - 1 for (size, context_len) in dec_conv_blocks]
padding_num = reduce(lambda x, y: x + y, padding_list)
......
......@@ -79,8 +79,9 @@ all the files are for demo.
feature_dims = {}
categorial_features = ('C1 banner_pos site_category app_category ' +
'device_type device_conn_type').split()
categorial_features = (
'C1 banner_pos site_category app_category ' + 'device_type device_conn_type'
).split()
id_features = 'id site_id app_id device_id _device_id_cross_site_id'.split()
......@@ -335,8 +336,8 @@ class AvazuDataset(object):
else:
fea0 = self.fields[key].cross_fea0
fea1 = self.fields[key].cross_fea1
record.append(
self.fields[key].gen_cross_fea(row[fea0], row[fea1]))
record.append(self.fields[key].gen_cross_fea(row[fea0], row[
fea1]))
sparse_input = concat_sparse_vectors(record, self.id_dims)
......@@ -396,7 +397,8 @@ with open(output_infer_path, 'w') as f:
dnn_input, lr_input = record
dnn_input = ids2dense(dnn_input, feature_dims['dnn_input'])
lr_input = ids2sparse(lr_input)
line = "%s\t%s\n" % (' '.join(map(str, dnn_input)),
line = "%s\t%s\n" % (
' '.join(map(str, dnn_input)),
' '.join(map(str, lr_input)), )
f.write(line)
if id > args.test_set_size:
......
......@@ -60,12 +60,11 @@ class CTRmodel(object):
'''
build DNN submodel.
'''
dnn_embedding = layer.fc(
input=self.dnn_merged_input, size=dnn_layer_dims[0])
dnn_embedding = layer.fc(input=self.dnn_merged_input,
size=dnn_layer_dims[0])
_input_layer = dnn_embedding
for i, dim in enumerate(dnn_layer_dims[1:]):
fc = layer.fc(
input=_input_layer,
fc = layer.fc(input=_input_layer,
size=dim,
act=paddle.activation.Relu(),
name='dnn-fc-%d' % i)
......@@ -76,8 +75,9 @@ class CTRmodel(object):
'''
config LR submodel
'''
fc = layer.fc(
input=self.lr_merged_input, size=1, act=paddle.activation.Relu())
fc = layer.fc(input=self.lr_merged_input,
size=1,
act=paddle.activation.Relu())
return fc
def _build_classification_model(self, dnn, lr):
......@@ -95,8 +95,9 @@ class CTRmodel(object):
def _build_regression_model(self, dnn, lr):
merge_layer = layer.concat(input=[dnn, lr])
self.output = layer.fc(
input=merge_layer, size=1, act=paddle.activation.Sigmoid())
self.output = layer.fc(input=merge_layer,
size=1,
act=paddle.activation.Sigmoid())
if not self.is_infer:
self.train_cost = paddle.layer.square_error_cost(
input=self.output, label=self.click)
......
......@@ -68,8 +68,9 @@ def train():
params = paddle.parameters.create(model.train_cost)
optimizer = paddle.optimizer.AdaGrad()
trainer = paddle.trainer.SGD(
cost=model.train_cost, parameters=params, update_equation=optimizer)
trainer = paddle.trainer.SGD(cost=model.train_cost,
parameters=params,
update_equation=optimizer)
dataset = reader.Dataset()
......
......@@ -64,5 +64,7 @@ def load_dnn_input_record(sent):
def load_lr_input_record(sent):
res = []
for _ in [x.split(':') for x in sent.split()]:
res.append((int(_[0]), float(_[1]), ))
res.append((
int(_[0]),
float(_[1]), ))
return res
......@@ -5,8 +5,9 @@ sparse_feature_dim = 117568
def fm_layer(input, factor_size, fm_param_attr):
first_order = paddle.layer.fc(
input=input, size=1, act=paddle.activation.Linear())
first_order = paddle.layer.fc(input=input,
size=1,
act=paddle.activation.Linear())
second_order = paddle.layer.factorization_machine(
input=input,
factor_size=factor_size,
......@@ -51,15 +52,13 @@ def DeepFM(factor_size, infer=False):
sparse_embed_seq = map(embedding_layer, sparse_input_ids)
sparse_embed = paddle.layer.concat(sparse_embed_seq)
fc1 = paddle.layer.fc(
input=[sparse_embed, dense_input],
fc1 = paddle.layer.fc(input=[sparse_embed, dense_input],
size=400,
act=paddle.activation.Relu())
fc2 = paddle.layer.fc(input=fc1, size=400, act=paddle.activation.Relu())
fc3 = paddle.layer.fc(input=fc2, size=400, act=paddle.activation.Relu())
predict = paddle.layer.fc(
input=[dense_fm, sparse_fm, fc3],
predict = paddle.layer.fc(input=[dense_fm, sparse_fm, fc3],
size=1,
act=paddle.activation.Sigmoid())
......
......@@ -121,8 +121,8 @@ def preprocess(datadir, outdir):
continous_vals = []
for i in range(0, len(continous_features)):
val = dists.gen(i, features[continous_features[i]])
continous_vals.append(
"{0:.6f}".format(val).rstrip('0').rstrip('.'))
continous_vals.append("{0:.6f}".format(val).rstrip('0')
.rstrip('.'))
categorial_vals = []
for i in range(0, len(categorial_features)):
val = dicts.gen(i, features[categorial_features[
......@@ -147,13 +147,12 @@ def preprocess(datadir, outdir):
continous_vals = []
for i in range(0, len(continous_features)):
val = dists.gen(i, features[continous_features[i] - 1])
continous_vals.append(
"{0:.6f}".format(val).rstrip('0').rstrip('.'))
continous_vals.append("{0:.6f}".format(val).rstrip('0')
.rstrip('.'))
categorial_vals = []
for i in range(0, len(categorial_features)):
val = dicts.gen(i,
features[categorial_features[i] -
1]) + categorial_feature_offset[i]
val = dicts.gen(i, features[categorial_features[
i] - 1]) + categorial_feature_offset[i]
categorial_vals.append(str(val))
continous_vals = ','.join(continous_vals)
......
......@@ -63,8 +63,9 @@ def train():
params = paddle.parameters.create(model)
trainer = paddle.trainer.SGD(
cost=model, parameters=params, update_equation=optimizer)
trainer = paddle.trainer.SGD(cost=model,
parameters=params,
update_equation=optimizer)
dataset = reader.Dataset()
......
......@@ -102,11 +102,11 @@ class DSSM(object):
"""
_input_layer = paddle.layer.pooling(
input=emb, pooling_type=paddle.pooling.Max())
fc = paddle.layer.fc(
input=_input_layer,
fc = paddle.layer.fc(input=_input_layer,
size=self.dnn_dims[1],
param_attr=ParamAttr(name="%s_fc.w" % prefix),
bias_attr=ParamAttr(name="%s_fc.b" % prefix, initial_std=0.))
bias_attr=ParamAttr(
name="%s_fc.b" % prefix, initial_std=0.))
return fc
def create_rnn(self, emb, prefix=""):
......@@ -161,12 +161,12 @@ class DSSM(object):
name = "%s_fc_%d_%d" % (prefix, id, dim)
logger.info("create fc layer [%s] which dimention is %d" %
(name, dim))
fc = paddle.layer.fc(
input=_input_layer,
fc = paddle.layer.fc(input=_input_layer,
size=dim,
act=paddle.activation.Tanh(),
param_attr=ParamAttr(name="%s.w" % name),
bias_attr=ParamAttr(name="%s.b" % name, initial_std=0.))
bias_attr=ParamAttr(
name="%s.b" % name, initial_std=0.))
_input_layer = fc
return _input_layer
......@@ -278,8 +278,7 @@ class DSSM(object):
if is_classification:
concated_vector = paddle.layer.concat(semantics)
prediction = paddle.layer.fc(
input=concated_vector,
prediction = paddle.layer.fc(input=concated_vector,
size=self.class_num,
act=paddle.activation.Softmax())
cost = paddle.layer.classification_cost(
......
......@@ -63,7 +63,10 @@ class Dataset(object):
target = sent2ids(fs[1], self.target_dic)
if not self.is_infer:
label = int(fs[2])
return (source, target, label, )
return (
source,
target,
label, )
return source, target
def _read_regression_record(self, line):
......@@ -82,7 +85,10 @@ class Dataset(object):
target = sent2ids(fs[1], self.target_dic)
if not self.is_infer:
label = float(fs[2])
return (source, target, [label], )
return (
source,
target,
[label], )
return source, target
def _read_rank_record(self, line):
......
......@@ -173,11 +173,13 @@ def train(train_data_path=None,
model_type=model_type, )
train_reader = paddle.batch(
paddle.reader.shuffle(dataset.train, buf_size=1000),
paddle.reader.shuffle(
dataset.train, buf_size=1000),
batch_size=batch_size)
test_reader = paddle.batch(
paddle.reader.shuffle(dataset.test, buf_size=1000),
paddle.reader.shuffle(
dataset.test, buf_size=1000),
batch_size=batch_size)
paddle.init(use_gpu=use_gpu, trainer_count=num_workers)
......
......@@ -65,8 +65,7 @@ class PaddleModel(Model):
place=self._place,
program=self._program)
predict_var = self._program.block(0).var(self._predict_name)
predict = self._exe.run(
self._program,
predict = self._exe.run(self._program,
feed=feeder.feed(image_batch),
fetch_list=[predict_var])
return predict
......@@ -96,8 +95,7 @@ class PaddleModel(Model):
place=self._place,
program=self._program)
grad, = self._exe.run(
self._program,
grad, = self._exe.run(self._program,
feed=feeder.feed(image_batch),
fetch_list=[self._gradient])
return grad
......@@ -54,7 +54,8 @@ def main():
ACC_THRESHOLD = 0.98
LOSS_THRESHOLD = 10.0
train_reader = paddle.batch(
paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
batch_size=BATCH_SIZE)
place = fluid.CPUPlace()
......@@ -65,8 +66,7 @@ def main():
for pass_id in range(PASS_NUM):
accuracy.reset(exe)
for data in train_reader():
loss, acc = exe.run(
fluid.default_main_program(),
loss, acc = exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost] + accuracy.metrics)
pass_acc = accuracy.eval(exe)
......
......@@ -59,7 +59,8 @@ def main():
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
batch_size=BATCH_SIZE)
feeder = fluid.DataFeeder(
feed_list=[IMG_NAME, LABEL_NAME],
......
......@@ -59,8 +59,7 @@ def _attended_decoder_step(word_count, enc_out, enc_out_proj,
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Tanh())
next_word = paddle.layer.fc(
size=word_count,
next_word = paddle.layer.fc(size=word_count,
bias_attr=True,
act=paddle.activation.Softmax(),
input=lstm)
......@@ -86,8 +85,7 @@ def encoder_decoder_network(word_count,
param_attr=paddle.attr.ParamAttr(name="__embedding__"))
enc_out = _bidirect_lstm_encoder(
input=src_emb, hidden_dim=encoder_hidden_dim, depth=encoder_depth)
enc_out_proj = paddle.layer.fc(
act=paddle.activation.Linear(),
enc_out_proj = paddle.layer.fc(act=paddle.activation.Linear(),
size=encoder_hidden_dim,
bias_attr=False,
input=enc_out)
......
......@@ -26,9 +26,8 @@ def train_reader(data_file_path, word_dict_file):
l = len(poetry_ids)
if l < 2: continue
for i in range(l - 1):
yield poetry_ids[i], poetry_ids[i +
1][:-1], poetry_ids[i +
1][1:]
yield poetry_ids[i], poetry_ids[i + 1][:-1], poetry_ids[
i + 1][1:]
return reader
......@@ -43,10 +42,10 @@ def gen_reader(data_file_path, word_dict_file):
with open(data_file_path, "r") as f:
for line in f:
input_line = "".join(
line.strip().decode("utf8", errors="ignore").split())
yield [bos_id
] + [word_dict.get(word, unk_id)
for word in input_line] + [eos_id]
input_line = "".join(line.strip().decode(
"utf8", errors="ignore").split())
yield [bos_id] + [
word_dict.get(word, unk_id) for word in input_line
] + [eos_id]
return reader
......@@ -96,8 +96,9 @@ def train(num_passes,
if init_model_path:
load_initial_model(init_model_path, parameters)
trainer = paddle.trainer.SGD(
cost=cost, parameters=parameters, update_equation=optimizer)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# define data reader
train_reader = paddle.batch(
......
......@@ -48,8 +48,9 @@ def rnn_lm(vocab_dim,
raise Exception("rnn_type error!")
# fc(full connected) and output layer
output = paddle.layer.fc(
input=[rnn_cell], size=vocab_dim, act=paddle.activation.Softmax())
output = paddle.layer.fc(input=[rnn_cell],
size=vocab_dim,
act=paddle.activation.Softmax())
if is_infer:
last_word = paddle.layer.last_seq(input=output)
......
......@@ -46,8 +46,7 @@ def train(topology,
# create sum evaluator
sum_eval = paddle.evaluator.sum(topology)
# create trainer
trainer = paddle.trainer.SGD(
cost=topology,
trainer = paddle.trainer.SGD(cost=topology,
parameters=parameters,
update_equation=adam_optimizer,
extra_layers=sum_eval)
......
......@@ -110,8 +110,9 @@ def lstm_by_nested_sequence(input_layer, hidden_dim, name="", reverse=False):
name="__inner_state_%s__" % name,
size=hidden_dim,
boot_layer=outer_memory)
input_proj = paddle.layer.fc(
size=hidden_dim * 4, bias_attr=False, input=input_layer)
input_proj = paddle.layer.fc(size=hidden_dim * 4,
bias_attr=False,
input=input_layer)
return paddle.networks.lstmemory_unit(
input=input_proj,
name="__inner_state_%s__" % name,
......
......@@ -88,9 +88,9 @@ class BeamDecoding(object):
for j in range(beam_size):
selected_id = int(self.selected_sentences[i][j])
if selected_id == -1: break
seq_len = self.beam1_seq_start_positions[
i][selected_id +
1] - self.beam1_seq_start_positions[i][selected_id]
seq_len = self.beam1_seq_start_positions[i][
selected_id + 1] - self.beam1_seq_start_positions[i][
selected_id]
self.beam2_seq_start_positions[-1].append(
self.beam2_seq_start_positions[-1][-1] + seq_len)
......@@ -113,9 +113,9 @@ class BeamDecoding(object):
self.beam3_seq_start_positions.append([0])
sub_seq_num, beam_size = self.selected_starts.shape
for i in range(sub_seq_num):
seq_len = self.beam2_seq_start_positions[
seq_id][sub_seq_id +
1] - self.beam2_seq_start_positions[seq_id][sub_seq_id]
seq_len = self.beam2_seq_start_positions[seq_id][
sub_seq_id + 1] - self.beam2_seq_start_positions[seq_id][
sub_seq_id]
for j in range(beam_size):
start_id = int(self.selected_starts[i][j])
if start_id == -1: break
......@@ -130,8 +130,8 @@ class BeamDecoding(object):
[self.beam3_seq_start_positions[-1][-1]])
sub_seq_id = 0
seq_id += 1
sub_seq_count = len(
self.beam2_seq_start_positions[seq_id]) - 1
sub_seq_count = len(self.beam2_seq_start_positions[
seq_id]) - 1
assert (
self.beam3_seq_start_positions[-1][-1] == self.end_scores.shape[0])
......@@ -198,16 +198,11 @@ class BeamDecoding(object):
if end_pos == -1: break
self.all_searched_ans.append({
"score":
self.end_scores[seq_offset_in_batch + end_pos],
"sentence_pos":
-1,
"start_span_pos":
-1,
"end_span_pos":
end_pos,
"parent_ids_in_prev_beam":
i
"score": self.end_scores[seq_offset_in_batch + end_pos],
"sentence_pos": -1,
"start_span_pos": -1,
"end_span_pos": end_pos,
"parent_ids_in_prev_beam": i
})
sub_seq_id += 1
......@@ -265,8 +260,7 @@ class BeamDecoding(object):
key=lambda x: x["score"],
reverse=True):
self.final_ans[i].append({
"score":
ans["score"],
"score": ans["score"],
"label": [
ans["sentence_pos"], ans["start_span_pos"],
ans["end_span_pos"]
......
......@@ -235,7 +235,8 @@ def featurize_example(question, context, vocab):
# Convert to indices
question_idxs = [
vocab.word_to_idx(normalize(w))
for w in ciseau.tokenize(question, normalize_ascii=False)
for w in ciseau.tokenize(
question, normalize_ascii=False)
]
context_sents = ciseau.sent_tokenize(
......
......@@ -19,9 +19,11 @@ def build_pretrained_embedding(name, data_type, emb_dim, emb_drop=0.):
"""
return paddle.layer.embedding(
input=paddle.layer.data(name=name, type=data_type),
input=paddle.layer.data(
name=name, type=data_type),
size=emb_dim,
param_attr=paddle.attr.Param(name="GloveVectors", is_static=True),
param_attr=paddle.attr.Param(
name="GloveVectors", is_static=True),
layer_attr=paddle.attr.ExtraLayerAttribute(drop_rate=emb_drop), )
......@@ -56,13 +58,11 @@ def encode_question(input_embedding,
input_embedding, lstm_hidden_dim, depth, 0., prefix)
# compute passage-independent embeddings.
candidates = paddle.layer.fc(
input=lstm_outs,
candidates = paddle.layer.fc(input=lstm_outs,
bias_attr=False,
size=passage_indep_embedding_dim,
act=paddle.activation.Linear())
weights = paddle.layer.fc(
input=lstm_outs,
weights = paddle.layer.fc(input=lstm_outs,
size=1,
bias_attr=False,
act=paddle.activation.SequenceSoftmax())
......@@ -134,8 +134,7 @@ def question_aligned_passage_embedding(question_lstm_outs, document_embeddings,
return paddle.layer.pooling(
input=weighted_candidates, pooling_type=paddle.pooling.Sum())
question_outs_proj = paddle.layer.fc(
input=question_lstm_outs,
question_outs_proj = paddle.layer.fc(input=question_lstm_outs,
bias_attr=False,
size=passage_aligned_embedding_dim)
return paddle.layer.recurrent_group(
......@@ -228,8 +227,7 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config,
last_state_of_sentence = paddle.layer.last_seq(
input=doc_lstm_outs, agg_level=paddle.layer.AggregateLevel.TO_SEQUENCE)
sentence_scores = paddle.layer.fc(
input=last_state_of_sentence,
sentence_scores = paddle.layer.fc(input=last_state_of_sentence,
size=1,
bias_attr=False,
act=paddle.activation.Linear())
......@@ -255,8 +253,7 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config,
_, end_span_embedding = basic_modules.stacked_bidirectional_lstm(
topk_start_spans, config.lstm_hidden_dim, config.lstm_depth,
config.lstm_hidden_droprate, "__end_span_embeddings__")
end_pos_scores = paddle.layer.fc(
input=end_span_embedding,
end_pos_scores = paddle.layer.fc(input=end_span_embedding,
size=1,
bias_attr=False,
act=paddle.activation.Linear())
......
......@@ -129,7 +129,8 @@ def build_reader(data_dir, batch_size):
# testing data is not shuffled
test_reader = paddle.batch(
reader.data_reader(valid_samples, is_train=False),
reader.data_reader(
valid_samples, is_train=False),
batch_size=batch_size)
return train_reader, test_reader, len(train_samples)
......@@ -222,8 +223,9 @@ def train(model_config, trainer_config):
"GloveVectors",
load_pretrained_parameters(ModelConfig.pretrained_emb_path))
trainer = paddle.trainer.SGD(
cost=loss, parameters=parameters, update_equation=optimizer)
trainer = paddle.trainer.SGD(cost=loss,
parameters=parameters,
update_equation=optimizer)
event_handler = build_event_handler(trainer_config, parameters, trainer)
trainer.train(
......
......@@ -153,8 +153,8 @@ class Word2Vec(object):
line = fin.readline()
parts = line.rstrip("\n").rstrip().split(" ")
if len(parts) != self.vector_size + 1:
raise ValueError(
"invalid vector on line {}".format(word_id))
raise ValueError("invalid vector on line {}".format(
word_id))
word, weights = parts[0], [np.float32(x) for x in parts[1:]]
self.syn0[word_id] = weights
self.index2word.append(word)
......@@ -280,5 +280,5 @@ def load_word_vectors(param,
param[idx, :] = missing_word_value()
missing += 1
if verbose:
print(
"Loaded {} words, {} missing".format(len(vocab) - missing, missing))
print("Loaded {} words, {} missing".format(
len(vocab) - missing, missing))
......@@ -21,14 +21,15 @@ def ngram_lm(hidden_size, embed_size, dict_size, gram_num=4, is_train=True):
embed_context = paddle.layer.concat(input=emb_layers)
hidden_layer = paddle.layer.fc(
input=embed_context,
hidden_layer = paddle.layer.fc(input=embed_context,
size=hidden_size,
act=paddle.activation.Sigmoid(),
layer_attr=paddle.attr.Extra(drop_rate=0.5),
bias_attr=paddle.attr.Param(learning_rate=2),
param_attr=paddle.attr.Param(
initial_std=1. / math.sqrt(embed_size * 8), learning_rate=1))
initial_std=1. /
math.sqrt(embed_size * 8),
learning_rate=1))
if is_train == True:
return paddle.layer.hsigmoid(
......
......@@ -50,7 +50,8 @@ def main(save_dir="models"):
paddle.batch(
paddle.reader.shuffle(
lambda: paddle.dataset.imikolov.train(word_dict, 5)(),
buf_size=1000), 64),
buf_size=1000),
64),
num_passes=30,
event_handler=event_handler)
......
......@@ -34,17 +34,16 @@ def alexnet(input, class_dim):
conv_filter_size=3,
pool_type=paddle.pooling.Max())
fc1 = paddle.layer.fc(
input=pool3,
fc1 = paddle.layer.fc(input=pool3,
size=4096,
act=paddle.activation.Relu(),
layer_attr=paddle.attr.Extra(drop_rate=0.5))
fc2 = paddle.layer.fc(
input=fc1,
fc2 = paddle.layer.fc(input=fc1,
size=4096,
act=paddle.activation.Relu(),
layer_attr=paddle.attr.Extra(drop_rate=0.5))
out = paddle.layer.fc(
input=fc2, size=class_dim, act=paddle.activation.Softmax())
out = paddle.layer.fc(input=fc2,
size=class_dim,
act=paddle.activation.Softmax())
return out
......@@ -103,8 +103,8 @@ class ModelConverter(object):
@wrap_name_default("batch_norm")
def convert_BatchNorm_layer(self, params, name=None):
scale = 1 / np.array(params[-1].data)[0] if np.array(
params[-1].data)[0] != 0 else 0
scale = 1 / np.array(params[-1].data)[0] if np.array(params[-1].data)[
0] != 0 else 0
for i in range(2):
data = np.array(params[i].data) * scale
file_name = "_%s.w%s" % (name, str(i + 1))
......
......@@ -126,8 +126,9 @@ def googlenet(input, class_dim):
layer_attr=paddle.attr.Extra(drop_rate=0.4),
act=paddle.activation.Linear())
out = paddle.layer.fc(
input=dropout, size=class_dim, act=paddle.activation.Softmax())
out = paddle.layer.fc(input=dropout,
size=class_dim,
act=paddle.activation.Softmax())
# fc for output 1
pool_o1 = paddle.layer.img_pool(
......@@ -144,14 +145,14 @@ def googlenet(input, class_dim):
num_filters=128,
stride=1,
padding=0)
fc_o1 = paddle.layer.fc(
name="fc_o1",
fc_o1 = paddle.layer.fc(name="fc_o1",
input=conv_o1,
size=1024,
layer_attr=paddle.attr.Extra(drop_rate=0.7),
act=paddle.activation.Relu())
out1 = paddle.layer.fc(
input=fc_o1, size=class_dim, act=paddle.activation.Softmax())
out1 = paddle.layer.fc(input=fc_o1,
size=class_dim,
act=paddle.activation.Softmax())
# fc for output 2
pool_o2 = paddle.layer.img_pool(
......@@ -168,13 +169,13 @@ def googlenet(input, class_dim):
num_filters=128,
stride=1,
padding=0)
fc_o2 = paddle.layer.fc(
name="fc_o2",
fc_o2 = paddle.layer.fc(name="fc_o2",
input=conv_o2,
size=1024,
layer_attr=paddle.attr.Extra(drop_rate=0.7),
act=paddle.activation.Relu())
out2 = paddle.layer.fc(
input=fc_o2, size=class_dim, act=paddle.activation.Softmax())
out2 = paddle.layer.fc(input=fc_o2,
size=class_dim,
act=paddle.activation.Softmax())
return out, out1, out2
......@@ -323,6 +323,7 @@ def inception_resnet_v2(input,
pool_type=paddle.pooling.Avg(),
exclude_mode=False)
drop_out = paddle.layer.dropout(input=avgpool_1a, dropout_rate=dropout_rate)
out = paddle.layer.fc(
input=drop_out, size=class_dim, act=paddle.activation.Softmax())
out = paddle.layer.fc(input=drop_out,
size=class_dim,
act=paddle.activation.Softmax())
return out
......@@ -518,8 +518,7 @@ def inception_v4(input, class_dim):
stride=1,
pool_type=paddle.pooling.Avg())
drop = paddle.layer.dropout(input=pool, dropout_rate=0.2)
out = paddle.layer.fc(
name='incept_fc',
out = paddle.layer.fc(name='incept_fc',
input=drop,
size=class_dim,
act=paddle.activation.Softmax())
......
......@@ -73,8 +73,9 @@ def resnet_imagenet(input, class_dim, depth=50):
res4 = layer_warp(block_func, res3, 512, stages[3], 2)
pool2 = paddle.layer.img_pool(
input=res4, pool_size=7, stride=1, pool_type=paddle.pooling.Avg())
out = paddle.layer.fc(
input=pool2, size=class_dim, act=paddle.activation.Softmax())
out = paddle.layer.fc(input=pool2,
size=class_dim,
act=paddle.activation.Softmax())
return out
......@@ -90,6 +91,7 @@ def resnet_cifar10(input, class_dim, depth=32):
res3 = layer_warp(basicblock, res2, 64, n, 2)
pool = paddle.layer.img_pool(
input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
out = paddle.layer.fc(
input=pool, size=class_dim, act=paddle.activation.Softmax())
out = paddle.layer.fc(input=pool,
size=class_dim,
act=paddle.activation.Softmax())
return out
......@@ -97,8 +97,7 @@ def main():
batch_size=BATCH_SIZE)
# Create trainer
trainer = paddle.trainer.SGD(
cost=cost,
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer,
extra_layers=extra_layers)
......
......@@ -24,18 +24,17 @@ def vgg(input, nums, class_dim):
conv5 = conv_block(conv4, 512, nums[4])
fc_dim = 4096
fc1 = paddle.layer.fc(
input=conv5,
fc1 = paddle.layer.fc(input=conv5,
size=fc_dim,
act=paddle.activation.Relu(),
layer_attr=paddle.attr.Extra(drop_rate=0.5))
fc2 = paddle.layer.fc(
input=fc1,
fc2 = paddle.layer.fc(input=fc1,
size=fc_dim,
act=paddle.activation.Relu(),
layer_attr=paddle.attr.Extra(drop_rate=0.5))
out = paddle.layer.fc(
input=fc2, size=class_dim, act=paddle.activation.Softmax())
out = paddle.layer.fc(input=fc2,
size=class_dim,
act=paddle.activation.Softmax())
return out
......
......@@ -185,8 +185,7 @@ def xception(input, class_dim):
stride=1,
num_channels=2048,
pool_type=paddle.pooling.CudnnAvg())
out = paddle.layer.fc(
name='xception_fc',
out = paddle.layer.fc(name='xception_fc',
input=pool,
size=class_dim,
act=paddle.activation.Softmax())
......
......@@ -19,19 +19,16 @@ def lambda_rank(input_dim, is_infer=False):
paddle.data_type.dense_vector_sequence(input_dim))
# Define the hidden layer.
hd1 = paddle.layer.fc(
input=data,
hd1 = paddle.layer.fc(input=data,
size=128,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01))
hd2 = paddle.layer.fc(
input=hd1,
hd2 = paddle.layer.fc(input=hd1,
size=10,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01))
output = paddle.layer.fc(
input=hd2,
output = paddle.layer.fc(input=hd2,
size=1,
act=paddle.activation.Linear(),
param_attr=paddle.attr.Param(initial_std=0.01))
......
......@@ -17,20 +17,20 @@ def half_ranknet(name_prefix, input_dim):
paddle.data_type.dense_vector(input_dim))
# hidden layer
hd1 = paddle.layer.fc(
input=data,
hd1 = paddle.layer.fc(input=data,
name=name_prefix + "_hidden",
size=10,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01, name="hidden_w1"))
param_attr=paddle.attr.Param(
initial_std=0.01, name="hidden_w1"))
# fully connected layer and output layer
output = paddle.layer.fc(
input=hd1,
output = paddle.layer.fc(input=hd1,
name=name_prefix + "_score",
size=1,
act=paddle.activation.Linear(),
param_attr=paddle.attr.Param(initial_std=0.01, name="output"))
param_attr=paddle.attr.Param(
initial_std=0.01, name="output"))
return output
......
......@@ -16,7 +16,8 @@ logger.setLevel(logging.INFO)
def ranknet_train(input_dim, num_passes, model_save_dir):
train_reader = paddle.batch(
paddle.reader.shuffle(paddle.dataset.mq2007.train, buf_size=100),
paddle.reader.shuffle(
paddle.dataset.mq2007.train, buf_size=100),
batch_size=100)
test_reader = paddle.batch(paddle.dataset.mq2007.test, batch_size=100)
......@@ -70,7 +71,8 @@ def lambda_rank_train(input_dim, num_passes, model_save_dir):
paddle.dataset.mq2007.test, format="listwise")
train_reader = paddle.batch(
paddle.reader.shuffle(fill_default_train, buf_size=100), batch_size=32)
paddle.reader.shuffle(
fill_default_train, buf_size=100), batch_size=32)
test_reader = paddle.batch(fill_default_test, batch_size=32)
cost = lambda_rank(input_dim)
......
......@@ -74,15 +74,13 @@ class ExternalMemory(object):
"""Get write/read head's addressing weights via content-based addressing.
"""
# content-based addressing: a=tanh(W*M + U*key)
key_projection = paddle.layer.fc(
input=key_vector,
key_projection = paddle.layer.fc(input=key_vector,
size=self.mem_slot_size,
act=paddle.activation.Linear(),
bias_attr=False)
key_proj_expanded = paddle.layer.expand(
input=key_projection, expand_as=self.external_memory)
memory_projection = paddle.layer.fc(
input=self.external_memory,
memory_projection = paddle.layer.fc(input=self.external_memory,
size=self.mem_slot_size,
act=paddle.activation.Linear(),
bias_attr=False)
......@@ -101,8 +99,7 @@ class ExternalMemory(object):
"""Interpolate between previous and current addressing weights.
"""
# prepare interpolation scalar gate: g=sigmoid(W*key)
gate = paddle.layer.fc(
input=key_vector,
gate = paddle.layer.fc(input=key_vector,
size=1,
act=paddle.activation.Sigmoid(),
bias_attr=False)
......@@ -114,7 +111,8 @@ class ExternalMemory(object):
interpolated_weight = paddle.layer.interpolation(
name=self.name + "_addressing_weight_" + head_name,
input=[last_addressing_weight, addressing_weight],
weight=paddle.layer.expand(input=gate, expand_as=addressing_weight))
weight=paddle.layer.expand(
input=gate, expand_as=addressing_weight))
return interpolated_weight
def _get_addressing_weight(self, head_name, key_vector):
......@@ -143,13 +141,11 @@ class ExternalMemory(object):
# get addressing weight for write head
write_weight = self._get_addressing_weight("write_head", write_key)
# prepare add_vector and erase_vector
erase_vector = paddle.layer.fc(
input=write_key,
erase_vector = paddle.layer.fc(input=write_key,
size=self.mem_slot_size,
act=paddle.activation.Sigmoid(),
bias_attr=False)
add_vector = paddle.layer.fc(
input=write_key,
add_vector = paddle.layer.fc(input=write_key,
size=self.mem_slot_size,
act=paddle.activation.Sigmoid(),
bias_attr=False)
......
......@@ -81,8 +81,8 @@ def parse_beam_search_result(beam_result, dictionary):
if word != -1:
sentence.append(word)
else:
sentence_list.append(
' '.join([dictionary.get(word) for word in sentence[1:]]))
sentence_list.append(' '.join(
[dictionary.get(word) for word in sentence[1:]]))
sentence = []
beam_probs = beam_result[0]
beam_size = len(beam_probs[0])
......@@ -127,7 +127,9 @@ def infer():
append_tuple=(bounded_memory_perturbation, ))
for i, item in enumerate(test_append_reader()):
if i < args.infer_data_num:
infer_data.append((item[0], item[3], ))
infer_data.append((
item[0],
item[3], ))
# run inference
beam_result = paddle.infer(
......
......@@ -109,8 +109,7 @@ def memory_enhanced_decoder(input, target, initial_state, source_context, size,
:rtype: LayerOutput
"""
# prepare initial bounded and unbounded memory
bounded_memory_slot_init = paddle.layer.fc(
input=paddle.layer.pooling(
bounded_memory_slot_init = paddle.layer.fc(input=paddle.layer.pooling(
input=source_context, pooling_type=paddle.pooling.Avg()),
size=size,
act=paddle.activation.Sigmoid())
......@@ -175,8 +174,7 @@ def memory_enhanced_decoder(input, target, initial_state, source_context, size,
gru_output = paddle.layer.gru_step(
name="gru_decoder", input=gru_inputs, output_mem=state, size=size)
# step output
return paddle.layer.fc(
input=[gru_output, context, cur_embedding],
return paddle.layer.fc(input=[gru_output, context, cur_embedding],
size=dict_size,
act=paddle.activation.Softmax(),
bias_attr=True)
......
......@@ -94,8 +94,9 @@ def train():
# create parameters and trainer
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(
cost=cost, parameters=parameters, update_equation=optimizer)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# create data readers
feeding = {
......@@ -113,13 +114,15 @@ def train():
reader=paddle.dataset.wmt14.train(args.dict_size),
append_tuple=(bounded_memory_perturbation, ))
train_batch_reader = paddle.batch(
reader=paddle.reader.shuffle(reader=train_append_reader, buf_size=8192),
reader=paddle.reader.shuffle(
reader=train_append_reader, buf_size=8192),
batch_size=args.batch_size)
test_append_reader = reader_append_wrapper(
reader=paddle.dataset.wmt14.test(args.dict_size),
append_tuple=(bounded_memory_perturbation, ))
test_batch_reader = paddle.batch(
reader=paddle.reader.shuffle(reader=test_append_reader, buf_size=8192),
reader=paddle.reader.shuffle(
reader=test_append_reader, buf_size=8192),
batch_size=args.batch_size)
# create event handler
......
......@@ -27,8 +27,7 @@ def ngram_lm(hidden_size, emb_size, dict_size, gram_num=4, is_train=True):
param_attr=paddle.attr.Param(initial_std=1. / math.sqrt(emb_size * 8)))
if is_train:
return paddle.layer.nce(
input=hidden_layer,
return paddle.layer.nce(input=hidden_layer,
label=next_word,
num_classes=dict_size,
param_attr=paddle.attr.Param(name="nce_w"),
......
......@@ -43,7 +43,8 @@ def train(model_save_dir):
paddle.batch(
paddle.reader.shuffle(
lambda: paddle.dataset.imikolov.train(word_dict, 5)(),
buf_size=1000), 64),
buf_size=1000),
64),
num_passes=1000,
event_handler=event_handler)
......
......@@ -17,8 +17,7 @@ def cnn_cov_group(group_input, hidden_size):
fc_param_attr = paddle.attr.ParamAttr(name='_cov_value_weight')
fc_bias_attr = paddle.attr.ParamAttr(name='_cov_value_bias')
linear_proj = paddle.layer.fc(
input=[conv3, conv4],
linear_proj = paddle.layer.fc(input=[conv3, conv4],
size=hidden_size,
param_attr=[fc_param_attr, fc_param_attr],
bias_attr=fc_bias_attr,
......
......@@ -24,8 +24,8 @@ def tokenize(pattern):
"""
Read files that match the given pattern. Tokenize and yield each file.
"""
with tarfile.open(
paddle.v2.dataset.common.download(URL, 'imdb', MD5)) as tarf:
with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb',
MD5)) as tarf:
tf = tarf.next()
while tf != None:
if bool(pattern.match(tf.name)):
......@@ -77,11 +77,17 @@ def reader_creator(pos_pattern, neg_pattern, word_idx, buffer_size):
def reader():
# Creates two threads that loads positive and negative samples
# into qs.
t0 = threading.Thread(target=load, args=(pos_pattern, qs[0], ))
t0 = threading.Thread(
target=load, args=(
pos_pattern,
qs[0], ))
t0.daemon = True
t0.start()
t1 = threading.Thread(target=load, args=(neg_pattern, qs[1], ))
t1 = threading.Thread(
target=load, args=(
neg_pattern,
qs[1], ))
t1.daemon = True
t1.start()
......
......@@ -62,8 +62,7 @@ def encoding_question(question, q_lstm_dim, latent_chain_dim, word_vec_dim,
emb = get_embedding(question, word_vec_dim, wordvecs)
# question LSTM
wx = paddle.layer.fc(
act=paddle.activation.Linear(),
wx = paddle.layer.fc(act=paddle.activation.Linear(),
size=q_lstm_dim * 4,
input=emb,
param_attr=paddle.attr.ParamAttr(
......@@ -71,7 +70,9 @@ def encoding_question(question, q_lstm_dim, latent_chain_dim, word_vec_dim,
initial_std=default_init_std,
l2_rate=default_l2_rate),
bias_attr=paddle.attr.ParamAttr(
name="_q_hidden1.wbias", initial_std=0, l2_rate=default_l2_rate))
name="_q_hidden1.wbias",
initial_std=0,
l2_rate=default_l2_rate))
q_rnn = paddle.layer.lstmemory(
input=wx,
bias_attr=paddle.attr.ParamAttr(
......@@ -83,8 +84,7 @@ def encoding_question(question, q_lstm_dim, latent_chain_dim, word_vec_dim,
q_rnn = paddle.layer.dropout(q_rnn, drop_rate)
# self attention
fc = paddle.layer.fc(
act=paddle.activation.Tanh(),
fc = paddle.layer.fc(act=paddle.activation.Tanh(),
size=latent_chain_dim,
input=q_rnn,
param_attr=paddle.attr.ParamAttr(
......@@ -92,8 +92,7 @@ def encoding_question(question, q_lstm_dim, latent_chain_dim, word_vec_dim,
initial_std=default_init_std,
l2_rate=default_l2_rate),
bias_attr=False)
weight = paddle.layer.fc(
size=1,
weight = paddle.layer.fc(size=1,
act=paddle.activation.SequenceSoftmax(),
input=fc,
param_attr=paddle.attr.ParamAttr(
......@@ -262,8 +261,7 @@ def define_common_network(conf):
conf.default_init_std, conf.default_l2_rate)
# pre-compute CRF features
crf_feats = paddle.layer.fc(
act=paddle.activation.Linear(),
crf_feats = paddle.layer.fc(act=paddle.activation.Linear(),
input=e_encoding,
size=conf.label_num,
param_attr=paddle.attr.ParamAttr(
......@@ -283,8 +281,7 @@ def training_net(conf):
:rtype: LayerOutput
"""
e_encoding, label = define_common_network(conf)
crf = paddle.layer.crf(
input=e_encoding,
crf = paddle.layer.crf(input=e_encoding,
label=label,
size=conf.label_num,
param_attr=paddle.attr.ParamAttr(
......
......@@ -21,7 +21,8 @@ def save_model(trainer, model_save_dir, parameters, pass_id):
def show_parameter_init_info(parameters):
"""
Print the information of initialization mean and standard deviation of parameters
Print the information of initialization mean and standard deviation of
parameters
:param parameters: the parameters created in a model
"""
......@@ -98,8 +99,9 @@ def train(conf):
max_average_window=conf.max_average_window))
# create trainer
trainer = paddle.trainer.SGD(
cost=cost, parameters=parameters, update_equation=rmsprop_optimizer)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=rmsprop_optimizer)
# begin training network
def _event_handler(event):
......
......@@ -5,7 +5,12 @@ import sys
import numpy
__all__ = [
"open_file", "cumsum", "logger", "DotBar", "load_dict", "load_wordvecs"
"open_file",
"cumsum",
"logger",
"DotBar",
"load_dict",
"load_wordvecs",
]
logger = logging.getLogger("paddle")
......@@ -64,8 +69,7 @@ class DotBar(object):
self.dots_per_line = dots_per_line
self.f = f
def __enter__(
self, ):
def __enter__(self, ):
self.obj.__enter__()
self.idx = 0
return self
......
......@@ -48,8 +48,9 @@ def seq2seq_net(source_dict_dim,
return_seq=True)
#### Decoder
encoder_last = paddle.layer.last_seq(input=encoded_vector)
encoder_last_projected = paddle.layer.fc(
size=decoder_size, act=paddle.activation.Tanh(), input=encoder_last)
encoder_last_projected = paddle.layer.fc(size=decoder_size,
act=paddle.activation.Tanh(),
input=encoder_last)
# gru step
def gru_decoder_without_attention(enc_vec, current_word):
......@@ -68,8 +69,8 @@ def seq2seq_net(source_dict_dim,
context = paddle.layer.last_seq(input=enc_vec)
decoder_inputs = paddle.layer.fc(
size=decoder_size * 3, input=[context, current_word])
decoder_inputs = paddle.layer.fc(size=decoder_size * 3,
input=[context, current_word])
gru_step = paddle.layer.gru_step(
name="gru_decoder",
......@@ -79,8 +80,7 @@ def seq2seq_net(source_dict_dim,
output_mem=decoder_mem,
size=decoder_size)
out = paddle.layer.fc(
size=target_dict_dim,
out = paddle.layer.fc(size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax(),
input=gru_step)
......
......@@ -33,8 +33,9 @@ def train(save_dir_path, source_dict_dim, target_dict_dim):
learning_rate=1e-3,
gradient_clipping_threshold=10.0,
regularization=paddle.optimizer.L2Regularization(rate=8e-4))
trainer = paddle.trainer.SGD(
cost=cost, parameters=parameters, update_equation=optimizer)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# define data reader
wmt14_reader = paddle.batch(
......
......@@ -70,8 +70,8 @@ def infer(model_path, image_shape, batch_size, label_dict_path,
test_batch = []
labels = []
for i, (image,
label) in enumerate(data_generator.infer_reader(infer_file_list)()):
for i, (image, label
) in enumerate(data_generator.infer_reader(infer_file_list)()):
test_batch.append([image])
labels.append(label)
if len(test_batch) == batch_size:
......
......@@ -69,8 +69,7 @@ class Model(object):
reverse=True)
# Map the output of RNN to character distribution.
self.output = layer.fc(
input=[gru_forward, gru_backward],
self.output = layer.fc(input=[gru_forward, gru_backward],
size=self.num_classes + 1,
act=Linear())
......
......@@ -62,8 +62,7 @@ def train(train_file_list_path, test_file_list_path, label_dict_path,
# Create all the trainable parameters.
params = paddle.parameters.create(model.cost)
trainer = paddle.trainer.SGD(
cost=model.cost,
trainer = paddle.trainer.SGD(cost=model.cost,
parameters=params,
update_equation=optimizer,
extra_layers=model.eval)
......
......@@ -41,16 +41,14 @@ def seqToseq_net(source_dict_dim,
encoded_vector = paddle.layer.concat(input=[src_forward, src_reverse])
#### Decoder
encoded_proj = paddle.layer.fc(
input=encoded_vector,
encoded_proj = paddle.layer.fc(input=encoded_vector,
size=decoder_size,
act=paddle.activation.Linear(),
bias_attr=False)
reverse_first = paddle.layer.first_seq(input=src_reverse)
decoder_boot = paddle.layer.fc(
input=reverse_first,
decoder_boot = paddle.layer.fc(input=reverse_first,
size=decoder_size,
act=paddle.activation.Tanh(),
bias_attr=False)
......@@ -92,8 +90,7 @@ def seqToseq_net(source_dict_dim,
current_word = paddle.layer.multiplex(
input=[true_token_flag, true_word, generated_word_emb])
decoder_inputs = paddle.layer.fc(
input=[context, current_word],
decoder_inputs = paddle.layer.fc(input=[context, current_word],
size=decoder_size * 3,
act=paddle.activation.Linear(),
bias_attr=False)
......@@ -104,8 +101,7 @@ def seqToseq_net(source_dict_dim,
output_mem=decoder_mem,
size=decoder_size)
out = paddle.layer.fc(
name='gru_out',
out = paddle.layer.fc(name='gru_out',
input=gru_step,
size=target_dict_dim,
act=paddle.activation.Softmax())
......@@ -132,8 +128,7 @@ def seqToseq_net(source_dict_dim,
encoded_proj=enc_proj,
decoder_state=decoder_mem)
decoder_inputs = paddle.layer.fc(
input=[context, current_word],
decoder_inputs = paddle.layer.fc(input=[context, current_word],
size=decoder_size * 3,
act=paddle.activation.Linear(),
bias_attr=False)
......@@ -144,8 +139,7 @@ def seqToseq_net(source_dict_dim,
output_mem=decoder_mem,
size=decoder_size)
out = paddle.layer.fc(
name='gru_out',
out = paddle.layer.fc(name='gru_out',
input=gru_step,
size=target_dict_dim,
act=paddle.activation.Softmax())
......
......@@ -72,13 +72,16 @@ def train(dict_size, batch_size, num_passes, beam_size, schedule_type, decay_a,
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(
cost=cost, parameters=parameters, update_equation=optimizer)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
wmt14_reader = reader.gen_schedule_data(
paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size), buf_size=8192),
schedule_type, decay_a, decay_b)
schedule_type,
decay_a,
decay_b)
# define event_handler callback
def event_handler(event):
......@@ -98,7 +101,8 @@ def train(dict_size, batch_size, num_passes, beam_size, schedule_type, decay_a,
# start to train
trainer.train(
reader=paddle.batch(wmt14_reader, batch_size=batch_size),
reader=paddle.batch(
wmt14_reader, batch_size=batch_size),
event_handler=event_handler,
feeding=reader.feeding,
num_passes=num_passes)
......
......@@ -79,21 +79,19 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
# Please do not add any nonlinear activation to this fully connected layer.
# The default activation for paddle.layer.fc is the tanh, here needs to set
# it to linear explictly.
emission = paddle.layer.fc(
size=label_dict_len,
emission = paddle.layer.fc(size=label_dict_len,
bias_attr=False,
input=rnn_fea,
act=paddle.activation.Linear(),
param_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) /
3))
param_attr=paddle.attr.Param(
initial_std=1. / math.sqrt(hidden_dim) / 3))
if is_train:
target = paddle.layer.data(
name="target",
type=paddle.data_type.integer_value_sequence(label_dict_len))
crf = paddle.layer.crf(
size=label_dict_len,
crf = paddle.layer.crf(size=label_dict_len,
input=emission,
label=target,
param_attr=paddle.attr.Param(
......
......@@ -54,8 +54,7 @@ def main(train_data_file,
model_average=paddle.optimizer.ModelAverage(
average_window=0.5, max_average_window=10000), )
trainer = paddle.trainer.SGD(
cost=crf_cost,
trainer = paddle.trainer.SGD(cost=crf_cost,
parameters=parameters,
update_equation=optimizer,
extra_layers=crf_dec)
......
......@@ -14,8 +14,7 @@ def eval(eval_file_list, batch_size, data_args, model_path):
optimizer = paddle.optimizer.Momentum()
trainer = paddle.trainer.SGD(
cost=cost,
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
extra_layers=[detect_out],
update_equation=optimizer)
......
......@@ -24,8 +24,7 @@ def train(train_file_list, dev_file_list, data_args, init_model_path):
assert os.path.isfile(init_model_path), 'Invalid model.'
parameters.init_from_tar(gzip.open(init_model_path))
trainer = paddle.trainer.SGD(
cost=cost,
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
extra_layers=[detect_out],
update_equation=optimizer)
......
......@@ -91,8 +91,9 @@ def convolution_net(dict_dim,
input=emb, context_len=4, hidden_size=hid_dim)
# fc and output layer
prob = paddle.layer.fc(
input=[conv_3, conv_4], size=class_dim, act=paddle.activation.Softmax())
prob = paddle.layer.fc(input=[conv_3, conv_4],
size=class_dim,
act=paddle.activation.Softmax())
if is_infer:
return prob
......