Commit bb036f52 authored by ying

Use the same yapf version as the Paddle main repo.

Parent 0e844a1c
-- repo: https://github.com/pre-commit/mirrors-yapf.git
-  sha: v0.16.0
+- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
+  sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
   hooks:
   - id: yapf
     files: \.py$
...
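Pinning the hook to a fixed sha of the PaddlePaddle mirror, instead of the v0.16.0 tag, keeps every contributor on the exact yapf build used by the Paddle main repo, which is what makes the mechanical reformat below reproducible. For reference only (not part of this commit), a minimal sketch of checking one file through yapf's Python API; the file path and the "pep8" style name are illustrative assumptions:

    # Hypothetical local check against the pinned yapf; not part of this commit.
    from yapf.yapflib.yapf_api import FormatCode

    with open("beam_search.py") as f:  # illustrative path
        source = f.read()

    # FormatCode returns (reformatted_source, changed); "pep8" stands in
    # for the project's actual style configuration (an assumption here).
    formatted, changed = FormatCode(source, style_config="pep8")
    if changed:
        print("yapf would reformat this file")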
@@ -42,10 +42,12 @@ class BeamSearch(object):
         for sample_id in sample_list:
             for path in self.candidate_path[sample_id]:
                 if len(path['seq']) < self.win_len:
-                    cur_trg = [self.word_padding] * (self.win_len - len(
-                        path['seq']) - 1) + [self.trg_dict['<s>']] + path['seq']
-                    cur_trg_pos = [self.pos_padding] * (self.win_len - len(
-                        path['seq']) - 1) + [0] + range(1, len(path['seq']) + 1)
+                    cur_trg = [self.word_padding] * (
+                        self.win_len - len(path['seq']) - 1
+                    ) + [self.trg_dict['<s>']] + path['seq']
+                    cur_trg_pos = [self.pos_padding] * (
+                        self.win_len - len(path['seq']) - 1) + [0] + range(
+                            1, len(path['seq']) + 1)
                 else:
                     cur_trg = path['seq'][-self.win_len:]
                     cur_trg_pos = range(
@@ -84,13 +86,11 @@ class BeamSearch(object):
             for seq_id, path in enumerate(self.candidate_path[sample_id]):
                 for w in top_words[idx, :]:
                     score = path['score'] + math.log(prob[idx, w])
-                    candidate_words[sample_id] = candidate_words[sample_id] + [
-                        {
-                            'word': w,
-                            'score': score,
-                            'seq_id': seq_id
-                        }
-                    ]
+                    candidate_words[sample_id] = candidate_words[sample_id] + [{
+                        'word': w,
+                        'score': score,
+                        'seq_id': seq_id
+                    }]
                     idx = idx + 1
         return candidate_words
@@ -140,10 +140,8 @@ class BeamSearch(object):
                     w['word']
                 ]
                 new_path[sample_id] = new_path[sample_id] + [{
-                    'seq':
-                    seq,
-                    'score':
-                    w['score']
+                    'seq': seq,
+                    'score': w['score']
                 }]
         return new_path
...
@@ -193,22 +193,20 @@ def attention(decoder_state, cur_embedding, encoded_vec, encoded_sum):
     m = paddle.layer.dot_prod(input1=expanded, input2=encoded_vec)
-    attention_weight = paddle.layer.fc(
-        input=m,
-        size=1,
-        act=paddle.activation.SequenceSoftmax(),
-        bias_attr=False)
+    attention_weight = paddle.layer.fc(input=m,
+                                       size=1,
+                                       act=paddle.activation.SequenceSoftmax(),
+                                       bias_attr=False)
     scaled = paddle.layer.scaling(weight=attention_weight, input=encoded_sum)
     attended = paddle.layer.pooling(
         input=scaled, pooling_type=paddle.pooling.Sum())
-    attended_proj = paddle.layer.fc(
-        input=attended,
-        size=state_size,
-        act=paddle.activation.Linear(),
-        bias_attr=True)
+    attended_proj = paddle.layer.fc(input=attended,
+                                    size=state_size,
+                                    act=paddle.activation.Linear(),
+                                    bias_attr=True)
     attention_result = paddle.layer.addto(input=[attended_proj, residual])
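The rewrite of the two fc calls above is the signature pattern of the pinned yapf across this whole diff: the old tag broke immediately after the opening parenthesis and dedented the arguments, while the new build keeps the first argument on the call line and aligns the rest under the opening parenthesis. A self-contained toy illustration (the fc stand-in below is hypothetical, defined only so the snippet runs on its own):

    def fc(input=None, size=None, act=None):
        # Stand-in for paddle.layer.fc, only to keep this snippet runnable.
        return (input, size, act)

    # Old yapf (v0.16.0 tag): break right after the opening parenthesis.
    out_old = fc(
        input="data", size=1, act="softmax")

    # Pinned yapf: first argument stays on the call line, the rest align
    # under the opening parenthesis.
    out_new = fc(input="data",
                 size=1,
                 act="softmax")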
@@ -279,11 +277,10 @@ def decoder(token_emb,
     if block_input.size == size:
         residual = block_input
     else:
-        residual = paddle.layer.fc(
-            input=block_input,
-            size=size,
-            act=paddle.activation.Linear(),
-            bias_attr=True)
+        residual = paddle.layer.fc(input=block_input,
+                                   size=size,
+                                   act=paddle.activation.Linear(),
+                                   bias_attr=True)
     decoder_state = gated_conv_with_batchnorm(
         input=block_input,
@@ -381,12 +378,14 @@ def conv_seq2seq(src_dict_size,
         input=src,
         size=emb_dim,
         name='src_word_emb',
-        param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1))
+        param_attr=paddle.attr.Param(
+            initial_mean=0., initial_std=0.1))
     src_pos_emb = paddle.layer.embedding(
         input=src_pos,
         size=emb_dim,
         name='src_pos_emb',
-        param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1))
+        param_attr=paddle.attr.Param(
+            initial_mean=0., initial_std=0.1))
     num_attention = len(dec_conv_blocks)
     encoded_vec, encoded_sum = encoder(
@@ -410,12 +409,14 @@ def conv_seq2seq(src_dict_size,
         input=trg,
         size=emb_dim,
         name='trg_word_emb',
-        param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1))
+        param_attr=paddle.attr.Param(
+            initial_mean=0., initial_std=0.1))
     trg_pos_emb = paddle.layer.embedding(
         input=trg_pos,
         size=emb_dim,
         name='trg_pos_emb',
-        param_attr=paddle.attr.Param(initial_mean=0., initial_std=0.1))
+        param_attr=paddle.attr.Param(
+            initial_mean=0., initial_std=0.1))
     decoder_out, weight = decoder(
         token_emb=trg_emb,
...
@@ -166,8 +166,7 @@ def train(train_data_path,
     src_dict_size = src_dict.__len__()
     trg_dict_size = trg_dict.__len__()
-    optimizer = paddle.optimizer.Adam(
-        learning_rate=1e-3, )
+    optimizer = paddle.optimizer.Adam(learning_rate=1e-3, )
     cost = conv_seq2seq(
         src_dict_size=src_dict_size,
@@ -182,8 +181,9 @@ def train(train_data_path,
     # create parameters and trainer
     parameters = paddle.parameters.create(cost)
-    trainer = paddle.trainer.SGD(
-        cost=cost, parameters=parameters, update_equation=optimizer)
+    trainer = paddle.trainer.SGD(cost=cost,
+                                 parameters=parameters,
+                                 update_equation=optimizer)
     padding_list = [context_len - 1 for (size, context_len) in dec_conv_blocks]
     padding_num = reduce(lambda x, y: x + y, padding_list)
...
@@ -79,8 +79,9 @@ all the files are for demo.
 feature_dims = {}
-categorial_features = ('C1 banner_pos site_category app_category ' +
-                       'device_type device_conn_type').split()
+categorial_features = (
+    'C1 banner_pos site_category app_category ' + 'device_type device_conn_type'
+).split()
 id_features = 'id site_id app_id device_id _device_id_cross_site_id'.split()
@@ -335,8 +336,8 @@ class AvazuDataset(object):
             else:
                 fea0 = self.fields[key].cross_fea0
                 fea1 = self.fields[key].cross_fea1
-                record.append(
-                    self.fields[key].gen_cross_fea(row[fea0], row[fea1]))
+                record.append(self.fields[key].gen_cross_fea(row[fea0], row[
+                    fea1]))
         sparse_input = concat_sparse_vectors(record, self.id_dims)
@@ -396,8 +397,9 @@ with open(output_infer_path, 'w') as f:
         dnn_input, lr_input = record
         dnn_input = ids2dense(dnn_input, feature_dims['dnn_input'])
         lr_input = ids2sparse(lr_input)
-        line = "%s\t%s\n" % (' '.join(map(str, dnn_input)),
-                             ' '.join(map(str, lr_input)), )
+        line = "%s\t%s\n" % (
+            ' '.join(map(str, dnn_input)),
+            ' '.join(map(str, lr_input)), )
         f.write(line)
         if id > args.test_set_size:
             break
...
@@ -60,15 +60,14 @@ class CTRmodel(object):
         '''
         build DNN submodel.
         '''
-        dnn_embedding = layer.fc(
-            input=self.dnn_merged_input, size=dnn_layer_dims[0])
+        dnn_embedding = layer.fc(input=self.dnn_merged_input,
+                                 size=dnn_layer_dims[0])
         _input_layer = dnn_embedding
         for i, dim in enumerate(dnn_layer_dims[1:]):
-            fc = layer.fc(
-                input=_input_layer,
-                size=dim,
-                act=paddle.activation.Relu(),
-                name='dnn-fc-%d' % i)
+            fc = layer.fc(input=_input_layer,
+                          size=dim,
+                          act=paddle.activation.Relu(),
+                          name='dnn-fc-%d' % i)
             _input_layer = fc
         return _input_layer
@@ -76,8 +75,9 @@ class CTRmodel(object):
         '''
         config LR submodel
         '''
-        fc = layer.fc(
-            input=self.lr_merged_input, size=1, act=paddle.activation.Relu())
+        fc = layer.fc(input=self.lr_merged_input,
+                      size=1,
+                      act=paddle.activation.Relu())
         return fc

     def _build_classification_model(self, dnn, lr):
@@ -95,8 +95,9 @@ class CTRmodel(object):
     def _build_regression_model(self, dnn, lr):
         merge_layer = layer.concat(input=[dnn, lr])
-        self.output = layer.fc(
-            input=merge_layer, size=1, act=paddle.activation.Sigmoid())
+        self.output = layer.fc(input=merge_layer,
+                               size=1,
+                               act=paddle.activation.Sigmoid())
         if not self.is_infer:
             self.train_cost = paddle.layer.square_error_cost(
                 input=self.output, label=self.click)
...
@@ -68,8 +68,9 @@ def train():
     params = paddle.parameters.create(model.train_cost)
     optimizer = paddle.optimizer.AdaGrad()
-    trainer = paddle.trainer.SGD(
-        cost=model.train_cost, parameters=params, update_equation=optimizer)
+    trainer = paddle.trainer.SGD(cost=model.train_cost,
+                                 parameters=params,
+                                 update_equation=optimizer)
     dataset = reader.Dataset()
...
@@ -64,5 +64,7 @@ def load_dnn_input_record(sent):
 def load_lr_input_record(sent):
     res = []
     for _ in [x.split(':') for x in sent.split()]:
-        res.append((int(_[0]), float(_[1]), ))
+        res.append((
+            int(_[0]),
+            float(_[1]), ))
     return res
@@ -5,8 +5,9 @@ sparse_feature_dim = 117568
 def fm_layer(input, factor_size, fm_param_attr):
-    first_order = paddle.layer.fc(
-        input=input, size=1, act=paddle.activation.Linear())
+    first_order = paddle.layer.fc(input=input,
+                                  size=1,
+                                  act=paddle.activation.Linear())
     second_order = paddle.layer.factorization_machine(
         input=input,
         factor_size=factor_size,
@@ -51,17 +52,15 @@ def DeepFM(factor_size, infer=False):
     sparse_embed_seq = map(embedding_layer, sparse_input_ids)
     sparse_embed = paddle.layer.concat(sparse_embed_seq)
-    fc1 = paddle.layer.fc(
-        input=[sparse_embed, dense_input],
-        size=400,
-        act=paddle.activation.Relu())
+    fc1 = paddle.layer.fc(input=[sparse_embed, dense_input],
+                          size=400,
+                          act=paddle.activation.Relu())
     fc2 = paddle.layer.fc(input=fc1, size=400, act=paddle.activation.Relu())
     fc3 = paddle.layer.fc(input=fc2, size=400, act=paddle.activation.Relu())
-    predict = paddle.layer.fc(
-        input=[dense_fm, sparse_fm, fc3],
-        size=1,
-        act=paddle.activation.Sigmoid())
+    predict = paddle.layer.fc(input=[dense_fm, sparse_fm, fc3],
+                              size=1,
+                              act=paddle.activation.Sigmoid())
     if not infer:
         label = paddle.layer.data(
...
@@ -121,8 +121,8 @@ def preprocess(datadir, outdir):
             continous_vals = []
             for i in range(0, len(continous_features)):
                 val = dists.gen(i, features[continous_features[i]])
-                continous_vals.append(
-                    "{0:.6f}".format(val).rstrip('0').rstrip('.'))
+                continous_vals.append("{0:.6f}".format(val).rstrip('0')
+                                      .rstrip('.'))
             categorial_vals = []
             for i in range(0, len(categorial_features)):
                 val = dicts.gen(i, features[categorial_features[
@@ -147,13 +147,12 @@ def preprocess(datadir, outdir):
             continous_vals = []
             for i in range(0, len(continous_features)):
                 val = dists.gen(i, features[continous_features[i] - 1])
-                continous_vals.append(
-                    "{0:.6f}".format(val).rstrip('0').rstrip('.'))
+                continous_vals.append("{0:.6f}".format(val).rstrip('0')
+                                      .rstrip('.'))
             categorial_vals = []
             for i in range(0, len(categorial_features)):
-                val = dicts.gen(i,
-                                features[categorial_features[i] -
-                                         1]) + categorial_feature_offset[i]
+                val = dicts.gen(i, features[categorial_features[
+                    i] - 1]) + categorial_feature_offset[i]
                 categorial_vals.append(str(val))
             continous_vals = ','.join(continous_vals)
...
@@ -63,8 +63,9 @@ def train():
     params = paddle.parameters.create(model)
-    trainer = paddle.trainer.SGD(
-        cost=model, parameters=params, update_equation=optimizer)
+    trainer = paddle.trainer.SGD(cost=model,
+                                 parameters=params,
+                                 update_equation=optimizer)
     dataset = reader.Dataset()
...
@@ -102,11 +102,11 @@ class DSSM(object):
         """
         _input_layer = paddle.layer.pooling(
             input=emb, pooling_type=paddle.pooling.Max())
-        fc = paddle.layer.fc(
-            input=_input_layer,
-            size=self.dnn_dims[1],
-            param_attr=ParamAttr(name="%s_fc.w" % prefix),
-            bias_attr=ParamAttr(name="%s_fc.b" % prefix, initial_std=0.))
+        fc = paddle.layer.fc(input=_input_layer,
+                             size=self.dnn_dims[1],
+                             param_attr=ParamAttr(name="%s_fc.w" % prefix),
+                             bias_attr=ParamAttr(
+                                 name="%s_fc.b" % prefix, initial_std=0.))
         return fc

     def create_rnn(self, emb, prefix=""):
@@ -161,12 +161,12 @@ class DSSM(object):
             name = "%s_fc_%d_%d" % (prefix, id, dim)
             logger.info("create fc layer [%s] which dimention is %d" %
                         (name, dim))
-            fc = paddle.layer.fc(
-                input=_input_layer,
-                size=dim,
-                act=paddle.activation.Tanh(),
-                param_attr=ParamAttr(name="%s.w" % name),
-                bias_attr=ParamAttr(name="%s.b" % name, initial_std=0.))
+            fc = paddle.layer.fc(input=_input_layer,
+                                 size=dim,
+                                 act=paddle.activation.Tanh(),
+                                 param_attr=ParamAttr(name="%s.w" % name),
+                                 bias_attr=ParamAttr(
+                                     name="%s.b" % name, initial_std=0.))
             _input_layer = fc
         return _input_layer
@@ -278,10 +278,9 @@ class DSSM(object):
         if is_classification:
             concated_vector = paddle.layer.concat(semantics)
-            prediction = paddle.layer.fc(
-                input=concated_vector,
-                size=self.class_num,
-                act=paddle.activation.Softmax())
+            prediction = paddle.layer.fc(input=concated_vector,
+                                         size=self.class_num,
+                                         act=paddle.activation.Softmax())
             cost = paddle.layer.classification_cost(
                 input=prediction, label=label)
         else:
...
@@ -63,7 +63,10 @@ class Dataset(object):
         target = sent2ids(fs[1], self.target_dic)
         if not self.is_infer:
             label = int(fs[2])
-            return (source, target, label, )
+            return (
+                source,
+                target,
+                label, )
         return source, target

     def _read_regression_record(self, line):
@@ -82,7 +85,10 @@ class Dataset(object):
         target = sent2ids(fs[1], self.target_dic)
         if not self.is_infer:
             label = float(fs[2])
-            return (source, target, [label], )
+            return (
+                source,
+                target,
+                [label], )
         return source, target

     def _read_rank_record(self, line):
...
@@ -173,11 +173,13 @@ def train(train_data_path=None,
         model_type=model_type, )
     train_reader = paddle.batch(
-        paddle.reader.shuffle(dataset.train, buf_size=1000),
+        paddle.reader.shuffle(
+            dataset.train, buf_size=1000),
         batch_size=batch_size)
     test_reader = paddle.batch(
-        paddle.reader.shuffle(dataset.test, buf_size=1000),
+        paddle.reader.shuffle(
+            dataset.test, buf_size=1000),
         batch_size=batch_size)
     paddle.init(use_gpu=use_gpu, trainer_count=num_workers)
...
@@ -65,10 +65,9 @@ class PaddleModel(Model):
             place=self._place,
             program=self._program)
         predict_var = self._program.block(0).var(self._predict_name)
-        predict = self._exe.run(
-            self._program,
-            feed=feeder.feed(image_batch),
-            fetch_list=[predict_var])
+        predict = self._exe.run(self._program,
+                                feed=feeder.feed(image_batch),
+                                fetch_list=[predict_var])
         return predict

     def num_classes(self):
@@ -96,8 +95,7 @@ class PaddleModel(Model):
             place=self._place,
             program=self._program)
-        grad, = self._exe.run(
-            self._program,
-            feed=feeder.feed(image_batch),
-            fetch_list=[self._gradient])
+        grad, = self._exe.run(self._program,
+                              feed=feeder.feed(image_batch),
+                              fetch_list=[self._gradient])
         return grad
@@ -54,7 +54,8 @@ def main():
     ACC_THRESHOLD = 0.98
     LOSS_THRESHOLD = 10.0
     train_reader = paddle.batch(
-        paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
+        paddle.reader.shuffle(
+            paddle.dataset.mnist.train(), buf_size=500),
         batch_size=BATCH_SIZE)
     place = fluid.CPUPlace()
@@ -65,10 +66,9 @@ def main():
     for pass_id in range(PASS_NUM):
         accuracy.reset(exe)
         for data in train_reader():
-            loss, acc = exe.run(
-                fluid.default_main_program(),
-                feed=feeder.feed(data),
-                fetch_list=[avg_cost] + accuracy.metrics)
+            loss, acc = exe.run(fluid.default_main_program(),
+                                feed=feeder.feed(data),
+                                fetch_list=[avg_cost] + accuracy.metrics)
             pass_acc = accuracy.eval(exe)
             print("pass_id=" + str(pass_id) + " acc=" + str(acc) + " pass_acc="
                   + str(pass_acc))
...
@@ -59,7 +59,8 @@ def main():
     BATCH_SIZE = 1
     train_reader = paddle.batch(
-        paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
+        paddle.reader.shuffle(
+            paddle.dataset.mnist.train(), buf_size=500),
         batch_size=BATCH_SIZE)
     feeder = fluid.DataFeeder(
         feed_list=[IMG_NAME, LABEL_NAME],
...
@@ -59,11 +59,10 @@ def _attended_decoder_step(word_count, enc_out, enc_out_proj,
         gate_act=paddle.activation.Sigmoid(),
         state_act=paddle.activation.Tanh())
-    next_word = paddle.layer.fc(
-        size=word_count,
-        bias_attr=True,
-        act=paddle.activation.Softmax(),
-        input=lstm)
+    next_word = paddle.layer.fc(size=word_count,
+                                bias_attr=True,
+                                act=paddle.activation.Softmax(),
+                                input=lstm)
     return next_word
@@ -86,11 +85,10 @@ def encoder_decoder_network(word_count,
         param_attr=paddle.attr.ParamAttr(name="__embedding__"))
     enc_out = _bidirect_lstm_encoder(
         input=src_emb, hidden_dim=encoder_hidden_dim, depth=encoder_depth)
-    enc_out_proj = paddle.layer.fc(
-        act=paddle.activation.Linear(),
-        size=encoder_hidden_dim,
-        bias_attr=False,
-        input=enc_out)
+    enc_out_proj = paddle.layer.fc(act=paddle.activation.Linear(),
+                                   size=encoder_hidden_dim,
+                                   bias_attr=False,
+                                   input=enc_out)
     decoder_group_name = "decoder_group"
     group_inputs = [
...
@@ -26,9 +26,8 @@ def train_reader(data_file_path, word_dict_file):
             l = len(poetry_ids)
             if l < 2: continue
             for i in range(l - 1):
-                yield poetry_ids[i], poetry_ids[i +
-                                                1][:-1], poetry_ids[i +
-                                                                    1][1:]
+                yield poetry_ids[i], poetry_ids[i + 1][:-1], poetry_ids[
+                    i + 1][1:]
     return reader
@@ -43,10 +42,10 @@ def gen_reader(data_file_path, word_dict_file):
     with open(data_file_path, "r") as f:
         for line in f:
-            input_line = "".join(
-                line.strip().decode("utf8", errors="ignore").split())
-            yield [bos_id
-                   ] + [word_dict.get(word, unk_id)
-                        for word in input_line] + [eos_id]
+            input_line = "".join(line.strip().decode(
+                "utf8", errors="ignore").split())
+            yield [bos_id] + [
+                word_dict.get(word, unk_id) for word in input_line
+            ] + [eos_id]
     return reader
@@ -96,8 +96,9 @@ def train(num_passes,
     if init_model_path:
         load_initial_model(init_model_path, parameters)
-    trainer = paddle.trainer.SGD(
-        cost=cost, parameters=parameters, update_equation=optimizer)
+    trainer = paddle.trainer.SGD(cost=cost,
+                                 parameters=parameters,
+                                 update_equation=optimizer)
     # define data reader
     train_reader = paddle.batch(
...
@@ -48,8 +48,9 @@ def rnn_lm(vocab_dim,
         raise Exception("rnn_type error!")
     # fc(full connected) and output layer
-    output = paddle.layer.fc(
-        input=[rnn_cell], size=vocab_dim, act=paddle.activation.Softmax())
+    output = paddle.layer.fc(input=[rnn_cell],
+                             size=vocab_dim,
+                             act=paddle.activation.Softmax())
     if is_infer:
         last_word = paddle.layer.last_seq(input=output)
...
@@ -46,11 +46,10 @@ def train(topology,
     # create sum evaluator
     sum_eval = paddle.evaluator.sum(topology)
     # create trainer
-    trainer = paddle.trainer.SGD(
-        cost=topology,
-        parameters=parameters,
-        update_equation=adam_optimizer,
-        extra_layers=sum_eval)
+    trainer = paddle.trainer.SGD(cost=topology,
+                                 parameters=parameters,
+                                 update_equation=adam_optimizer,
+                                 extra_layers=sum_eval)
     # define the event_handler callback
     def event_handler(event):
...
@@ -110,8 +110,9 @@ def lstm_by_nested_sequence(input_layer, hidden_dim, name="", reverse=False):
             name="__inner_state_%s__" % name,
             size=hidden_dim,
             boot_layer=outer_memory)
-        input_proj = paddle.layer.fc(
-            size=hidden_dim * 4, bias_attr=False, input=input_layer)
+        input_proj = paddle.layer.fc(size=hidden_dim * 4,
+                                     bias_attr=False,
+                                     input=input_layer)
         return paddle.networks.lstmemory_unit(
             input=input_proj,
             name="__inner_state_%s__" % name,
...
@@ -88,9 +88,9 @@ class BeamDecoding(object):
             for j in range(beam_size):
                 selected_id = int(self.selected_sentences[i][j])
                 if selected_id == -1: break
-                seq_len = self.beam1_seq_start_positions[
-                    i][selected_id +
-                       1] - self.beam1_seq_start_positions[i][selected_id]
+                seq_len = self.beam1_seq_start_positions[i][
+                    selected_id + 1] - self.beam1_seq_start_positions[i][
+                        selected_id]
                 self.beam2_seq_start_positions[-1].append(
                     self.beam2_seq_start_positions[-1][-1] + seq_len)
@@ -113,9 +113,9 @@ class BeamDecoding(object):
         self.beam3_seq_start_positions.append([0])
         sub_seq_num, beam_size = self.selected_starts.shape
         for i in range(sub_seq_num):
-            seq_len = self.beam2_seq_start_positions[
-                seq_id][sub_seq_id +
-                        1] - self.beam2_seq_start_positions[seq_id][sub_seq_id]
+            seq_len = self.beam2_seq_start_positions[seq_id][
+                sub_seq_id + 1] - self.beam2_seq_start_positions[seq_id][
+                    sub_seq_id]
             for j in range(beam_size):
                 start_id = int(self.selected_starts[i][j])
                 if start_id == -1: break
@@ -130,8 +130,8 @@ class BeamDecoding(object):
                     [self.beam3_seq_start_positions[-1][-1]])
                 sub_seq_id = 0
                 seq_id += 1
-                sub_seq_count = len(
-                    self.beam2_seq_start_positions[seq_id]) - 1
+                sub_seq_count = len(self.beam2_seq_start_positions[
+                    seq_id]) - 1
         assert (
             self.beam3_seq_start_positions[-1][-1] == self.end_scores.shape[0])
@@ -198,16 +198,11 @@ class BeamDecoding(object):
                 if end_pos == -1: break
                 self.all_searched_ans.append({
-                    "score":
-                    self.end_scores[seq_offset_in_batch + end_pos],
-                    "sentence_pos":
-                    -1,
-                    "start_span_pos":
-                    -1,
-                    "end_span_pos":
-                    end_pos,
-                    "parent_ids_in_prev_beam":
-                    i
+                    "score": self.end_scores[seq_offset_in_batch + end_pos],
+                    "sentence_pos": -1,
+                    "start_span_pos": -1,
+                    "end_span_pos": end_pos,
+                    "parent_ids_in_prev_beam": i
                 })
                 sub_seq_id += 1
@@ -265,8 +260,7 @@ class BeamDecoding(object):
                     key=lambda x: x["score"],
                     reverse=True):
                 self.final_ans[i].append({
-                    "score":
-                    ans["score"],
+                    "score": ans["score"],
                     "label": [
                         ans["sentence_pos"], ans["start_span_pos"],
                         ans["end_span_pos"]
...
@@ -235,7 +235,8 @@ def featurize_example(question, context, vocab):
     # Convert to indices
     question_idxs = [
         vocab.word_to_idx(normalize(w))
-        for w in ciseau.tokenize(question, normalize_ascii=False)
+        for w in ciseau.tokenize(
+            question, normalize_ascii=False)
     ]
     context_sents = ciseau.sent_tokenize(
...
@@ -19,9 +19,11 @@ def build_pretrained_embedding(name, data_type, emb_dim, emb_drop=0.):
     """
     return paddle.layer.embedding(
-        input=paddle.layer.data(name=name, type=data_type),
+        input=paddle.layer.data(
+            name=name, type=data_type),
         size=emb_dim,
-        param_attr=paddle.attr.Param(name="GloveVectors", is_static=True),
+        param_attr=paddle.attr.Param(
+            name="GloveVectors", is_static=True),
         layer_attr=paddle.attr.ExtraLayerAttribute(drop_rate=emb_drop), )
@@ -56,16 +58,14 @@ def encode_question(input_embedding,
         input_embedding, lstm_hidden_dim, depth, 0., prefix)
     # compute passage-independent embeddings.
-    candidates = paddle.layer.fc(
-        input=lstm_outs,
-        bias_attr=False,
-        size=passage_indep_embedding_dim,
-        act=paddle.activation.Linear())
-    weights = paddle.layer.fc(
-        input=lstm_outs,
-        size=1,
-        bias_attr=False,
-        act=paddle.activation.SequenceSoftmax())
+    candidates = paddle.layer.fc(input=lstm_outs,
+                                 bias_attr=False,
+                                 size=passage_indep_embedding_dim,
+                                 act=paddle.activation.Linear())
+    weights = paddle.layer.fc(input=lstm_outs,
+                              size=1,
+                              bias_attr=False,
+                              act=paddle.activation.SequenceSoftmax())
     weighted_candidates = paddle.layer.scaling(input=candidates, weight=weights)
     passage_indep_embedding = paddle.layer.pooling(
         input=weighted_candidates, pooling_type=paddle.pooling.Sum())
@@ -134,10 +134,9 @@ def question_aligned_passage_embedding(question_lstm_outs, document_embeddings,
         return paddle.layer.pooling(
             input=weighted_candidates, pooling_type=paddle.pooling.Sum())
-    question_outs_proj = paddle.layer.fc(
-        input=question_lstm_outs,
-        bias_attr=False,
-        size=passage_aligned_embedding_dim)
+    question_outs_proj = paddle.layer.fc(input=question_lstm_outs,
+                                         bias_attr=False,
+                                         size=passage_aligned_embedding_dim)
     return paddle.layer.recurrent_group(
         input=[
             paddle.layer.SubsequenceInput(document_embeddings),
@@ -228,11 +227,10 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config,
     last_state_of_sentence = paddle.layer.last_seq(
         input=doc_lstm_outs, agg_level=paddle.layer.AggregateLevel.TO_SEQUENCE)
-    sentence_scores = paddle.layer.fc(
-        input=last_state_of_sentence,
-        size=1,
-        bias_attr=False,
-        act=paddle.activation.Linear())
+    sentence_scores = paddle.layer.fc(input=last_state_of_sentence,
+                                      size=1,
+                                      bias_attr=False,
+                                      act=paddle.activation.Linear())
     topk_sentence_ids = paddle.layer.kmax_seq_score(
         input=sentence_scores, beam_size=config.beam_size)
     topk_sen = paddle.layer.sub_nested_seq(
@@ -255,11 +253,10 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config,
     _, end_span_embedding = basic_modules.stacked_bidirectional_lstm(
         topk_start_spans, config.lstm_hidden_dim, config.lstm_depth,
         config.lstm_hidden_droprate, "__end_span_embeddings__")
-    end_pos_scores = paddle.layer.fc(
-        input=end_span_embedding,
-        size=1,
-        bias_attr=False,
-        act=paddle.activation.Linear())
+    end_pos_scores = paddle.layer.fc(input=end_span_embedding,
+                                     size=1,
+                                     bias_attr=False,
+                                     act=paddle.activation.Linear())
     topk_end_pos_ids = paddle.layer.kmax_seq_score(
         input=end_pos_scores, beam_size=config.beam_size)
...
@@ -129,7 +129,8 @@ def build_reader(data_dir, batch_size):
     # testing data is not shuffled
     test_reader = paddle.batch(
-        reader.data_reader(valid_samples, is_train=False),
+        reader.data_reader(
+            valid_samples, is_train=False),
         batch_size=batch_size)
     return train_reader, test_reader, len(train_samples)
@@ -222,8 +223,9 @@ def train(model_config, trainer_config):
         "GloveVectors",
         load_pretrained_parameters(ModelConfig.pretrained_emb_path))
-    trainer = paddle.trainer.SGD(
-        cost=loss, parameters=parameters, update_equation=optimizer)
+    trainer = paddle.trainer.SGD(cost=loss,
+                                 parameters=parameters,
+                                 update_equation=optimizer)
     event_handler = build_event_handler(trainer_config, parameters, trainer)
     trainer.train(
...
@@ -153,8 +153,8 @@ class Word2Vec(object):
                 line = fin.readline()
                 parts = line.rstrip("\n").rstrip().split(" ")
                 if len(parts) != self.vector_size + 1:
-                    raise ValueError(
-                        "invalid vector on line {}".format(word_id))
+                    raise ValueError("invalid vector on line {}".format(
+                        word_id))
                 word, weights = parts[0], [np.float32(x) for x in parts[1:]]
                 self.syn0[word_id] = weights
                 self.index2word.append(word)
@@ -280,5 +280,5 @@ def load_word_vectors(param,
         param[idx, :] = missing_word_value()
         missing += 1
     if verbose:
-        print(
-            "Loaded {} words, {} missing".format(len(vocab) - missing, missing))
+        print("Loaded {} words, {} missing".format(
+            len(vocab) - missing, missing))
@@ -21,14 +21,15 @@ def ngram_lm(hidden_size, embed_size, dict_size, gram_num=4, is_train=True):
     embed_context = paddle.layer.concat(input=emb_layers)
-    hidden_layer = paddle.layer.fc(
-        input=embed_context,
-        size=hidden_size,
-        act=paddle.activation.Sigmoid(),
-        layer_attr=paddle.attr.Extra(drop_rate=0.5),
-        bias_attr=paddle.attr.Param(learning_rate=2),
-        param_attr=paddle.attr.Param(
-            initial_std=1. / math.sqrt(embed_size * 8), learning_rate=1))
+    hidden_layer = paddle.layer.fc(input=embed_context,
+                                   size=hidden_size,
+                                   act=paddle.activation.Sigmoid(),
+                                   layer_attr=paddle.attr.Extra(drop_rate=0.5),
+                                   bias_attr=paddle.attr.Param(learning_rate=2),
+                                   param_attr=paddle.attr.Param(
+                                       initial_std=1. /
+                                       math.sqrt(embed_size * 8),
+                                       learning_rate=1))
     if is_train == True:
         return paddle.layer.hsigmoid(
...
@@ -50,7 +50,8 @@ def main(save_dir="models"):
         paddle.batch(
             paddle.reader.shuffle(
                 lambda: paddle.dataset.imikolov.train(word_dict, 5)(),
-                buf_size=1000), 64),
+                buf_size=1000),
+            64),
         num_passes=30,
         event_handler=event_handler)
...
@@ -34,17 +34,16 @@ def alexnet(input, class_dim):
         conv_filter_size=3,
         pool_type=paddle.pooling.Max())
-    fc1 = paddle.layer.fc(
-        input=pool3,
-        size=4096,
-        act=paddle.activation.Relu(),
-        layer_attr=paddle.attr.Extra(drop_rate=0.5))
-    fc2 = paddle.layer.fc(
-        input=fc1,
-        size=4096,
-        act=paddle.activation.Relu(),
-        layer_attr=paddle.attr.Extra(drop_rate=0.5))
-    out = paddle.layer.fc(
-        input=fc2, size=class_dim, act=paddle.activation.Softmax())
+    fc1 = paddle.layer.fc(input=pool3,
+                          size=4096,
+                          act=paddle.activation.Relu(),
+                          layer_attr=paddle.attr.Extra(drop_rate=0.5))
+    fc2 = paddle.layer.fc(input=fc1,
+                          size=4096,
+                          act=paddle.activation.Relu(),
+                          layer_attr=paddle.attr.Extra(drop_rate=0.5))
+    out = paddle.layer.fc(input=fc2,
+                          size=class_dim,
+                          act=paddle.activation.Softmax())
     return out
@@ -103,8 +103,8 @@ class ModelConverter(object):
     @wrap_name_default("batch_norm")
     def convert_BatchNorm_layer(self, params, name=None):
-        scale = 1 / np.array(params[-1].data)[0] if np.array(
-            params[-1].data)[0] != 0 else 0
+        scale = 1 / np.array(params[-1].data)[0] if np.array(params[-1].data)[
+            0] != 0 else 0
         for i in range(2):
             data = np.array(params[i].data) * scale
             file_name = "_%s.w%s" % (name, str(i + 1))
...
@@ -126,8 +126,9 @@ def googlenet(input, class_dim):
         layer_attr=paddle.attr.Extra(drop_rate=0.4),
         act=paddle.activation.Linear())
-    out = paddle.layer.fc(
-        input=dropout, size=class_dim, act=paddle.activation.Softmax())
+    out = paddle.layer.fc(input=dropout,
+                          size=class_dim,
+                          act=paddle.activation.Softmax())
     # fc for output 1
     pool_o1 = paddle.layer.img_pool(
@@ -144,14 +145,14 @@ def googlenet(input, class_dim):
         num_filters=128,
         stride=1,
         padding=0)
-    fc_o1 = paddle.layer.fc(
-        name="fc_o1",
-        input=conv_o1,
-        size=1024,
-        layer_attr=paddle.attr.Extra(drop_rate=0.7),
-        act=paddle.activation.Relu())
-    out1 = paddle.layer.fc(
-        input=fc_o1, size=class_dim, act=paddle.activation.Softmax())
+    fc_o1 = paddle.layer.fc(name="fc_o1",
+                            input=conv_o1,
+                            size=1024,
+                            layer_attr=paddle.attr.Extra(drop_rate=0.7),
+                            act=paddle.activation.Relu())
+    out1 = paddle.layer.fc(input=fc_o1,
+                           size=class_dim,
+                           act=paddle.activation.Softmax())
     # fc for output 2
     pool_o2 = paddle.layer.img_pool(
@@ -168,13 +169,13 @@ def googlenet(input, class_dim):
         num_filters=128,
         stride=1,
         padding=0)
-    fc_o2 = paddle.layer.fc(
-        name="fc_o2",
-        input=conv_o2,
-        size=1024,
-        layer_attr=paddle.attr.Extra(drop_rate=0.7),
-        act=paddle.activation.Relu())
-    out2 = paddle.layer.fc(
-        input=fc_o2, size=class_dim, act=paddle.activation.Softmax())
+    fc_o2 = paddle.layer.fc(name="fc_o2",
+                            input=conv_o2,
+                            size=1024,
+                            layer_attr=paddle.attr.Extra(drop_rate=0.7),
+                            act=paddle.activation.Relu())
+    out2 = paddle.layer.fc(input=fc_o2,
+                           size=class_dim,
+                           act=paddle.activation.Softmax())
     return out, out1, out2
@@ -323,6 +323,7 @@ def inception_resnet_v2(input,
         pool_type=paddle.pooling.Avg(),
         exclude_mode=False)
     drop_out = paddle.layer.dropout(input=avgpool_1a, dropout_rate=dropout_rate)
-    out = paddle.layer.fc(
-        input=drop_out, size=class_dim, act=paddle.activation.Softmax())
+    out = paddle.layer.fc(input=drop_out,
+                          size=class_dim,
+                          act=paddle.activation.Softmax())
     return out
@@ -518,9 +518,8 @@ def inception_v4(input, class_dim):
         stride=1,
         pool_type=paddle.pooling.Avg())
     drop = paddle.layer.dropout(input=pool, dropout_rate=0.2)
-    out = paddle.layer.fc(
-        name='incept_fc',
-        input=drop,
-        size=class_dim,
-        act=paddle.activation.Softmax())
+    out = paddle.layer.fc(name='incept_fc',
+                          input=drop,
+                          size=class_dim,
+                          act=paddle.activation.Softmax())
     return out
@@ -73,8 +73,9 @@ def resnet_imagenet(input, class_dim, depth=50):
     res4 = layer_warp(block_func, res3, 512, stages[3], 2)
     pool2 = paddle.layer.img_pool(
         input=res4, pool_size=7, stride=1, pool_type=paddle.pooling.Avg())
-    out = paddle.layer.fc(
-        input=pool2, size=class_dim, act=paddle.activation.Softmax())
+    out = paddle.layer.fc(input=pool2,
+                          size=class_dim,
+                          act=paddle.activation.Softmax())
     return out
@@ -90,6 +91,7 @@ def resnet_cifar10(input, class_dim, depth=32):
     res3 = layer_warp(basicblock, res2, 64, n, 2)
     pool = paddle.layer.img_pool(
         input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
-    out = paddle.layer.fc(
-        input=pool, size=class_dim, act=paddle.activation.Softmax())
+    out = paddle.layer.fc(input=pool,
+                          size=class_dim,
+                          act=paddle.activation.Softmax())
     return out
@@ -97,11 +97,10 @@ def main():
         batch_size=BATCH_SIZE)
     # Create trainer
-    trainer = paddle.trainer.SGD(
-        cost=cost,
-        parameters=parameters,
-        update_equation=optimizer,
-        extra_layers=extra_layers)
+    trainer = paddle.trainer.SGD(cost=cost,
+                                 parameters=parameters,
+                                 update_equation=optimizer,
+                                 extra_layers=extra_layers)
     # End batch and end pass event handler
     def event_handler(event):
...
@@ -24,18 +24,17 @@ def vgg(input, nums, class_dim):
     conv5 = conv_block(conv4, 512, nums[4])
     fc_dim = 4096
-    fc1 = paddle.layer.fc(
-        input=conv5,
-        size=fc_dim,
-        act=paddle.activation.Relu(),
-        layer_attr=paddle.attr.Extra(drop_rate=0.5))
-    fc2 = paddle.layer.fc(
-        input=fc1,
-        size=fc_dim,
-        act=paddle.activation.Relu(),
-        layer_attr=paddle.attr.Extra(drop_rate=0.5))
-    out = paddle.layer.fc(
-        input=fc2, size=class_dim, act=paddle.activation.Softmax())
+    fc1 = paddle.layer.fc(input=conv5,
+                          size=fc_dim,
+                          act=paddle.activation.Relu(),
+                          layer_attr=paddle.attr.Extra(drop_rate=0.5))
+    fc2 = paddle.layer.fc(input=fc1,
+                          size=fc_dim,
+                          act=paddle.activation.Relu(),
+                          layer_attr=paddle.attr.Extra(drop_rate=0.5))
+    out = paddle.layer.fc(input=fc2,
+                          size=class_dim,
+                          act=paddle.activation.Softmax())
     return out
...
@@ -185,9 +185,8 @@ def xception(input, class_dim):
         stride=1,
         num_channels=2048,
         pool_type=paddle.pooling.CudnnAvg())
-    out = paddle.layer.fc(
-        name='xception_fc',
-        input=pool,
-        size=class_dim,
-        act=paddle.activation.Softmax())
+    out = paddle.layer.fc(name='xception_fc',
+                          input=pool,
+                          size=class_dim,
+                          act=paddle.activation.Softmax())
     return out
@@ -19,22 +19,19 @@ def lambda_rank(input_dim, is_infer=False):
         paddle.data_type.dense_vector_sequence(input_dim))
     # Define the hidden layer.
-    hd1 = paddle.layer.fc(
-        input=data,
-        size=128,
-        act=paddle.activation.Tanh(),
-        param_attr=paddle.attr.Param(initial_std=0.01))
-    hd2 = paddle.layer.fc(
-        input=hd1,
-        size=10,
-        act=paddle.activation.Tanh(),
-        param_attr=paddle.attr.Param(initial_std=0.01))
-    output = paddle.layer.fc(
-        input=hd2,
-        size=1,
-        act=paddle.activation.Linear(),
-        param_attr=paddle.attr.Param(initial_std=0.01))
+    hd1 = paddle.layer.fc(input=data,
+                          size=128,
+                          act=paddle.activation.Tanh(),
+                          param_attr=paddle.attr.Param(initial_std=0.01))
+    hd2 = paddle.layer.fc(input=hd1,
+                          size=10,
+                          act=paddle.activation.Tanh(),
+                          param_attr=paddle.attr.Param(initial_std=0.01))
+    output = paddle.layer.fc(input=hd2,
+                             size=1,
+                             act=paddle.activation.Linear(),
+                             param_attr=paddle.attr.Param(initial_std=0.01))
     if not is_infer:
         label = paddle.layer.data("label",
...
@@ -17,20 +17,20 @@ def half_ranknet(name_prefix, input_dim):
         paddle.data_type.dense_vector(input_dim))
     # hidden layer
-    hd1 = paddle.layer.fc(
-        input=data,
-        name=name_prefix + "_hidden",
-        size=10,
-        act=paddle.activation.Tanh(),
-        param_attr=paddle.attr.Param(initial_std=0.01, name="hidden_w1"))
+    hd1 = paddle.layer.fc(input=data,
+                          name=name_prefix + "_hidden",
+                          size=10,
+                          act=paddle.activation.Tanh(),
+                          param_attr=paddle.attr.Param(
+                              initial_std=0.01, name="hidden_w1"))
     # fully connected layer and output layer
-    output = paddle.layer.fc(
-        input=hd1,
-        name=name_prefix + "_score",
-        size=1,
-        act=paddle.activation.Linear(),
-        param_attr=paddle.attr.Param(initial_std=0.01, name="output"))
+    output = paddle.layer.fc(input=hd1,
+                             name=name_prefix + "_score",
+                             size=1,
+                             act=paddle.activation.Linear(),
+                             param_attr=paddle.attr.Param(
+                                 initial_std=0.01, name="output"))
     return output
...
@@ -16,7 +16,8 @@ logger.setLevel(logging.INFO)
 def ranknet_train(input_dim, num_passes, model_save_dir):
     train_reader = paddle.batch(
-        paddle.reader.shuffle(paddle.dataset.mq2007.train, buf_size=100),
+        paddle.reader.shuffle(
+            paddle.dataset.mq2007.train, buf_size=100),
         batch_size=100)
     test_reader = paddle.batch(paddle.dataset.mq2007.test, batch_size=100)
@@ -70,7 +71,8 @@ def lambda_rank_train(input_dim, num_passes, model_save_dir):
         paddle.dataset.mq2007.test, format="listwise")
     train_reader = paddle.batch(
-        paddle.reader.shuffle(fill_default_train, buf_size=100), batch_size=32)
+        paddle.reader.shuffle(
+            fill_default_train, buf_size=100), batch_size=32)
     test_reader = paddle.batch(fill_default_test, batch_size=32)
     cost = lambda_rank(input_dim)
...
@@ -74,18 +74,16 @@ class ExternalMemory(object):
         """Get write/read head's addressing weights via content-based addressing.
         """
         # content-based addressing: a=tanh(W*M + U*key)
-        key_projection = paddle.layer.fc(
-            input=key_vector,
-            size=self.mem_slot_size,
-            act=paddle.activation.Linear(),
-            bias_attr=False)
+        key_projection = paddle.layer.fc(input=key_vector,
+                                         size=self.mem_slot_size,
+                                         act=paddle.activation.Linear(),
+                                         bias_attr=False)
         key_proj_expanded = paddle.layer.expand(
             input=key_projection, expand_as=self.external_memory)
-        memory_projection = paddle.layer.fc(
-            input=self.external_memory,
-            size=self.mem_slot_size,
-            act=paddle.activation.Linear(),
-            bias_attr=False)
+        memory_projection = paddle.layer.fc(input=self.external_memory,
+                                            size=self.mem_slot_size,
+                                            act=paddle.activation.Linear(),
+                                            bias_attr=False)
         merged_projection = paddle.layer.addto(
             input=[key_proj_expanded, memory_projection],
             act=paddle.activation.Tanh())
@@ -101,11 +99,10 @@ class ExternalMemory(object):
         """Interpolate between previous and current addressing weights.
         """
         # prepare interpolation scalar gate: g=sigmoid(W*key)
-        gate = paddle.layer.fc(
-            input=key_vector,
-            size=1,
-            act=paddle.activation.Sigmoid(),
-            bias_attr=False)
+        gate = paddle.layer.fc(input=key_vector,
+                               size=1,
+                               act=paddle.activation.Sigmoid(),
+                               bias_attr=False)
         # interpolation: w_t = g*w_t+(1-g)*w_{t-1}
         last_addressing_weight = paddle.layer.memory(
             name=self.name + "_addressing_weight_" + head_name,
@@ -114,7 +111,8 @@ class ExternalMemory(object):
         interpolated_weight = paddle.layer.interpolation(
             name=self.name + "_addressing_weight_" + head_name,
             input=[last_addressing_weight, addressing_weight],
-            weight=paddle.layer.expand(input=gate, expand_as=addressing_weight))
+            weight=paddle.layer.expand(
+                input=gate, expand_as=addressing_weight))
         return interpolated_weight

     def _get_addressing_weight(self, head_name, key_vector):
@@ -143,16 +141,14 @@ class ExternalMemory(object):
         # get addressing weight for write head
         write_weight = self._get_addressing_weight("write_head", write_key)
         # prepare add_vector and erase_vector
-        erase_vector = paddle.layer.fc(
-            input=write_key,
-            size=self.mem_slot_size,
-            act=paddle.activation.Sigmoid(),
-            bias_attr=False)
-        add_vector = paddle.layer.fc(
-            input=write_key,
-            size=self.mem_slot_size,
-            act=paddle.activation.Sigmoid(),
-            bias_attr=False)
+        erase_vector = paddle.layer.fc(input=write_key,
+                                       size=self.mem_slot_size,
+                                       act=paddle.activation.Sigmoid(),
+                                       bias_attr=False)
+        add_vector = paddle.layer.fc(input=write_key,
+                                     size=self.mem_slot_size,
+                                     act=paddle.activation.Sigmoid(),
+                                     bias_attr=False)
         erase_vector_expand = paddle.layer.expand(
             input=erase_vector, expand_as=self.external_memory)
         add_vector_expand = paddle.layer.expand(
...
@@ -81,8 +81,8 @@ def parse_beam_search_result(beam_result, dictionary):
         if word != -1:
             sentence.append(word)
         else:
-            sentence_list.append(
-                ' '.join([dictionary.get(word) for word in sentence[1:]]))
+            sentence_list.append(' '.join(
+                [dictionary.get(word) for word in sentence[1:]]))
             sentence = []
     beam_probs = beam_result[0]
     beam_size = len(beam_probs[0])
@@ -127,7 +127,9 @@ def infer():
         append_tuple=(bounded_memory_perturbation, ))
     for i, item in enumerate(test_append_reader()):
         if i < args.infer_data_num:
-            infer_data.append((item[0], item[3], ))
+            infer_data.append((
+                item[0],
+                item[3], ))
     # run inference
     beam_result = paddle.infer(
...
@@ -109,11 +109,10 @@ def memory_enhanced_decoder(input, target, initial_state, source_context, size,
     :rtype: LayerOutput
     """
     # prepare initial bounded and unbounded memory
-    bounded_memory_slot_init = paddle.layer.fc(
-        input=paddle.layer.pooling(
-            input=source_context, pooling_type=paddle.pooling.Avg()),
-        size=size,
-        act=paddle.activation.Sigmoid())
+    bounded_memory_slot_init = paddle.layer.fc(input=paddle.layer.pooling(
+        input=source_context, pooling_type=paddle.pooling.Avg()),
+                                               size=size,
+                                               act=paddle.activation.Sigmoid())
     bounded_memory_perturbation = paddle.layer.data(
         name='bounded_memory_perturbation',
         type=paddle.data_type.dense_vector_sequence(size))
@@ -175,11 +174,10 @@ def memory_enhanced_decoder(input, target, initial_state, source_context, size,
         gru_output = paddle.layer.gru_step(
             name="gru_decoder", input=gru_inputs, output_mem=state, size=size)
         # step output
-        return paddle.layer.fc(
-            input=[gru_output, context, cur_embedding],
-            size=dict_size,
-            act=paddle.activation.Softmax(),
-            bias_attr=True)
+        return paddle.layer.fc(input=[gru_output, context, cur_embedding],
+                               size=dict_size,
+                               act=paddle.activation.Softmax(),
+                               bias_attr=True)
     if not is_generating:
         target_embeddings = paddle.layer.embedding(
...
@@ -94,8 +94,9 @@ def train():
     # create parameters and trainer
     parameters = paddle.parameters.create(cost)
-    trainer = paddle.trainer.SGD(
-        cost=cost, parameters=parameters, update_equation=optimizer)
+    trainer = paddle.trainer.SGD(cost=cost,
+                                 parameters=parameters,
+                                 update_equation=optimizer)
     # create data readers
     feeding = {
...@@ -113,13 +114,15 @@ def train(): ...@@ -113,13 +114,15 @@ def train():
reader=paddle.dataset.wmt14.train(args.dict_size), reader=paddle.dataset.wmt14.train(args.dict_size),
append_tuple=(bounded_memory_perturbation, )) append_tuple=(bounded_memory_perturbation, ))
train_batch_reader = paddle.batch( train_batch_reader = paddle.batch(
reader=paddle.reader.shuffle(reader=train_append_reader, buf_size=8192), reader=paddle.reader.shuffle(
reader=train_append_reader, buf_size=8192),
batch_size=args.batch_size) batch_size=args.batch_size)
test_append_reader = reader_append_wrapper( test_append_reader = reader_append_wrapper(
reader=paddle.dataset.wmt14.test(args.dict_size), reader=paddle.dataset.wmt14.test(args.dict_size),
append_tuple=(bounded_memory_perturbation, )) append_tuple=(bounded_memory_perturbation, ))
test_batch_reader = paddle.batch( test_batch_reader = paddle.batch(
reader=paddle.reader.shuffle(reader=test_append_reader, buf_size=8192), reader=paddle.reader.shuffle(
reader=test_append_reader, buf_size=8192),
batch_size=args.batch_size) batch_size=args.batch_size)
# create event handler # create event handler
......
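Both readers above are built the same way: a base reader is wrapped by paddle.reader.shuffle (which buffers buf_size samples and yields them in random order) and then by paddle.batch. A self-contained sketch of that composition; the helper name and the v2 import alias are assumptions for illustration:

# Hypothetical helper, not from this repo; it just names the pattern above.
import paddle.v2 as paddle


def make_batch_reader(base_reader, batch_size, buf_size=8192):
    # shuffle buffers `buf_size` samples; batch groups them into mini-batches
    return paddle.batch(
        paddle.reader.shuffle(reader=base_reader, buf_size=buf_size),
        batch_size=batch_size)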
@@ -27,14 +27,13 @@ def ngram_lm(hidden_size, emb_size, dict_size, gram_num=4, is_train=True):
         param_attr=paddle.attr.Param(initial_std=1. / math.sqrt(emb_size * 8)))

     if is_train:
-        return paddle.layer.nce(
-            input=hidden_layer,
-            label=next_word,
-            num_classes=dict_size,
-            param_attr=paddle.attr.Param(name="nce_w"),
-            bias_attr=paddle.attr.Param(name="nce_b"),
-            num_neg_samples=25,
-            neg_distribution=None)
+        return paddle.layer.nce(input=hidden_layer,
+                                label=next_word,
+                                num_classes=dict_size,
+                                param_attr=paddle.attr.Param(name="nce_w"),
+                                bias_attr=paddle.attr.Param(name="nce_b"),
+                                num_neg_samples=25,
+                                neg_distribution=None)
     else:
         return paddle.layer.mixed(
             size=dict_size,
......
@@ -43,7 +43,8 @@ def train(model_save_dir):
         paddle.batch(
             paddle.reader.shuffle(
                 lambda: paddle.dataset.imikolov.train(word_dict, 5)(),
-                buf_size=1000), 64),
+                buf_size=1000),
+            64),
         num_passes=1000,
         event_handler=event_handler)
......
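The lambda above matters because a Paddle reader is a zero-argument callable that returns a fresh iterator; wrapping the call in a lambda hands shuffle a new generator on every pass instead of one exhausted iterator. A minimal sketch of that contract, with illustrative names:

# Illustrative only: the reader contract these training scripts rely on.
def reader_creator(data):
    def reader():  # zero-arg callable ...
        for sample in data:  # ... yielding a fresh pass per invocation
            yield sample

    return reader


words = reader_creator([1, 2, 3])
assert list(words()) == list(words())  # each call starts a new pass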
@@ -17,12 +17,11 @@ def cnn_cov_group(group_input, hidden_size):
     fc_param_attr = paddle.attr.ParamAttr(name='_cov_value_weight')
     fc_bias_attr = paddle.attr.ParamAttr(name='_cov_value_bias')
-    linear_proj = paddle.layer.fc(
-        input=[conv3, conv4],
-        size=hidden_size,
-        param_attr=[fc_param_attr, fc_param_attr],
-        bias_attr=fc_bias_attr,
-        act=paddle.activation.Linear())
+    linear_proj = paddle.layer.fc(input=[conv3, conv4],
+                                  size=hidden_size,
+                                  param_attr=[fc_param_attr, fc_param_attr],
+                                  bias_attr=fc_bias_attr,
+                                  act=paddle.activation.Linear())

     return linear_proj
......
@@ -24,8 +24,8 @@ def tokenize(pattern):
     """
     Read files that match the given pattern. Tokenize and yield each file.
     """
-    with tarfile.open(
-            paddle.v2.dataset.common.download(URL, 'imdb', MD5)) as tarf:
+    with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb',
+                                                        MD5)) as tarf:
         tf = tarf.next()
         while tf != None:
             if bool(pattern.match(tf.name)):
@@ -77,11 +77,17 @@ def reader_creator(pos_pattern, neg_pattern, word_idx, buffer_size):
     def reader():
         # Create two threads that load positive and negative samples
         # into qs.
-        t0 = threading.Thread(target=load, args=(pos_pattern, qs[0], ))
+        t0 = threading.Thread(
+            target=load, args=(
+                pos_pattern,
+                qs[0], ))
         t0.daemon = True
         t0.start()
-        t1 = threading.Thread(target=load, args=(neg_pattern, qs[1], ))
+        t1 = threading.Thread(
+            target=load, args=(
+                neg_pattern,
+                qs[1], ))
         t1.daemon = True
         t1.start()
......
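For context, the two argument tuples that the new style explodes above each start one daemon producer thread per polarity. A self-contained sketch of that pattern with assumed sample data (the repo itself targets Python 2):

# Assumed data; only the daemon-producer shape is the point here.
import threading

try:
    import Queue as queue  # Python 2, matching this code base
except ImportError:
    import queue  # Python 3


def load(items, q):
    for item in items:
        q.put(item)
    q.put(None)  # end-of-stream sentinel for the consumer


qs = [queue.Queue(), queue.Queue()]
t0 = threading.Thread(target=load, args=(['pos_a', 'pos_b'], qs[0], ))
t0.daemon = True  # don't block interpreter exit
t0.start()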
@@ -62,16 +62,17 @@ def encoding_question(question, q_lstm_dim, latent_chain_dim, word_vec_dim,
     emb = get_embedding(question, word_vec_dim, wordvecs)

     # question LSTM
-    wx = paddle.layer.fc(
-        act=paddle.activation.Linear(),
-        size=q_lstm_dim * 4,
-        input=emb,
-        param_attr=paddle.attr.ParamAttr(
-            name="_q_hidden1.w0",
-            initial_std=default_init_std,
-            l2_rate=default_l2_rate),
-        bias_attr=paddle.attr.ParamAttr(
-            name="_q_hidden1.wbias", initial_std=0, l2_rate=default_l2_rate))
+    wx = paddle.layer.fc(act=paddle.activation.Linear(),
+                         size=q_lstm_dim * 4,
+                         input=emb,
+                         param_attr=paddle.attr.ParamAttr(
+                             name="_q_hidden1.w0",
+                             initial_std=default_init_std,
+                             l2_rate=default_l2_rate),
+                         bias_attr=paddle.attr.ParamAttr(
+                             name="_q_hidden1.wbias",
+                             initial_std=0,
+                             l2_rate=default_l2_rate))
     q_rnn = paddle.layer.lstmemory(
         input=wx,
         bias_attr=paddle.attr.ParamAttr(
@@ -83,24 +84,22 @@ def encoding_question(question, q_lstm_dim, latent_chain_dim, word_vec_dim,
     q_rnn = paddle.layer.dropout(q_rnn, drop_rate)

     # self attention
-    fc = paddle.layer.fc(
-        act=paddle.activation.Tanh(),
-        size=latent_chain_dim,
-        input=q_rnn,
-        param_attr=paddle.attr.ParamAttr(
-            name="_attention_layer1.w0",
-            initial_std=default_init_std,
-            l2_rate=default_l2_rate),
-        bias_attr=False)
-    weight = paddle.layer.fc(
-        size=1,
-        act=paddle.activation.SequenceSoftmax(),
-        input=fc,
-        param_attr=paddle.attr.ParamAttr(
-            name="_attention_weight.w0",
-            initial_std=default_init_std,
-            l2_rate=default_l2_rate),
-        bias_attr=False)
+    fc = paddle.layer.fc(act=paddle.activation.Tanh(),
+                         size=latent_chain_dim,
+                         input=q_rnn,
+                         param_attr=paddle.attr.ParamAttr(
+                             name="_attention_layer1.w0",
+                             initial_std=default_init_std,
+                             l2_rate=default_l2_rate),
+                         bias_attr=False)
+    weight = paddle.layer.fc(size=1,
+                             act=paddle.activation.SequenceSoftmax(),
+                             input=fc,
+                             param_attr=paddle.attr.ParamAttr(
+                                 name="_attention_weight.w0",
+                                 initial_std=default_init_std,
+                                 l2_rate=default_l2_rate),
+                             bias_attr=False)

     scaled_q_rnn = paddle.layer.scaling(input=q_rnn, weight=weight)
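Numerically, the two fc layers above score every time step and SequenceSoftmax normalizes those scores across the sequence before the scaling layer reweights q_rnn. A numpy stand-in for that computation, with shapes assumed for illustration:

# Assumed shapes: 7 time steps, 16 features per step, latent_chain_dim = 8.
import numpy as np

q_rnn = np.random.randn(7, 16)
W1 = np.random.randn(16, 8)  # plays _attention_layer1.w0 (with Tanh)
w2 = np.random.randn(8, 1)   # plays _attention_weight.w0

u = np.tanh(q_rnn.dot(W1))       # fc with Tanh activation
s = u.dot(w2)                    # fc down to one score per step
a = np.exp(s) / np.exp(s).sum()  # SequenceSoftmax over the steps
scaled_q_rnn = a * q_rnn         # paddle.layer.scaling equivalent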
@@ -262,15 +261,14 @@ def define_common_network(conf):
                            conf.default_init_std, conf.default_l2_rate)

     # pre-compute CRF features
-    crf_feats = paddle.layer.fc(
-        act=paddle.activation.Linear(),
-        input=e_encoding,
-        size=conf.label_num,
-        param_attr=paddle.attr.ParamAttr(
-            name="_output.w0",
-            initial_std=conf.default_init_std,
-            l2_rate=conf.default_l2_rate),
-        bias_attr=False)
+    crf_feats = paddle.layer.fc(act=paddle.activation.Linear(),
+                                input=e_encoding,
+                                size=conf.label_num,
+                                param_attr=paddle.attr.ParamAttr(
+                                    name="_output.w0",
+                                    initial_std=conf.default_init_std,
+                                    l2_rate=conf.default_l2_rate),
+                                bias_attr=False)
     return crf_feats, label
@@ -283,15 +281,14 @@ def training_net(conf):
     :rtype: LayerOutput
     """
     e_encoding, label = define_common_network(conf)
-    crf = paddle.layer.crf(
-        input=e_encoding,
-        label=label,
-        size=conf.label_num,
-        param_attr=paddle.attr.ParamAttr(
-            name="_crf.w0",
-            initial_std=conf.default_init_std,
-            l2_rate=conf.default_l2_rate),
-        layer_attr=paddle.attr.ExtraAttr(device=-1))
+    crf = paddle.layer.crf(input=e_encoding,
+                           label=label,
+                           size=conf.label_num,
+                           param_attr=paddle.attr.ParamAttr(
+                               name="_crf.w0",
+                               initial_std=conf.default_init_std,
+                               l2_rate=conf.default_l2_rate),
+                           layer_attr=paddle.attr.ExtraAttr(device=-1))
     return crf
......
@@ -21,7 +21,8 @@ def save_model(trainer, model_save_dir, parameters, pass_id):
 def show_parameter_init_info(parameters):
     """
-    Print the information of initialization mean and standard deviation of parameters
+    Print the information of initialization mean and standard deviation of
+    parameters

     :param parameters: the parameters created in a model
     """
@@ -98,8 +99,9 @@ def train(conf):
             max_average_window=conf.max_average_window))

     # create trainer
-    trainer = paddle.trainer.SGD(
-        cost=cost, parameters=parameters, update_equation=rmsprop_optimizer)
+    trainer = paddle.trainer.SGD(cost=cost,
+                                 parameters=parameters,
+                                 update_equation=rmsprop_optimizer)

     # begin training network
     def _event_handler(event):
......
@@ -5,7 +5,12 @@ import sys
 import numpy

 __all__ = [
-    "open_file", "cumsum", "logger", "DotBar", "load_dict", "load_wordvecs"
+    "open_file",
+    "cumsum",
+    "logger",
+    "DotBar",
+    "load_dict",
+    "load_wordvecs",
 ]

 logger = logging.getLogger("paddle")
@@ -64,8 +69,7 @@ class DotBar(object):
         self.dots_per_line = dots_per_line
         self.f = f

-    def __enter__(
-            self, ):
+    def __enter__(self, ):
         self.obj.__enter__()
         self.idx = 0
         return self
......
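The odd def __enter__(self, ): signature that the new style collapses belongs to a delegating context manager. A compact sketch of that shape; only obj, dots_per_line, f, and idx appear in the hunk, so the constructor defaults and __exit__ here are assumptions:

import sys


class DotBar(object):
    def __init__(self, obj, dots_per_line=50, f=sys.stderr):
        self.obj = obj  # wrapped context manager (e.g. an open file)
        self.dots_per_line = dots_per_line
        self.f = f

    def __enter__(self):  # the stray trailing comma in the diff is harmless
        self.obj.__enter__()
        self.idx = 0
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        return self.obj.__exit__(exc_type, exc_value, traceback)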
@@ -48,8 +48,9 @@ def seq2seq_net(source_dict_dim,
         return_seq=True)
     #### Decoder
     encoder_last = paddle.layer.last_seq(input=encoded_vector)
-    encoder_last_projected = paddle.layer.fc(
-        size=decoder_size, act=paddle.activation.Tanh(), input=encoder_last)
+    encoder_last_projected = paddle.layer.fc(size=decoder_size,
+                                             act=paddle.activation.Tanh(),
+                                             input=encoder_last)

     # gru step
     def gru_decoder_without_attention(enc_vec, current_word):
@@ -68,8 +69,8 @@ def seq2seq_net(source_dict_dim,
         context = paddle.layer.last_seq(input=enc_vec)

-        decoder_inputs = paddle.layer.fc(
-            size=decoder_size * 3, input=[context, current_word])
+        decoder_inputs = paddle.layer.fc(size=decoder_size * 3,
+                                         input=[context, current_word])

         gru_step = paddle.layer.gru_step(
             name="gru_decoder",
@@ -79,11 +80,10 @@ def seq2seq_net(source_dict_dim,
             output_mem=decoder_mem,
             size=decoder_size)

-        out = paddle.layer.fc(
-            size=target_dict_dim,
-            bias_attr=True,
-            act=paddle.activation.Softmax(),
-            input=gru_step)
+        out = paddle.layer.fc(size=target_dict_dim,
+                              bias_attr=True,
+                              act=paddle.activation.Softmax(),
+                              input=gru_step)
         return out

     group_input1 = paddle.layer.StaticInput(input=encoded_vector)
......
@@ -33,8 +33,9 @@ def train(save_dir_path, source_dict_dim, target_dict_dim):
         learning_rate=1e-3,
         gradient_clipping_threshold=10.0,
         regularization=paddle.optimizer.L2Regularization(rate=8e-4))
-    trainer = paddle.trainer.SGD(
-        cost=cost, parameters=parameters, update_equation=optimizer)
+    trainer = paddle.trainer.SGD(cost=cost,
+                                 parameters=parameters,
+                                 update_equation=optimizer)

     # define data reader
     wmt14_reader = paddle.batch(
......
@@ -70,8 +70,8 @@ def infer(model_path, image_shape, batch_size, label_dict_path,
     test_batch = []
     labels = []
-    for i, (image,
-            label) in enumerate(data_generator.infer_reader(infer_file_list)()):
+    for i, (image, label
+            ) in enumerate(data_generator.infer_reader(infer_file_list)()):
         test_batch.append([image])
         labels.append(label)
         if len(test_batch) == batch_size:
......
@@ -69,10 +69,9 @@ class Model(object):
             reverse=True)

         # Map the output of RNN to character distribution.
-        self.output = layer.fc(
-            input=[gru_forward, gru_backward],
-            size=self.num_classes + 1,
-            act=Linear())
+        self.output = layer.fc(input=[gru_forward, gru_backward],
+                               size=self.num_classes + 1,
+                               act=Linear())

         self.log_probs = paddle.layer.mixed(
             input=paddle.layer.identity_projection(input=self.output),
......
@@ -62,11 +62,10 @@ def train(train_file_list_path, test_file_list_path, label_dict_path,
     # Create all the trainable parameters.
     params = paddle.parameters.create(model.cost)

-    trainer = paddle.trainer.SGD(
-        cost=model.cost,
-        parameters=params,
-        update_equation=optimizer,
-        extra_layers=model.eval)
+    trainer = paddle.trainer.SGD(cost=model.cost,
+                                 parameters=params,
+                                 update_equation=optimizer,
+                                 extra_layers=model.eval)

     # Feeding dictionary.
     feeding = {'image': 0, 'label': 1}
......
@@ -41,19 +41,17 @@ def seqToseq_net(source_dict_dim,
     encoded_vector = paddle.layer.concat(input=[src_forward, src_reverse])

     #### Decoder
-    encoded_proj = paddle.layer.fc(
-        input=encoded_vector,
-        size=decoder_size,
-        act=paddle.activation.Linear(),
-        bias_attr=False)
+    encoded_proj = paddle.layer.fc(input=encoded_vector,
+                                   size=decoder_size,
+                                   act=paddle.activation.Linear(),
+                                   bias_attr=False)

     reverse_first = paddle.layer.first_seq(input=src_reverse)

-    decoder_boot = paddle.layer.fc(
-        input=reverse_first,
-        size=decoder_size,
-        act=paddle.activation.Tanh(),
-        bias_attr=False)
+    decoder_boot = paddle.layer.fc(input=reverse_first,
+                                   size=decoder_size,
+                                   act=paddle.activation.Tanh(),
+                                   bias_attr=False)

     def gru_decoder_with_attention_train(enc_vec, enc_proj, true_word,
                                          true_token_flag):
@@ -92,11 +90,10 @@ def seqToseq_net(source_dict_dim,
         current_word = paddle.layer.multiplex(
             input=[true_token_flag, true_word, generated_word_emb])

-        decoder_inputs = paddle.layer.fc(
-            input=[context, current_word],
-            size=decoder_size * 3,
-            act=paddle.activation.Linear(),
-            bias_attr=False)
+        decoder_inputs = paddle.layer.fc(input=[context, current_word],
+                                         size=decoder_size * 3,
+                                         act=paddle.activation.Linear(),
+                                         bias_attr=False)

         gru_step = paddle.layer.gru_step(
             name='gru_decoder',
@@ -104,11 +101,10 @@ def seqToseq_net(source_dict_dim,
             output_mem=decoder_mem,
             size=decoder_size)

-        out = paddle.layer.fc(
-            name='gru_out',
-            input=gru_step,
-            size=target_dict_dim,
-            act=paddle.activation.Softmax())
+        out = paddle.layer.fc(name='gru_out',
+                              input=gru_step,
+                              size=target_dict_dim,
+                              act=paddle.activation.Softmax())
         return out

     def gru_decoder_with_attention_gen(enc_vec, enc_proj, current_word):
@@ -132,11 +128,10 @@ def seqToseq_net(source_dict_dim,
             encoded_proj=enc_proj,
             decoder_state=decoder_mem)

-        decoder_inputs = paddle.layer.fc(
-            input=[context, current_word],
-            size=decoder_size * 3,
-            act=paddle.activation.Linear(),
-            bias_attr=False)
+        decoder_inputs = paddle.layer.fc(input=[context, current_word],
+                                         size=decoder_size * 3,
+                                         act=paddle.activation.Linear(),
+                                         bias_attr=False)

         gru_step = paddle.layer.gru_step(
             name='gru_decoder',
@@ -144,11 +139,10 @@ def seqToseq_net(source_dict_dim,
             output_mem=decoder_mem,
             size=decoder_size)

-        out = paddle.layer.fc(
-            name='gru_out',
-            input=gru_step,
-            size=target_dict_dim,
-            act=paddle.activation.Softmax())
+        out = paddle.layer.fc(name='gru_out',
+                              input=gru_step,
+                              size=target_dict_dim,
+                              act=paddle.activation.Softmax())
         return out

     decoder_group_name = "decoder_group"
......
@@ -72,13 +72,16 @@ def train(dict_size, batch_size, num_passes, beam_size, schedule_type, decay_a,
     parameters = paddle.parameters.create(cost)

-    trainer = paddle.trainer.SGD(
-        cost=cost, parameters=parameters, update_equation=optimizer)
+    trainer = paddle.trainer.SGD(cost=cost,
+                                 parameters=parameters,
+                                 update_equation=optimizer)

     wmt14_reader = reader.gen_schedule_data(
         paddle.reader.shuffle(
             paddle.dataset.wmt14.train(dict_size), buf_size=8192),
-        schedule_type, decay_a, decay_b)
+        schedule_type,
+        decay_a,
+        decay_b)

     # define event_handler callback
     def event_handler(event):
@@ -98,7 +101,8 @@ def train(dict_size, batch_size, num_passes, beam_size, schedule_type, decay_a,
     # start to train
     trainer.train(
-        reader=paddle.batch(wmt14_reader, batch_size=batch_size),
+        reader=paddle.batch(
+            wmt14_reader, batch_size=batch_size),
         event_handler=event_handler,
         feeding=reader.feeding,
         num_passes=num_passes)
......
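reader.gen_schedule_data threads schedule_type, decay_a, and decay_b through to scheduled sampling: the probability of feeding the true previous token decays as training proceeds. The exact schedules are not visible in this diff; a hedged sketch of the standard decay family (Bengio et al., 2015) under assumed parameter semantics:

import math


def true_token_prob(step, schedule_type, decay_a, decay_b):
    # Assumed mapping; the repo's gen_schedule_data may interpret the
    # parameters differently.
    if schedule_type == 'constant':
        return decay_a  # fixed probability of using the ground truth
    if schedule_type == 'linear':
        return max(decay_b, 1.0 - decay_a * step)  # linear decay with floor
    # inverse sigmoid: near 1 early in training, then a smooth decay
    return decay_a / (decay_a + math.exp(step / decay_a))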
@@ -79,27 +79,25 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
     # Please do not add any nonlinear activation to this fully connected layer.
     # The default activation for paddle.layer.fc is tanh, so it needs to be
     # set to linear explicitly here.
-    emission = paddle.layer.fc(
-        size=label_dict_len,
-        bias_attr=False,
-        input=rnn_fea,
-        act=paddle.activation.Linear(),
-        param_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) /
-                                     3))
+    emission = paddle.layer.fc(size=label_dict_len,
+                               bias_attr=False,
+                               input=rnn_fea,
+                               act=paddle.activation.Linear(),
+                               param_attr=paddle.attr.Param(
+                                   initial_std=1. / math.sqrt(hidden_dim) / 3))

     if is_train:
         target = paddle.layer.data(
             name="target",
             type=paddle.data_type.integer_value_sequence(label_dict_len))

-        crf = paddle.layer.crf(
-            size=label_dict_len,
-            input=emission,
-            label=target,
-            param_attr=paddle.attr.Param(
-                name="crfw",
-                initial_std=1. / math.sqrt(hidden_dim) / 3,
-                learning_rate=mix_hidden_lr))
+        crf = paddle.layer.crf(size=label_dict_len,
+                               input=emission,
+                               label=target,
+                               param_attr=paddle.attr.Param(
+                                   name="crfw",
+                                   initial_std=1. / math.sqrt(hidden_dim) / 3,
+                                   learning_rate=mix_hidden_lr))

     crf_dec = paddle.layer.crf_decoding(
         size=label_dict_len,
......
@@ -54,11 +54,10 @@ def main(train_data_file,
         model_average=paddle.optimizer.ModelAverage(
             average_window=0.5, max_average_window=10000), )

-    trainer = paddle.trainer.SGD(
-        cost=crf_cost,
-        parameters=parameters,
-        update_equation=optimizer,
-        extra_layers=crf_dec)
+    trainer = paddle.trainer.SGD(cost=crf_cost,
+                                 parameters=parameters,
+                                 update_equation=optimizer,
+                                 extra_layers=crf_dec)

     train_reader = paddle.batch(
         paddle.reader.shuffle(
......
@@ -14,11 +14,10 @@ def eval(eval_file_list, batch_size, data_args, model_path):
     optimizer = paddle.optimizer.Momentum()

-    trainer = paddle.trainer.SGD(
-        cost=cost,
-        parameters=parameters,
-        extra_layers=[detect_out],
-        update_equation=optimizer)
+    trainer = paddle.trainer.SGD(cost=cost,
+                                 parameters=parameters,
+                                 extra_layers=[detect_out],
+                                 update_equation=optimizer)

     feeding = {'image': 0, 'bbox': 1}
......
@@ -24,11 +24,10 @@ def train(train_file_list, dev_file_list, data_args, init_model_path):
         assert os.path.isfile(init_model_path), 'Invalid model.'
         parameters.init_from_tar(gzip.open(init_model_path))

-    trainer = paddle.trainer.SGD(
-        cost=cost,
-        parameters=parameters,
-        extra_layers=[detect_out],
-        update_equation=optimizer)
+    trainer = paddle.trainer.SGD(cost=cost,
+                                 parameters=parameters,
+                                 extra_layers=[detect_out],
+                                 update_equation=optimizer)

     feeding = {'image': 0, 'bbox': 1}
......
@@ -91,8 +91,9 @@ def convolution_net(dict_dim,
         input=emb, context_len=4, hidden_size=hid_dim)

     # fc and output layer
-    prob = paddle.layer.fc(
-        input=[conv_3, conv_4], size=class_dim, act=paddle.activation.Softmax())
+    prob = paddle.layer.fc(input=[conv_3, conv_4],
+                           size=class_dim,
+                           act=paddle.activation.Softmax())

     if is_infer:
         return prob
......