Commit 6431daed authored by guosheng

Fix condition var is None

Parent 3c682920
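The core of this change is replacing truthiness tests on values that may be framework tensors (`if attn_bias:`, `x + y if y else x`) with explicit `is not None` checks, since truth-testing a tensor-like object is ambiguous or unsupported. Below is a minimal, framework-free sketch of the pattern; NumPy stands in for the tensor type only as an analogy and is not the API being patched.

```python
import numpy as np


def add_residual(x, y=None):
    # correct: only skip the addition when no residual input was given at all
    return x + y if y is not None else x


def add_residual_buggy(x, y=None):
    # buggy: `if y` truth-tests the array itself; NumPy raises for arrays with
    # more than one element, and a framework Variable may behave unpredictably
    return x + y if y else x


x = np.ones((2, 3))
bias = np.zeros((2, 3))

print(add_residual(x, bias))        # explicit None check: fine
try:
    add_residual_buggy(x, bias)     # truth value of the array is ambiguous
except ValueError as err:
    print("truthiness check fails:", err)
```

The same guard shows up in the PrePostProcessLayer and MultiHeadAttention hunks further down.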
@@ -91,8 +91,6 @@ def do_predict(args):
         dataset=dataset,
         batch_sampler=batch_sampler,
         places=device,
-        feed_list=None
-        if fluid.in_dygraph_mode() else [x.forward() for x in inputs],
         collate_fn=partial(
             prepare_infer_input, src_pad_idx=args.eos_idx, n_head=args.n_head),
         num_workers=0,
@@ -22,7 +22,6 @@ from functools import partial
 import numpy as np
 import paddle
 import paddle.fluid as fluid
-from paddle.fluid.dygraph import to_variable
 from paddle.fluid.io import DataLoader
 from utils.configure import PDConfig
@@ -31,32 +30,33 @@ from utils.check import check_gpu, check_version
 from model import Input, set_device
 from callbacks import ProgBarLogger
 from reader import prepare_train_input, Seq2SeqDataset, Seq2SeqBatchSampler
-from transformer import Transformer, CrossEntropyCriterion, NoamDecay
+from transformer import Transformer, CrossEntropyCriterion
 
 
-class LoggerCallback(ProgBarLogger):
+class TrainCallback(ProgBarLogger):
     def __init__(self, log_freq=1, verbose=2, loss_normalizer=0.):
-        super(LoggerCallback, self).__init__(log_freq, verbose)
+        super(TrainCallback, self).__init__(log_freq, verbose)
         # TODO: wrap these override function to simplify
         self.loss_normalizer = loss_normalizer
 
     def on_train_begin(self, logs=None):
-        super(LoggerCallback, self).on_train_begin(logs)
+        super(TrainCallback, self).on_train_begin(logs)
         self.train_metrics += ["normalized loss", "ppl"]
 
     def on_train_batch_end(self, step, logs=None):
        logs["normalized loss"] = logs["loss"][0] - self.loss_normalizer
        logs["ppl"] = np.exp(min(logs["loss"][0], 100))
-        super(LoggerCallback, self).on_train_batch_end(step, logs)
+        super(TrainCallback, self).on_train_batch_end(step, logs)
 
     def on_eval_begin(self, logs=None):
-        super(LoggerCallback, self).on_eval_begin(logs)
-        self.eval_metrics += ["normalized loss", "ppl"]
+        super(TrainCallback, self).on_eval_begin(logs)
+        self.eval_metrics = list(
+            self.eval_metrics) + ["normalized loss", "ppl"]
 
     def on_eval_batch_end(self, step, logs=None):
         logs["normalized loss"] = logs["loss"][0] - self.loss_normalizer
         logs["ppl"] = np.exp(min(logs["loss"][0], 100))
-        super(LoggerCallback, self).on_eval_batch_end(step, logs)
+        super(TrainCallback, self).on_eval_batch_end(step, logs)
 
 
 def do_train(args):
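TrainCallback above reports two derived metrics on top of the raw loss: a loss normalized by subtracting the best cross-entropy achievable under label smoothing, and perplexity with the exponent clamped to avoid overflow. The sketch below shows those computations in isolation; since the `loss_normalizer` expression is truncated in this diff, the label-smoothing floor used here is the standard formula from the reference Transformer recipe, an assumption rather than a quote of this file.

```python
import numpy as np


def label_smoothing_floor(epsilon: float, vocab_size: int) -> float:
    # minimum achievable cross-entropy when `epsilon` probability mass is
    # spread over the other `vocab_size - 1` tokens (assumed standard formula)
    return -((1.0 - epsilon) * np.log(1.0 - epsilon) +
             epsilon * np.log(epsilon / (vocab_size - 1) + 1e-20))


def derived_metrics(loss: float, loss_normalizer: float) -> dict:
    return {
        # subtract the smoothing floor so 0 means "perfect" under smoothing
        "normalized loss": loss - loss_normalizer,
        # perplexity, with the exponent clamped at 100 to avoid overflow
        "ppl": np.exp(min(loss, 100)),
    }


print(derived_metrics(5.3, label_smoothing_floor(0.1, vocab_size=10000)))
```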
@@ -127,8 +127,6 @@ def do_train(args):
         dataset=dataset,
         batch_sampler=batch_sampler,
         places=device,
-        feed_list=None if fluid.in_dygraph_mode() else
-        [x.forward() for x in inputs + labels],
         collate_fn=partial(
             prepare_train_input,
             src_pad_idx=args.eos_idx,
@@ -149,8 +147,10 @@
     transformer.prepare(
         fluid.optimizer.Adam(
-            learning_rate=fluid.layers.noam_decay(args.d_model,
-                                                  args.warmup_steps),
+            learning_rate=fluid.layers.noam_decay(
+                args.d_model,
+                args.warmup_steps,
+                learning_rate=args.learning_rate),
             beta1=args.beta1,
             beta2=args.beta2,
             epsilon=float(args.eps),
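This hunk passes the configured base `learning_rate` through to `fluid.layers.noam_decay` instead of relying on the default scale. For reference, a standalone sketch of the schedule that call builds as graph ops, assuming the standard Noam formula from "Attention Is All You Need" (linear warmup followed by inverse square-root decay, scaled by `d_model**-0.5` and the base learning rate):

```python
def noam_lr(step: int, d_model: int, warmup_steps: int,
            learning_rate: float = 1.0) -> float:
    # warmup: lr grows linearly for `warmup_steps`, then decays as step**-0.5
    return (learning_rate * d_model ** -0.5 *
            min(step ** -0.5, step * warmup_steps ** -1.5))


# e.g. with d_model=512 and warmup_steps=8000, the peak is reached at step 8000
for step in (1, 4000, 8000, 16000):
    print(step, noam_lr(step, d_model=512, warmup_steps=8000, learning_rate=2.0))
```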
@@ -161,13 +161,10 @@
     ## init from some checkpoint, to resume the previous training
     if args.init_from_checkpoint:
-        transformer.load(
-            os.path.join(args.init_from_checkpoint, "transformer"))
+        transformer.load(args.init_from_checkpoint)
 
     ## init from some pretrain models, to better solve the current task
     if args.init_from_pretrain_model:
-        transformer.load(
-            os.path.join(args.init_from_pretrain_model, "transformer"),
-            reset_optimizer=True)
+        transformer.load(args.init_from_pretrain_model, reset_optimizer=True)
 
     # the best cross-entropy value with label smoothing
     loss_normalizer = -(
@@ -178,12 +175,13 @@
     # model train
     transformer.fit(train_data=train_loader,
                     eval_data=eval_loader,
-                    epochs=1,
+                    epochs=args.epoch,
                     eval_freq=1,
                     save_freq=1,
                     save_dir=args.save_model,
                     verbose=2,
                     callbacks=[
-                        LoggerCallback(
+                        TrainCallback(
                             log_freq=args.print_step,
                             loss_normalizer=loss_normalizer)
                     ])
@@ -79,7 +79,8 @@ class PrePostProcessLayer(Layer):
         self.functors = []
         for cmd in self.process_cmd:
             if cmd == "a":  # add residual connection
-                self.functors.append(lambda x, y: x + y if y else x)
+                self.functors.append(
+                    lambda x, y: x + y if y is not None else x)
             elif cmd == "n":  # add layer normalization
                 self.functors.append(
                     self.add_sublayer(
@@ -169,7 +170,7 @@ class MultiHeadAttention(Layer):
         # scale dot product attention
         product = layers.matmul(
             x=q, y=k, transpose_y=True, alpha=self.d_model**-0.5)
-        if attn_bias:
+        if attn_bias is not None:
             product += attn_bias
         weights = layers.softmax(product)
         if self.dropout_rate:
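For context on the hunk above, here is a NumPy sketch of the scaled dot-product attention step with the optional additive bias guarded by `is not None`, matching the fixed condition. It is an illustration only: multi-head reshaping and dropout are omitted, and the `d_model**-0.5` scaling simply follows the snippet shown.

```python
import numpy as np


def scaled_dot_product_attention(q, k, v, d_model, attn_bias=None):
    # (batch, len_q, len_k) attention logits, scaled by d_model**-0.5
    product = np.matmul(q, np.swapaxes(k, -1, -2)) * d_model ** -0.5
    if attn_bias is not None:       # an all-zero bias must still be handled safely
        product = product + attn_bias
    # softmax over the key dimension
    weights = np.exp(product - product.max(axis=-1, keepdims=True))
    weights /= weights.sum(axis=-1, keepdims=True)
    return np.matmul(weights, v)


q = np.random.rand(2, 4, 8)
k = np.random.rand(2, 5, 8)
v = np.random.rand(2, 5, 8)
mask = np.zeros((2, 4, 5))          # e.g. -1e9 at padded key positions
print(scaled_dot_product_attention(q, k, v, d_model=8, attn_bias=mask).shape)
```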