Commit bbfd74a5 authored by Zeyu Chen

Merge branch 'develop' of https://github.com/PaddlePaddle/models into develop

# BERT Benchmark with Fleet API

-First set up the runtime environment.
+First set up the runtime environment: clone the PaddleNLP code and download the pretraining data to the expected locations.

-export PYTHONPATH=/home/fangzeyang/PaddleNLP
-export DATA_DIR=/home/fangzeyang/bert_data/wikicorpus_en
+export PYTHONPATH=${HOME}/models/PaddleNLP
+export DATA_DIR=${HOME}/bert_data/wikicorpus_en

## Pretraining in NLP tasks
......
@@ -20,5 +20,5 @@ python3 train.py
 ``` shell
 cd dygraph/
 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-python3 -m paddle.distributed.launch --selected_gpus=0,1,2,3,4,5,6,7 train.py
+python3 -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" train.py
 ```
@@ -87,13 +87,6 @@ class CrossEntropyLossForLm(nn.Layer):
 class UpdateModel(paddle.callbacks.Callback):
     # This callback resets model hidden states and updates the learning rate before each epoch begins
-    def __init__(self, base_lr, lr_decay, epoch_start_decay):
-        self.base_lr = base_lr
-        self.lr_decay = lr_decay
-        self.epoch_start_decay = epoch_start_decay
     def on_epoch_begin(self, epoch=None, logs=None):
         self.model.network.reset_states()
-        new_lr = self.base_lr * (self.lr_decay
-                                 **max(epoch + 1 - self.epoch_start_decay, 0.0))
-        self.model._optimizer.set_lr(new_lr)
@@ -13,8 +13,9 @@ paddle.seed(102)
 def create_data_loader(batch_size, num_steps, data_path):
-    train_ds, valid_ds, test_ds = PTBDataset.get_datasets(
-        [batch_size] * 3, [num_steps] * 3, ['train', 'eval', 'test'])
+    train_ds = PTBDataset(batch_size, num_steps, 'train')
+    valid_ds = PTBDataset(batch_size, num_steps, 'eval')
+    test_ds = PTBDataset(batch_size, num_steps, 'test')
     train_loader = DataLoader(train_ds, return_list=True, batch_size=None)
     valid_loader = DataLoader(valid_ds, return_list=True, batch_size=None)
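A note on the unchanged `DataLoader(..., batch_size=None)` context lines above: the PTB dataset yields items that are already full `(batch_size, num_steps)` batches, and `batch_size=None` tells the loader to pass each item through without re-batching. Below is a minimal sketch of that pattern; `ToyBatchedDataset` is a made-up stand-in for illustration, not the real `PTBDataset`.

```python
import numpy as np
from paddle.io import Dataset, DataLoader

class ToyBatchedDataset(Dataset):
    """Hypothetical dataset whose items are already whole batches."""

    def __init__(self, batch_size=4, num_steps=8, num_batches=3):
        n = batch_size * num_steps
        data = np.arange(n * num_batches + 1, dtype='int64')
        # Each item is (inputs, next-token labels), both (batch_size, num_steps).
        self.batches = [(data[i:i + n].reshape(batch_size, num_steps),
                         data[i + 1:i + 1 + n].reshape(batch_size, num_steps))
                        for i in range(0, n * num_batches, n)]

    def __len__(self):
        return len(self.batches)

    def __getitem__(self, idx):
        return self.batches[idx]

# batch_size=None: each dataset item is used as one batch, no extra collation.
loader = DataLoader(ToyBatchedDataset(), return_list=True, batch_size=None)
for inputs, labels in loader:
    print(inputs.shape)  # [4, 8]
```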
@@ -40,15 +41,15 @@ def train(args):
     gloabl_norm_clip = paddle.nn.ClipGradByGlobalNorm(args.max_grad_norm)
     cross_entropy = CrossEntropyLossForLm()
     ppl_metric = Perplexity()
-    callback = UpdateModel(
-        base_lr=args.base_lr,
-        lr_decay=args.lr_decay,
-        epoch_start_decay=args.epoch_start_decay)
+    callback = UpdateModel()
+    scheduler = paddle.callbacks.LRScheduler(by_step=False, by_epoch=True)
     model = paddle.Model(network)
-    # FIXME(yuanxiaopeng): Use scheduler instead of callback
-    #scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=args.base_lr, lr_lambda=lambda x: args.lr_decay**max(x + 1 - args.epoch_start_decay, 0.0), verbose=True)
-    optimizer = paddle.optimizer.SGD(learning_rate=args.base_lr,
+    learning_rate = paddle.optimizer.lr.LambdaDecay(
+        learning_rate=args.base_lr,
+        lr_lambda=lambda x: args.lr_decay**max(x + 1 - args.epoch_start_decay, 0.0),
+        verbose=True)
+    optimizer = paddle.optimizer.SGD(learning_rate=learning_rate,
                                      parameters=model.parameters(),
                                      grad_clip=gloabl_norm_clip)
@@ -62,7 +63,7 @@ def train(args):
               eval_data=valid_loader,
               epochs=args.max_epoch,
               shuffle=False,
-              callbacks=[callback],
+              callbacks=[callback, scheduler],
               log_freq=max(1, len(train_loader) // 10))
     model.save(path='checkpoint/test')  # save for training
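Together with the earlier removal of the LR logic from `UpdateModel`, these hunks move the per-epoch decay into `paddle.optimizer.lr.LambdaDecay`, stepped once per epoch by the `paddle.callbacks.LRScheduler(by_step=False, by_epoch=True)` callback. The sketch below checks that the new schedule reproduces the deleted callback's formula `new_lr = base_lr * lr_decay ** max(epoch + 1 - epoch_start_decay, 0)`; the numeric values are invented for illustration.

```python
import paddle

# Illustrative hyperparameters (the real ones come from args).
base_lr, lr_decay, epoch_start_decay = 1.0, 0.8, 3

sched = paddle.optimizer.lr.LambdaDecay(
    learning_rate=base_lr,
    lr_lambda=lambda x: lr_decay ** max(x + 1 - epoch_start_decay, 0.0))

for epoch in range(6):
    # What the deleted UpdateModel.on_epoch_begin used to set directly:
    old_lr = base_lr * lr_decay ** max(epoch + 1 - epoch_start_decay, 0.0)
    assert abs(sched.last_lr - old_lr) < 1e-8
    sched.step()  # LRScheduler(by_epoch=True) calls this once per epoch
```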
......
@@ -66,7 +66,7 @@ python train.py
 ```sh
 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-python -m paddle.distributed.launch --selected_gpus=0,1,2,3,4,5,6,7 train.py
+python -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" train.py
 ```
......
@@ -49,7 +49,7 @@ def create_infer_loader(args):
             prepare_infer_input,
             bos_idx=args.bos_idx,
             eos_idx=args.eos_idx,
-            pad_idx=args.eos_idx),
+            pad_idx=args.bos_idx),
         num_workers=0,
         return_list=True)
     data_loaders = (data_loader, batch_sampler.__len__)
......
@@ -445,6 +445,9 @@ class SQuAD(Dataset):
         self.examples = examples

+    def __len__(self):
+        return len(self.data)
+
     def __getitem__(self, idx):
         feature = self.data[idx]
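The added `__len__` completes the map-style dataset protocol: `paddle.io.DataLoader` and its default samplers need both `__getitem__` and `__len__` to draw indices. A minimal sketch of the contract, with made-up data:

```python
from paddle.io import Dataset

class MiniDataset(Dataset):
    """Map-style dataset: samplers draw indices from range(len(self))."""

    def __init__(self, data):
        self.data = data

    def __len__(self):
        # Without this, len(dataset) and length-based samplers raise.
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

ds = MiniDataset([('What is Paddle?', 0), ('Who wrote BERT?', 1)])
print(len(ds), ds[0])  # 2 ('What is Paddle?', 0)
```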
......