提交 bbfd74a5 编写于 作者: Z Zeyu Chen

Merge branch 'develop' of https://github.com/PaddlePaddle/models into develop

# BERT Benchmark with Fleet API
先配置运行环境
export PYTHONPATH=/home/fangzeyang/PaddleNLP
export DATA_DIR=/home/fangzeyang/bert_data/wikicorpus_en
先配置运行环境,clone PaddleNLP的代码,同时下载好预训练的数据到相应的位置
export PYTHONPATH=${HOME}/models/PaddleNLP
export DATA_DIR=${HOME}/bert_data/wikicorpus_en
## NLP 任务中的Pretraining
......
......@@ -20,5 +20,5 @@ python3 train.py
``` shell
cd dygraph/
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python3 -m paddle.distributed.launch --selected_gpus=0,1,2,3,4,5,6,7 train.py
python3 -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" train.py
```
......@@ -87,13 +87,6 @@ class CrossEntropyLossForLm(nn.Layer):
class UpdateModel(paddle.callbacks.Callback):
    """Callback that resets the model's hidden states and updates the
    optimizer's learning rate at the beginning of every epoch.

    Args:
        base_lr (float): Initial learning rate before any decay is applied.
        lr_decay (float): Multiplicative decay factor applied per epoch
            once decay starts.
        epoch_start_decay (int): Epoch index at which decay begins; before
            it the learning rate stays at ``base_lr``.
    """

    def __init__(self, base_lr, lr_decay, epoch_start_decay):
        self.base_lr = base_lr
        self.lr_decay = lr_decay
        self.epoch_start_decay = epoch_start_decay

    def on_epoch_begin(self, epoch=None, logs=None):
        # Reset the network's hidden states so state does not carry over
        # between epochs (presumably a recurrent LM — confirm in network).
        self.model.network.reset_states()
        # Exponent is clamped at 0.0 until `epoch_start_decay` is reached,
        # so the learning rate stays at base_lr for the early epochs and
        # decays exponentially afterwards.
        decay_power = max(epoch + 1 - self.epoch_start_decay, 0.0)
        self.model._optimizer.set_lr(self.base_lr * self.lr_decay**decay_power)
......@@ -13,8 +13,9 @@ paddle.seed(102)
def create_data_loader(batch_size, num_steps, data_path):
train_ds, valid_ds, test_ds = PTBDataset.get_datasets(
[batch_size] * 3, [num_steps] * 3, ['train', 'eval', 'test'])
train_ds = PTBDataset(batch_size, num_steps, 'train')
valid_ds = PTBDataset(batch_size, num_steps, 'eval')
test_ds = PTBDataset(batch_size, num_steps, 'test')
train_loader = DataLoader(train_ds, return_list=True, batch_size=None)
valid_loader = DataLoader(valid_ds, return_list=True, batch_size=None)
......@@ -40,15 +41,15 @@ def train(args):
gloabl_norm_clip = paddle.nn.ClipGradByGlobalNorm(args.max_grad_norm)
cross_entropy = CrossEntropyLossForLm()
ppl_metric = Perplexity()
callback = UpdateModel(
base_lr=args.base_lr,
lr_decay=args.lr_decay,
epoch_start_decay=args.epoch_start_decay)
callback = UpdateModel()
scheduler = paddle.callbacks.LRScheduler(by_step=False, by_epoch=True)
model = paddle.Model(network)
# FIXME(yuanxiaopeng): Use scheduler instead of callback
#scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=args.base_lr, lr_lambda=lambda x: args.lr_decay**max(x + 1 - args.epoch_start_decay, 0.0), verbose=True)
optimizer = paddle.optimizer.SGD(learning_rate=args.base_lr,
learning_rate = paddle.optimizer.lr.LambdaDecay(
learning_rate=args.base_lr,
lr_lambda=lambda x: args.lr_decay**max(x + 1 - args.epoch_start_decay, 0.0),
verbose=True)
optimizer = paddle.optimizer.SGD(learning_rate=learning_rate,
parameters=model.parameters(),
grad_clip=gloabl_norm_clip)
......@@ -62,7 +63,7 @@ def train(args):
eval_data=valid_loader,
epochs=args.max_epoch,
shuffle=False,
callbacks=[callback],
callbacks=[callback, scheduler],
log_freq=max(1, len(train_loader) // 10))
model.save(path='checkpoint/test') # save for training
......
......@@ -66,7 +66,7 @@ python train.py
```sh
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python -m paddle.distributed.launch --selected_gpus=0,1,2,3,4,5,6,7 train.py
python -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" train.py
```
......
......@@ -49,7 +49,7 @@ def create_infer_loader(args):
prepare_infer_input,
bos_idx=args.bos_idx,
eos_idx=args.eos_idx,
pad_idx=args.eos_idx),
pad_idx=args.bos_idx),
num_workers=0,
return_list=True)
data_loaders = (data_loader, batch_sampler.__len__)
......
......@@ -445,6 +445,9 @@ class SQuAD(Dataset):
self.examples = examples
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
feature = self.data[idx]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册