diff --git a/PaddleNLP/benchmark/bert/README.md b/PaddleNLP/benchmark/bert/README.md
index 0ab3f0da325e4c372bbf90aeee71d7a6d7b1bdd2..12d60609fa3775ca92ac666a2cdf5a32972017a5 100644
--- a/PaddleNLP/benchmark/bert/README.md
+++ b/PaddleNLP/benchmark/bert/README.md
@@ -1,8 +1,8 @@
 # BERT Benchmark with Fleet API

-First, configure the runtime environment:
-export PYTHONPATH=/home/fangzeyang/PaddleNLP
-export DATA_DIR=/home/fangzeyang/bert_data/wikicorpus_en
+First configure the runtime environment: clone the PaddleNLP code and download the pretrained data to the corresponding location:
+export PYTHONPATH=${HOME}/models/PaddleNLP
+export DATA_DIR=${HOME}/bert_data/wikicorpus_en

 ## Pretraining in NLP tasks

diff --git a/PaddleNLP/benchmark/transformer/README.md b/PaddleNLP/benchmark/transformer/README.md
index 5f2d2801e72046903364089d5c0e3ef232422322..77fb2011833a6c86bee449ffb5a2e7ebff4c39e8 100644
--- a/PaddleNLP/benchmark/transformer/README.md
+++ b/PaddleNLP/benchmark/transformer/README.md
@@ -20,5 +20,5 @@ python3 train.py
 ``` shell
 cd dygraph/
 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-python3 -m paddle.distributed.launch --selected_gpus=0,1,2,3,4,5,6,7 train.py
+python3 -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" train.py
 ```
diff --git a/PaddleNLP/examples/language_model/rnnlm/model.py b/PaddleNLP/examples/language_model/rnnlm/model.py
index 61f8c516a41acfb2802f931d7d3eb5a263d346c1..b28fc1cbef9d8c8b3ded75b05c91b2e375d05dc5 100644
--- a/PaddleNLP/examples/language_model/rnnlm/model.py
+++ b/PaddleNLP/examples/language_model/rnnlm/model.py
@@ -87,13 +87,6 @@ class CrossEntropyLossForLm(nn.Layer):

 class UpdateModel(paddle.callbacks.Callback):
-    # This callback reset model hidden states and update learning rate before each epoch begins
-    def __init__(self, base_lr, lr_decay, epoch_start_decay):
-        self.base_lr = base_lr
-        self.lr_decay = lr_decay
-        self.epoch_start_decay = epoch_start_decay
-
+    # This callback resets the model hidden states before each epoch begins
     def on_epoch_begin(self, epoch=None, logs=None):
         self.model.network.reset_states()
-        new_lr = self.base_lr * (self.lr_decay
-                                 **max(epoch + 1 - self.epoch_start_decay, 0.0))
-        self.model._optimizer.set_lr(new_lr)
+
diff --git a/PaddleNLP/examples/language_model/rnnlm/train.py b/PaddleNLP/examples/language_model/rnnlm/train.py
index 3588bd586c09b4c986d0d5a82823ead87b62cdec..cc30a84cdda3c84d883819d18c86a57d36821997 100644
--- a/PaddleNLP/examples/language_model/rnnlm/train.py
+++ b/PaddleNLP/examples/language_model/rnnlm/train.py
@@ -13,8 +13,9 @@ paddle.seed(102)


 def create_data_loader(batch_size, num_steps, data_path):
-    train_ds, valid_ds, test_ds = PTBDataset.get_datasets(
-        [batch_size] * 3, [num_steps] * 3, ['train', 'eval', 'test'])
+    train_ds = PTBDataset(batch_size, num_steps, 'train')
+    valid_ds = PTBDataset(batch_size, num_steps, 'eval')
+    test_ds = PTBDataset(batch_size, num_steps, 'test')

     train_loader = DataLoader(train_ds, return_list=True, batch_size=None)
     valid_loader = DataLoader(valid_ds, return_list=True, batch_size=None)
@@ -40,15 +41,15 @@ def train(args):
     gloabl_norm_clip = paddle.nn.ClipGradByGlobalNorm(args.max_grad_norm)
     cross_entropy = CrossEntropyLossForLm()
     ppl_metric = Perplexity()
-    callback = UpdateModel(
-        base_lr=args.base_lr,
-        lr_decay=args.lr_decay,
-        epoch_start_decay=args.epoch_start_decay)
-
+    callback = UpdateModel()
+    scheduler = paddle.callbacks.LRScheduler(by_step=False, by_epoch=True)
     model = paddle.Model(network)
-    # FIXME(yuanxiaopeng): Use scheduler instead of callback
-    #scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=args.base_lr, lr_lambda=lambda x: args.lr_decay**max(x + 1 - args.epoch_start_decay, 0.0), verbose=True)
-    optimizer = paddle.optimizer.SGD(learning_rate=args.base_lr,
+
+    learning_rate = paddle.optimizer.lr.LambdaDecay(
+        learning_rate=args.base_lr,
+        lr_lambda=lambda x: args.lr_decay**max(x + 1 - args.epoch_start_decay, 0.0),
+        verbose=True)
+    optimizer = paddle.optimizer.SGD(learning_rate=learning_rate,
                                      parameters=model.parameters(),
                                      grad_clip=gloabl_norm_clip)
@@ -62,7 +63,7 @@ def train(args):
         eval_data=valid_loader,
         epochs=args.max_epoch,
         shuffle=False,
-        callbacks=[callback],
+        callbacks=[callback, scheduler],
         log_freq=max(1, len(train_loader) // 10))
     model.save(path='checkpoint/test')  # save for training
diff --git a/PaddleNLP/examples/machine_translation/transformer/README.md b/PaddleNLP/examples/machine_translation/transformer/README.md
index edf29a72bf6222394fdeef3379913286e339331b..a4181bd7e4f431a7640380c4522b4deecb97130b 100644
--- a/PaddleNLP/examples/machine_translation/transformer/README.md
+++ b/PaddleNLP/examples/machine_translation/transformer/README.md
@@ -66,7 +66,7 @@ python train.py

 ```sh
 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-python -m paddle.distributed.launch --selected_gpus=0,1,2,3,4,5,6,7 train.py
+python -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" train.py
 ```
diff --git a/PaddleNLP/examples/machine_translation/transformer/reader.py b/PaddleNLP/examples/machine_translation/transformer/reader.py
index 19134b36562766c3364ce7f98a56386ec180ac21..b29db4c082466cdb92244fd04f37bd7795856ab6 100644
--- a/PaddleNLP/examples/machine_translation/transformer/reader.py
+++ b/PaddleNLP/examples/machine_translation/transformer/reader.py
@@ -49,7 +49,7 @@ def create_infer_loader(args):
             prepare_infer_input,
             bos_idx=args.bos_idx,
             eos_idx=args.eos_idx,
-            pad_idx=args.eos_idx),
+            pad_idx=args.bos_idx),
         num_workers=0,
         return_list=True)
     data_loaders = (data_loader, batch_sampler.__len__)
diff --git a/PaddleNLP/paddlenlp/datasets/squad.py b/PaddleNLP/paddlenlp/datasets/squad.py
index 9cb9bdc841c2ca689117ac68ca58e9bbd1f2c755..8305e1ba4a2216750bfb0e9ce8b37bd282ad0ea4 100644
--- a/PaddleNLP/paddlenlp/datasets/squad.py
+++ b/PaddleNLP/paddlenlp/datasets/squad.py
@@ -445,6 +445,9 @@ class SQuAD(Dataset):

         self.examples = examples

+    def __len__(self):
+        return len(self.data)
+
     def __getitem__(self, idx):
         feature = self.data[idx]
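
The rnnlm change above replaces the hand-rolled per-epoch LR update in `UpdateModel` with a `paddle.optimizer.lr.LambdaDecay` schedule, stepped once per epoch by the high-level API's `paddle.callbacks.LRScheduler(by_step=False, by_epoch=True)` callback. The sketch below shows the schedule this wires up in isolation; the values of `base_lr`, `lr_decay`, and `epoch_start_decay` are illustrative placeholders, not the repo's defaults.

```python
# A minimal sketch, assuming illustrative constants: the LR stays at base_lr
# until epoch_start_decay, then is multiplied by lr_decay once per epoch,
# matching the lr_lambda used in train.py.
import paddle

base_lr, lr_decay, epoch_start_decay = 1.0, 0.5, 6  # placeholders, not repo defaults

scheduler = paddle.optimizer.lr.LambdaDecay(
    learning_rate=base_lr,
    lr_lambda=lambda epoch: lr_decay**max(epoch + 1 - epoch_start_decay, 0.0))

for epoch in range(10):
    print(f"epoch {epoch}: lr = {scheduler.last_lr}")
    # In the patched train.py, paddle.callbacks.LRScheduler(by_epoch=True)
    # calls step() once per epoch on the schedule attached to the optimizer.
    scheduler.step()
```

Run standalone, this prints a constant LR for the first `epoch_start_decay` epochs and then halves it each epoch, which is the behavior the deleted `on_epoch_begin` code implemented by hand via `set_lr`.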