# Launches reading_comprehension.py for a single reading-comprehension dataset.
# Run from the directory that contains reading_comprehension.py.

# NOTE(review): looks like a PaddlePaddle runtime flag controlling the
# garbage-collection threshold (in GB) for eager tensor deletion — confirm
# against the framework's flag documentation.
export FLAGS_eager_delete_tensor_gb=0.0

# Recommended hyperparameters for the different tasks:
#   squad:    batch_size=8, weight_decay=0, num_epoch=3, max_seq_len=512, lr=5e-5
#   squad2.0: batch_size=8, weight_decay=0, num_epoch=3, max_seq_len=512, lr=5e-5
#   cmrc2018: batch_size=8, weight_decay=0, num_epoch=2, max_seq_len=512, lr=2.5e-5
#   drcd:     batch_size=8, weight_decay=0, num_epoch=2, max_seq_len=512, lr=2.5e-5
#
# The dataset name selects both the checkpoint directory (./ckpt_<dataset>)
# and the value passed to --dataset below.
dataset=cmrc2018

# NOTE(review): --weight_decay=0.01 differs from the weight_decay=0 listed
# above for cmrc2018; the value is kept as it was in the original command —
# confirm which one is intended.
#
# Each option sits on its own line and every backslash is the last character
# on its line, so it acts as a line continuation rather than escaping a space.
python -u reading_comprehension.py \
  --batch_size=8 \
  --use_gpu=True \
  --checkpoint_dir="./ckpt_${dataset}" \
  --learning_rate=2.5e-5 \
  --weight_decay=0.01 \
  --warmup_proportion=0.1 \
  --num_epoch=2 \
  --max_seq_len=512 \
  --dataset="${dataset}"