run_cls.sh 2.7 KB
Newer Older
1
export FLAGS_eager_delete_tensor_gb=0.0
S
Steffy-zxf 已提交
2
export CUDA_VISIBLE_DEVICES=0
Z
Zeyu Chen 已提交
3

K
kinghuin 已提交
4
CKPT_DIR="./ckpt_chnsenticorp"
K
kinghuin 已提交
5

S
Steffy-zxf 已提交
6 7 8 9 10 11 12 13 14 15
python -u text_cls.py \
            --batch_size=24 \
            --use_gpu=True \
            --checkpoint_dir=${CKPT_DIR} \
            --learning_rate=5e-5 \
            --weight_decay=0.01 \
            --max_seq_len=128 \
            --warmup_proportion=0.1 \
            --num_epoch=3 \
            --use_data_parallel=True
K
kinghuin 已提交
16

K
kinghuin 已提交
17
# The sugguested hyper parameters for difference task
K
kinghuin 已提交
18 19 20 21 22 23 24 25 26 27 28 29 30
# for ChineseGLUE:
# TNews: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# LCQMC: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# XNLI_zh: batch_size=32, weight_decay=0, num_epoch=2, max_seq_len=128, lr=5e-5
# INEWS: batch_size=4, weight_decay=0, num_epoch=3, max_seq_len=512, lr=5e-5
# DRCD: see demo: reading-comprehension
# CMRC2018: see demo: reading-comprehension
# BQ: batch_size=32, weight_decay=0, num_epoch=2, max_seq_len=100, lr=1e-5
# MSRANER: see demo: sequence-labeling
# THUCNEWS: batch_size=8, weight_decay=0, num_epoch=2, max_seq_len=512, lr=5e-5
# IFLYTEKDATA: batch_size=16, weight_decay=0, num_epoch=5, max_seq_len=256, lr=1e-5

# for other tasks:
K
kinghuin 已提交
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
# ChnSentiCorp: batch_size=24, weight_decay=0.01, num_epoch=3, max_seq_len=128, lr=5e-5
# NLPCC_DBQA: batch_size=8, weight_decay=0.01, num_epoch=3, max_seq_len=512, lr=2e-5
# LCQMC: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=2e-5
# QQP: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# QNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# SST-2: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# CoLA: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# MRPC: batch_size=32, weight_decay=0.01, num_epoch=3, max_seq_len=128, lr=5e-5
# RTE: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=3e-5
# MNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
#       Specify the matched/mismatched dev and test dataset  with an underscore.
#       mnli_m or mnli: dev and test in matched dataset.
#       mnli_mm: dev and test in mismatched dataset.
#      The difference can be seen in https://www.nyu.edu/projects/bowman/multinli/paper.pdf.
#       If you are not sure which one to pick, just use mnli or mnli_m.
K
kinghuin 已提交
46
# XNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
K
kinghuin 已提交
47 48 49 50 51 52
#       Specify the language with an underscore like xnli_zh.
#       ar- Arabic      bg- Bulgarian      de- German
#       el- Greek       en- English        es- Spanish
#       fr- French      hi- Hindi          ru- Russian
#       sw- Swahili     th- Thai           tr- Turkish
#       ur- Urdu        vi- Vietnamese     zh- Chinese (Simplified)