# Enable Paddle's eager garbage collection of intermediate tensors and run on GPU 0.
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0

CKPT_DIR="./ckpt_chnsenticorp_predefine_net"

python -u text_cls_predefine_net.py \
                   --batch_size=24 \
                   --use_gpu=True \
                   --checkpoint_dir=${CKPT_DIR} \
                   --learning_rate=5e-5 \
                   --weight_decay=0.01 \
                   --max_seq_len=128 \
                   --warmup_proportion=0.1 \
                   --num_epoch=3 \
                   --use_data_parallel=True \
                   --network=bilstm

# Suggested hyperparameters for the different tasks; an illustrative invocation follows at the end of this file.
# for ChineseGLUE:
# TNews: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# LCQMC: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# XNLI_zh: batch_size=32, weight_decay=0, num_epoch=2, max_seq_len=128, lr=5e-5
# INEWS: batch_size=4, weight_decay=0, num_epoch=3, max_seq_len=512, lr=5e-5
# DRCD: see demo: reading-comprehension
# CMRC2018: see demo: reading-comprehension
# BQ: batch_size=32, weight_decay=0, num_epoch=2, max_seq_len=100, lr=1e-5
# MSRANER: see demo: sequence-labeling
# THUCNEWS: batch_size=8, weight_decay=0, num_epoch=2, max_seq_len=512, lr=5e-5
# IFLYTEKDATA: batch_size=16, weight_decay=0, num_epoch=5, max_seq_len=256, lr=1e-5

# for other tasks:
# ChnSentiCorp: batch_size=24, weight_decay=0.01, num_epoch=3, max_seq_len=128, lr=5e-5
# NLPCC_DBQA: batch_size=8, weight_decay=0.01, num_epoch=3, max_seq_len=512, lr=2e-5
# LCQMC: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=2e-5
# QQP: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# QNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# SST-2: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# CoLA: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# MRPC: batch_size=32, weight_decay=0.01, num_epoch=3, max_seq_len=128, lr=5e-5
# RTE: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=3e-5
# MNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
#       Specify the matched/mismatched dev and test datasets with an underscore:
#       mnli_m or mnli: dev and test on the matched dataset.
#       mnli_mm: dev and test on the mismatched dataset.
#       The difference is explained in https://www.nyu.edu/projects/bowman/multinli/paper.pdf.
#       If you are not sure which one to pick, just use mnli or mnli_m.
# XNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
#       Specify the language with an underscore like xnli_zh.
#       ar- Arabic      bg- Bulgarian      de- German
#       el- Greek       en- English        es- Spanish
#       fr- French      hi- Hindi          ru- Russian
#       sw- Swahili     th- Thai           tr- Turkish
#       ur- Urdu        vi- Vietnamese     zh- Chinese (Simplified)
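
# Illustrative example (commented out, not run by default): the same launcher with the
# LCQMC settings suggested above. This is only a sketch; it assumes the dataset itself
# is selected inside text_cls_predefine_net.py (or via a flag not shown in this script),
# so only the hyperparameters and the checkpoint directory (a hypothetical name) change.
#
# CKPT_DIR="./ckpt_lcqmc_predefine_net"
# python -u text_cls_predefine_net.py \
#                    --batch_size=32 \
#                    --use_gpu=True \
#                    --checkpoint_dir=${CKPT_DIR} \
#                    --learning_rate=2e-5 \
#                    --weight_decay=0 \
#                    --max_seq_len=128 \
#                    --warmup_proportion=0.1 \
#                    --num_epoch=3 \
#                    --use_data_parallel=True \
#                    --network=bilstm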