run_classifier.sh 3.0 KB
Newer Older
1
# Runtime flag exported to the Python process started by this script.
# NOTE(review): presumably the deep-learning framework's tensor
# garbage-collection threshold in GB — confirm against the framework flag docs.
FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_eager_delete_tensor_gb
S
Steffy-zxf 已提交
2
# Expose only GPU index 0 to child processes; edit the list to use other devices.
export CUDA_VISIBLE_DEVICES="0"
Z
Zeyu Chen 已提交
3

K
kinghuin 已提交
4
# Users can select chnsenticorp, nlpcc_dbqa, lcqmc, and so on for different tasks.
Z
Zeyu Chen 已提交
5
# Dataset name; forwarded to text_classifier.py via --dataset and used to
# build the checkpoint directory name.
DATASET='chnsenticorp'
Z
Zeyu Chen 已提交
6
# Checkpoint directory, one per dataset: ./ckpt_<DATASET>.
CKPT_DIR="./ckpt_$DATASET"
K
kinghuin 已提交
7

K
kinghuin 已提交
8 9 10 11 12 13 14 15 16 17 18 19 20
# Launch text_classifier.py for the selected dataset. The hyperparameters
# below match the ChnSentiCorp entry in the recommendation list at the end of
# this script; adjust them when DATASET is changed.
# DATASET and CKPT_DIR are double-quoted so that a value containing spaces or
# glob characters reaches the script as one argument instead of being
# word-split or pathname-expanded by the shell.
# `-u` makes Python write stdout/stderr unbuffered, so logs appear immediately.
python -u text_classifier.py \
  --batch_size=24 \
  --use_gpu=True \
  --dataset="${DATASET}" \
  --checkpoint_dir="${CKPT_DIR}" \
  --learning_rate=5e-5 \
  --weight_decay=0.01 \
  --max_seq_len=128 \
  --num_epoch=3 \
  --use_pyreader=True \
  --use_data_parallel=True \
  --use_taskid=False

K
kinghuin 已提交
21
# Recommended hyperparameters for different tasks
K
kinghuin 已提交
22 23 24 25 26 27 28 29 30 31 32 33 34
# for ChineseGLUE:
# TNews: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# LCQMC: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# XNLI_zh: batch_size=32, weight_decay=0, num_epoch=2, max_seq_len=128, lr=5e-5
# INEWS: batch_size=4, weight_decay=0, num_epoch=3, max_seq_len=512, lr=5e-5
# DRCD: see demo: reading-comprehension
# CMRC2018: see demo: reading-comprehension
# BQ: batch_size=32, weight_decay=0, num_epoch=2, max_seq_len=100, lr=1e-5
# MSRANER: see demo: sequence-labeling
# THUCNEWS: batch_size=8, weight_decay=0, num_epoch=2, max_seq_len=512, lr=5e-5
# IFLYTEKDATA: batch_size=16, weight_decay=0, num_epoch=5, max_seq_len=256, lr=1e-5

# for other tasks:
K
kinghuin 已提交
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
# ChnSentiCorp: batch_size=24, weight_decay=0.01, num_epoch=3, max_seq_len=128, lr=5e-5
# NLPCC_DBQA: batch_size=8, weight_decay=0.01, num_epoch=3, max_seq_len=512, lr=2e-5
# LCQMC: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=2e-5
# QQP: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# QNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# SST-2: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# CoLA: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# MRPC: batch_size=32, weight_decay=0.01, num_epoch=3, max_seq_len=128, lr=5e-5
# RTE: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=3e-5
# MNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
#       Specify the matched/mismatched dev and test datasets with an underscore.
#       mnli_m or mnli: dev and test in matched dataset.
#       mnli_mm: dev and test in mismatched dataset.
#       The difference is described in https://www.nyu.edu/projects/bowman/multinli/paper.pdf
#       If you are not sure which one to pick, just use mnli or mnli_m.
K
kinghuin 已提交
50
# XNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
K
kinghuin 已提交
51 52 53 54 55 56
#       Specify the language with an underscore like xnli_zh.
#       ar- Arabic      bg- Bulgarian      de- German
#       el- Greek       en- English        es- Spanish
#       fr- French      hi- Hindi          ru- Russian
#       sw- Swahili     th- Thai           tr- Turkish
#       ur- Urdu        vi- Vietnamese     zh- Chinese (Simplified)