#!/bin/bash
# Continuous-evaluation (CE) script: fine-tune BERT-Base Chinese on XNLI
# and pipe the emitted kpi lines to _ce.py, first on 1 GPU, then on 4 GPUs.

# Paddle executor flags for multi-GPU runs.
export FLAGS_enable_parallel_graph=1
export FLAGS_sync_nccl_allreduce=1

BERT_BASE_PATH="chinese_L-12_H-768_A-12"
TASK_NAME='xnli'
DATA_PATH=data/xnli/XNLI-MT-1.0
CKPT_PATH=pretrain_model

train() {
    # --in_tokens: --batch_size is counted in tokens, not in examples.
    # --enable_ce emits the kpi lines that _ce.py consumes from stdout.
    python -u run_classifier.py --task_name ${TASK_NAME} \
        --use_cuda true \
        --do_train true \
        --do_val false \
        --do_test false \
        --batch_size 8192 \
        --in_tokens true \
        --init_checkpoint ${CKPT_PATH}/${BERT_BASE_PATH}/ \
        --data_dir ${DATA_PATH} \
        --vocab_path ${CKPT_PATH}/${BERT_BASE_PATH}/vocab.txt \
        --checkpoints ${CKPT_PATH} \
        --save_steps 1000 \
        --weight_decay 0.01 \
        --warmup_proportion 0.0 \
        --validation_steps 25 \
        --epoch 1 \
        --max_seq_len 512 \
        --bert_config_path ${CKPT_PATH}/${BERT_BASE_PATH}/bert_config.json \
        --learning_rate 1e-4 \
        --skip_steps 10 \
        --random_seed 100 \
        --enable_ce \
        --shuffle false
}

# Single-GPU CE run.
export CUDA_VISIBLE_DEVICES=0
train | python _ce.py

# 4-GPU CE run.
export CUDA_VISIBLE_DEVICES=0,1,2,3
train | python _ce.py