output_name="seq2seq"
init_model="./model_files/unimo_large_en"
data_path="./data/squad_qg"

# hyperparameters
lr_scheduler="linear_warmup_decay"
use_fp16="False"
# Fuse the per-layer AllReduce calls into a single operation
use_fuse="True"
use_hierarchical_allreduce="True"
loss_scaling=12800
skip_steps=100
save_steps=5000
validation_steps=5000
label_smooth=0.1
weight_decay=0.01
max_seq_len=512
random_seed=666

# decoding params
do_decode="true"
max_src_len=416
max_tgt_len=96
max_out_len=48
min_out_len=5
beam_size=6
length_penalty=1.2
block_trigram="false"
use_multi_gpu_test="True"

# Adam optimizer
beta1=0.9
beta2=0.98
epsilon=1e-06

# data
tokenized_input="True"
continuous_position="False"

# dataset
train_set="train.tsv"
dev_set="dev.tsv"
test_set="test.tsv"
pred_set="test.tsv"
do_train="true"
do_val="false"
do_test="true"
do_pred="false"

# evaluate
eval_script="bash ./src/eval/tasks/squad_qg/eval.sh"
eval_mertrics="Bleu_4,METEOR,ROUGE_L"

## tuning params
in_tokens="False"
pred_batch_size=8
epoch=20
# bash arrays of candidate values for the hyperparameter sweep
# (single values here, so effectively one run)
BATCH_SIZE=("8")
LR_RATE=("5e-6")
DD_RAND_SEED=("1")
WARMUP_PROP=("0.06")

config_path="./model_files/config/unimo_large_en.json"
vocab_file="./model_files/dict/unimo_en.vocab.txt"
bpe_json="./model_files/dict/unimo_en.encoder.json"
bpe_file="./model_files/dict/unimo_en.vocab.bpe"
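
# --- Illustrative sketch (not part of the original config) ---
# A minimal example of how a launcher could consume this file: source it,
# then iterate the BATCH_SIZE/LR_RATE/DD_RAND_SEED/WARMUP_PROP arrays,
# launching one training run per combination. The RUN_GRID_SKETCH guard,
# the entry point, and the flag names in the commented command below are
# assumptions for illustration, not the repo's confirmed interface.
if [[ "${RUN_GRID_SKETCH:-false}" == "true" ]]; then
  for bs in "${BATCH_SIZE[@]}"; do
    for lr in "${LR_RATE[@]}"; do
      for seed in "${DD_RAND_SEED[@]}"; do
        for wp in "${WARMUP_PROP[@]}"; do
          echo "launching run: batch_size=${bs} lr=${lr} seed=${seed} warmup=${wp}"
          # python ./src/run_seq2seq.py --batch_size "${bs}" --learning_rate "${lr}" \
          #     --random_seed "${seed}" --warmup_proportion "${wp}"  # hypothetical flags
        done
      done
    done
  done
fi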