model_conf 1.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
# Task name and input locations.
# NOTE(review): the script that reads this file is not visible here; the
# descriptions below are inferred from key names and values -- confirm.
output_name="img2txt"

# Presumably the pretrained checkpoint directory the model is initialised from.
init_model="./model_files/unimo_large_en"
# Presumably the root directory of the COCO data files.
data_path="./data/coco"
# NOTE(review): bare file name with no directory component, unlike the
# ./-relative paths above -- confirm which directory it is resolved against.
object_file_local_path="coco_object_0.2_tot.ids"

# Training hyperparameters.
lr_scheduler="linear_warmup_decay"
use_fp16="False"
# Merge the AllReduce operations of a layer.
use_fuse="True"
use_hierarchical_allreduce="True"
# NOTE(review): a loss-scaling value is set although use_fp16 is "False";
# presumably it only takes effect when fp16 is enabled -- confirm.
loss_scaling=12800

# Step intervals. Presumably: skip_steps = logging interval, save_steps =
# checkpoint interval, validation_steps = validation interval -- confirm
# against the training script.
skip_steps=100
save_steps=10000
validation_steps=10000
label_smooth=0.1
weight_decay=0.01
# Same value as max_tgt_len / max_out_len in the decoding params (50).
max_seq_len=50
random_seed=666

# Decoding params.
# NOTE(review): boolean-like flags in this block use both "true" and "False"
# spellings. Keep each value's case exactly as-is unless the consumer is
# confirmed to parse booleans case-insensitively.
do_decode="true"
# Length limits. Presumably image-region, object-label, target-text and
# generated-output token counts respectively -- confirm units with the reader.
max_img_len=101
max_obj_len=100
max_tgt_len=50
max_out_len=50
min_out_len=5
# Presumably beam-search settings: beam width, length-penalty exponent and
# repeated-trigram blocking -- confirm.
beam_size=5
length_penalty=0.6
block_trigram="False"
use_multi_gpu_test="True"

# Adam optimizer coefficients: the two beta decay rates and the epsilon term.
beta1=0.9
beta2=0.98
epsilon=1e-06

# Dataset file lists and run-stage switches.
# NOTE(review): the *_filelist values are bare names with no directory;
# presumably resolved relative to data_path -- confirm.
train_filelist="train_filelist"
valid_filelist="valid_filelist"
test_filelist="test_filelist"
# Stage switches. As set here: train and test are "true", validation and
# prediction are "false".
do_train="true"
do_val="false"
do_test="true"
do_pred="false"

# Evaluation.
# Command line of the evaluation script (a bash invocation of eval.sh).
eval_script="bash ./src/eval/tasks/coco/eval.sh"
# Comma-separated list of metric names, no spaces.
# NOTE(review): the key is spelled `eval_mertrics` (sic). Do not rename it
# here unless every script that reads this key is updated at the same time.
eval_mertrics="Bleu_1,Bleu_2,Bleu_3,Bleu_4,METEOR,ROUGE_L,CIDEr,SPICE"

## tuning params
pred_batch_size=1
epoch=10
# The parenthesised values below are array literals (bash syntax), each
# holding a single quoted element here.
# NOTE(review): presumably the launcher iterates over these arrays to sweep
# batch size, learning rate, random seed and warmup proportion -- confirm.
BATCH_SIZE=("2")
LR_RATE=("7e-6")
DD_RAND_SEED=("1")
WARMUP_PROP=("0.06")

## adversarial training params
# NOTE(review): the meanings below are inferred from key names only; confirm
# against the training code before relying on them.
# Selects the adversarial training method by name.
adv_type="villa"
# Presumably: number of adversarial steps and the step size used for them.
adv_step=4
adv_lr=0.05
# Norm used for the perturbation ("l2" here), with its maximum norm and
# initial magnitude (both 0.4).
norm_type="l2"
adv_max_norm=0.4
adv_init_mag=0.4
# Presumably the weight of the KL-divergence term in the adversarial loss.
adv_kl_weight=1.0
with_pure_model="True"

## configuration
# Model config and vocabulary/BPE resource paths, all under ./model_files.
# JSON model configuration file.
config_path="./model_files/config/unimo_large_en.json"
# Vocabulary and BPE files, all under ./model_files/dict.
vocab_file="./model_files/dict/unimo_en.vocab.txt"
bpe_json="./model_files/dict/unimo_en.encoder.json"
bpe_file="./model_files/dict/unimo_en.vocab.bpe"