#!/usr/bin/env bash
# Launch script for PLSC distributed training. Case 3 (dist_arcface) is active;
# the other cases are kept as commented-out examples.

export PATH=/home/lilong/sandyhouse/PLSC/python/bin:$PATH
export FLAGS_eager_delete_tensor_gb=0.0
export GLOG_v=0

## case 2: run with softmax
#python do_train.py \
#    --model_save_dir="./saved_model" \
#    --data_dir="./data" \
#    --num_epochs=2 \
#    --loss_type='softmax'

## case 3: run with dist_arcface
python -m paddle.distributed.launch \
    --log_dir='mylog' \
    --selected_gpus="0,1,2,3,4,5,6,7" \
    --started_port="12349" \
    do_train.py \
    --data_dir="./data" \
    --num_epochs=10000000 \
    --loss_type='dist_arcface'

## case 4: run with dist_softmax
#python -m paddle.distributed.launch \
#    --log_dir='mylog' \
#    --selected_gpus="0,1" \
#    --started_port="12345" \
#    do_train.py \
#    --model_save_dir="./saved_model" \
#    --data_dir="./data" \
#    --num_epochs=2 \
#    --loss_type='dist_softmax'

## case 5: resume from checkpoints with the same number of trainers
#python -m paddle.distributed.launch \
#    --log_dir='mylog' \
#    --selected_gpus="0,1" \
#    --started_port="12345" \
#    do_train.py \
#    --model_save_dir="./saved_model" \
#    --checkpoint_dir="./saved_model/1" \
#    --data_dir="./data" \
#    --num_epochs=2 \
#    --loss_type='dist_softmax'

## case 6: resume from checkpoints with an increased number of trainers
#python -m paddle.distributed.launch \
#    --log_dir='mylog' \
#    --selected_gpus="0,1,2,3" \
#    --started_port="12345" \
#    do_train.py \
#    --model_save_dir="./saved_model" \
#    --checkpoint_dir="./saved_model/0" \
#    --data_dir="./data" \
#    --num_epochs=2 \
#    --loss_type='dist_softmax'

## case 7: resume from checkpoints with a decreased number of trainers
#python -m paddle.distributed.launch \
#    --log_dir='mylog' \
#    --selected_gpus="0,1" \
#    --started_port="12345" \
#    do_train.py \
#    --model_save_dir="./saved_model" \
#    --checkpoint_dir="./saved_model/0" \
#    --data_dir="./data" \
#    --num_epochs=2 \
#    --loss_type='dist_softmax'

## case 8: save models to HDFS
#python -m paddle.distributed.launch \
#    --log_dir='mylog' \
#    --selected_gpus="0,1" \
#    --started_port="12345" \
#    do_train.py \
#    --model_save_dir="./saved_model" \
#    --data_dir="./data" \
#    --fs_name=${FS_NAME} \
#    --fs_ugi=${FS_UGI} \
#    --fs_dir_save="/user/paddle/lilong/models/saved_model2" \
#    --num_epochs=2 \
#    --loss_type='dist_softmax'

## case 9: load models from HDFS
#python -m paddle.distributed.launch \
#    --log_dir='mylog' \
#    --selected_gpus="0,1" \
#    --started_port="12345" \
#    do_train.py \
#    --checkpoint_dir="./saved_model/" \
#    --data_dir="./data" \
#    --fs_name=${FS_NAME} \
#    --fs_ugi=${FS_UGI} \
#    --fs_dir_load="/user/paddle/lilong/models/saved_model/0" \
#    --num_epochs=2 \
#    --loss_type='dist_softmax'

## case 10: load models from HDFS and save models back to HDFS
#python3 -m paddle.distributed.launch \
#    --log_dir='mylog' \
#    --selected_gpus="0,1" \
#    --started_port="12345" \
#    do_train.py \
#    --checkpoint_dir="./saved_model/" \
#    --data_dir="./data" \
#    --fs_name=${FS_NAME} \
#    --fs_ugi=${FS_UGI} \
#    --fs_dir_load="/user/paddle/lilong/models/saved_model/0" \
#    --fs_dir_save="/user/paddle/lilong/models/saved_model2" \
#    --num_epochs=2 \
#    --loss_type='dist_softmax'
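
## Not part of the original cases: a hedged sketch of a quick single-GPU debug
## run, assuming do_train.py accepts the same flags as case 3. The log dir and
## port below are arbitrary placeholders; adjust them to avoid clashes with an
## already-running case.
#python -m paddle.distributed.launch \
#    --log_dir='mylog_debug' \
#    --selected_gpus="0" \
#    --started_port="12350" \
#    do_train.py \
#    --data_dir="./data" \
#    --num_epochs=1 \
#    --loss_type='dist_arcface'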