local_run.sh 1.9 KB
Newer Older
W
weiyue.su 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
#!/bin/bash 

# Trace each command as it is executed.
set -o xtrace

# Config file path: first command-line argument, or ./config.yaml when absent.
config="${1:-./config.yaml}"

# Remove any proxy settings from the environment.
unset http_proxy
unset https_proxy

# Flatten a simple YAML file into shell variable assignments.
#
# Arguments:
#   $1 - path to the YAML file
#   $2 - optional prefix prepended to every generated variable name
# Outputs:
#   One line per key that carries a value, written to stdout in the form
#   name="value", where name is the prefix followed by the parent keys and
#   the key itself joined with "_". The nesting level is derived from
#   (length of leading whitespace / 2), so two-space indentation is expected.
function parse_yaml {
   local prefix=$2
   local s='[[:space:]]*' w='[a-zA-Z0-9_]*' fs
   # ASCII FS (octal 034) delimits indent, key and value between sed and awk.
   # Assigned separately from "local" so a failure is not masked.
   fs=$(echo @|tr @ '\034')
   sed -ne "s|^\($s\):|\1|" \
        -e "s|^\($s\)\($w\)$s:$s[\"']\(.*\)[\"']$s\$|\1$fs\2$fs\3|p" \
        -e "s|^\($s\)\($w\)$s:$s\(.*\)$s\$|\1$fs\2$fs\3|p"  "$1" |
   awk -F"$fs" -v prefix="$prefix" '{
      indent = length($1)/2;
      vname[indent] = $2;
      # Subscripts from for-in are strings; add 0 to force a numeric
      # comparison so deeper levels are pruned correctly past level 9.
      for (i in vname) {if ((i + 0) > indent) {delete vname[i]}}
      if (length($3) > 0) {
         vn=""; for (i=0; i<indent; i++) {vn=(vn)(vname[i])("_")}
         printf("%s%s%s=\"%s\"\n", prefix, vn, $2, $3);
      }
   }'
}

# Run a local parameter-server job: start the pservers, then the trainers,
# each as a background "sh job.sh local <config>" process.
#
# Globals:
#   config (read) - config path forwarded to job.sh
#   Exports PADDLE_TRAINERS_NUM, PADDLE_PSERVERS_NUM, PADDLE_PORT,
#   PADDLE_PSERVERS and BASE.
# Files:
#   ${BASE}/pserver.<i>.log, ${BASE}/trainer.<j>.log - per-process output
#   job_id - one PID per line for every process started here
# Returns:
#   After all trainers have exited; pservers are left running in the
#   background (their PIDs are in job_id).
transpiler_local_train(){
    export PADDLE_TRAINERS_NUM=1
    export PADDLE_PSERVERS_NUM=1
    export PADDLE_PORT=6206
    export PADDLE_PSERVERS="127.0.0.1"
    export BASE="./local_dir"
    local i j
    local -a trainer_pids=()

    echo "$(which python)"

    # Start from an empty log directory.
    if [[ -d "${BASE}" ]]; then
        rm -rf -- "${BASE}"
    fi
    mkdir "${BASE}"
    # -f: the PID file does not exist on a first run.
    rm -f job_id

    for ((i = 0; i < PADDLE_PSERVERS_NUM; i++)); do
        echo "start ps server: ${i}"
        TRAINING_ROLE="PSERVER" PADDLE_TRAINER_ID=${i} sh job.sh local "$config" \
            &> "${BASE}/pserver.${i}.log" &
        echo $! >> job_id
    done

    # Pause before launching the trainers.
    sleep 3s

    for ((j = 0; j < PADDLE_TRAINERS_NUM; j++)); do
        echo "start ps work: ${j}"
        # Launched like the pservers: backgrounded with its own log, so that
        # $! below is this trainer's PID and job.sh gets no stray arguments.
        TRAINING_ROLE="TRAINER" PADDLE_TRAINER_ID=${j} sh job.sh local "$config" \
            &> "${BASE}/trainer.${j}.log" &
        trainer_pids+=("$!")
        echo $! >> job_id
    done

    # Block until training has finished.
    wait "${trainer_pids[@]}"
}

# Run training and then inference through paddle.distributed.launch.
#
# Globals:
#   config (read) - config path passed to train.py and infer.py via --conf
#   PATH (written) - ./python27-gcc482-gpu/bin/ is prepended and exported
# Note: infer.py is started regardless of train.py's exit status.
collective_local_train(){
    export PATH=./python27-gcc482-gpu/bin/:$PATH
    echo "$(which python)"
    # Quote the config path so it reaches the scripts as a single argument
    # even when it contains whitespace.
    python -m paddle.distributed.launch train.py --conf "$config"
    python -m paddle.distributed.launch infer.py --conf "$config"
}

# Define one shell variable per config key from parse_yaml's output.
# Both expansions are quoted so the config path and the generated assignments
# are not word-split or glob-expanded before eval runs them.
# NOTE(review): values are still evaluated as shell code; only use trusted
# config files.
eval "$(parse_yaml "$config")"

S
suweiyue 已提交
61 62
# Run the graph dump step. The variables used here are not assigned in this
# script directly; presumably they come from the config loaded above.
# Arguments are quoted so values containing whitespace or glob characters
# are passed through intact.
python3 ./preprocessing/dump_graph.py -i "$input_data" -o "$graph_path" --encoding "$encoding" \
    -l "$max_seqlen" --vocab_file "$ernie_vocab_file"
W
weiyue.su 已提交
63 64 65 66 67 68 69

# Pick the training routine from learner_type. The two checks are evaluated
# independently, one after the other; any other value runs nothing.
case "$learner_type" in
    cpu) transpiler_local_train ;;
esac

case "$learner_type" in
    gpu) collective_local_train ;;
esac