add pse to benchmark

46c170bc · 文幕地方 · cc01a59b · 46c170bc · 46c170bc · 46c170bc
Showing with 37 addition and 29 deletion

benchmark/readme.md benchmark/readme.md +2 -6

benchmark/run_benchmark_det.sh benchmark/run_benchmark_det.sh +12 -6

benchmark/run_det.sh benchmark/run_det.sh +5 -5

tools/program.py tools/program.py +18 -12

未找到文件。
--- a/benchmark/readme.md
+++ b/benchmark/readme.md

-# PaddleOCR DB/EAST 算法训练benchmark测试
+# PaddleOCR DB/EAST/PSE 算法训练benchmark测试

 PaddleOCR/benchmark目录下的文件用于获取并分析训练日志。
 训练采用icdar2015数据集，包括1000张训练图像和500张测试图像。模型配置采用resnet18_vd作为backbone，分别训练batch_size=8和batch_size=16的情况。
@@ -28,7 +28,3 @@ det_res18_db_v2.0_sp_bs8_fp32_1
 det_res18_db_v2.0_mp_bs16_fp32_1
 det_res18_db_v2.0_mp_bs8_fp32_1
 ```
-
-
-
-
--- a/benchmark/run_benchmark_det.sh
+++ b/benchmark/run_benchmark_det.sh
@@ -6,7 +6,7 @@ function _set_params(){
    run_mode=${1:-"sp"}          # 单卡sp|多卡mp
    batch_size=${2:-"64"}
    fp_item=${3:-"fp32"}        # fp32|fp16
-    max_iter=${4:-"500"}       # 可选，如果需要修改代码提前中断
+    max_iter=${4:-"10"}       # 可选，如果需要修改代码提前中断
    model_name=${5:-"model_name"}
    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}  # TRAIN_LOG_DIR 后续QA设置该参数

@@ -20,7 +20,7 @@ function _train(){
    echo "Train on ${num_gpu_devices} GPUs"
    echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"

-    train_cmd="-c configs/det/${model_name}.yml -o Train.loader.batch_size_per_card=${batch_size} Global.epoch_num=${max_iter} "   
+    train_cmd="-c configs/det/${model_name}.yml -o Train.loader.batch_size_per_card=${batch_size} Global.epoch_num=${max_iter} Global.eval_batch_step=[0,20000] Global.print_batch_step=2"   
    case ${run_mode} in
      sp) 
        train_cmd="python3.7 tools/train.py "${train_cmd}""
@@ -39,18 +39,24 @@ function _train(){
        echo -e "${model_name}, SUCCESS"
        export job_fail_flag=0
    fi
-    kill -9 `ps -ef|grep 'python3.7'|awk '{print $2}'`

    if [ $run_mode = "mp" -a -d mylog ]; then
        rm ${log_file}
        cp mylog/workerlog.0 ${log_file}
    fi
+}

-    # run log analysis
-    analysis_cmd="python3.7 benchmark/analysis.py --filename ${log_file}  --mission_name ${model_name} --run_mode ${mode} --direction_id 0 --keyword 'ips:' --base_batch_size ${batch_szie} --skip_steps 1 --gpu_num ${num_gpu_devices}  --index 1  --model_mode=-1  --ips_unit=samples/sec"
+function _analysis_log(){
+    analysis_cmd="python3.7 benchmark/analysis.py --filename ${log_file}  --mission_name ${model_name} --run_mode ${run_mode} --direction_id 0 --keyword 'ips:' --base_batch_size ${batch_size} --skip_steps 1 --gpu_num ${num_gpu_devices}  --index 1  --model_mode=-1  --ips_unit=samples/sec"
    eval $analysis_cmd
 }

+function _kill_process(){
+    kill -9 `ps -ef|grep 'python3.7'|awk '{print $2}'`
+}
+
+
 _set_params $@
 _train
-
+_analysis_log
+_kill_process
\ No newline at end of file
--- a/benchmark/run_det.sh
+++ b/benchmark/run_det.sh
@@ -3,11 +3,11 @@
 # 1 安装该模型需要的依赖 (如需开启优化策略请注明)
 python3.7 -m pip install -r requirements.txt
 # 2 拷贝该模型需要数据、预训练模型
-wget -c  -p ./tain_data/  https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar && cd train_data  && tar xf icdar2015.tar && cd ../
-wget -c -p ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams
+wget -P ./train_data/  https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar && cd train_data  && tar xf icdar2015.tar && cd ../
+wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams
 # 3 批量运行（如不方便批量，1，2需放到单个模型中）

-model_mode_list=(det_res18_db_v2.0 det_r50_vd_east)
+model_mode_list=(det_res18_db_v2.0 det_r50_vd_east det_r50_vd_pse)
 fp_item_list=(fp32)
 bs_list=(8 16)
 for model_mode in ${model_mode_list[@]}; do
@@ -15,11 +15,11 @@ for model_mode in ${model_mode_list[@]}; do
          for bs_item in ${bs_list[@]}; do
            echo "index is speed, 1gpus, begin, ${model_name}"
            run_mode=sp
-            CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 10 ${model_mode}     #  (5min)
+            CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 2 ${model_mode}     #  (5min)
            sleep 60
            echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
            run_mode=mp
-            CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 10 ${model_mode} 
+            CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 2 ${model_mode} 
            sleep 60
            done
      done

--- a/tools/program.py
+++ b/tools/program.py
@@ -212,15 +212,15 @@ def train(config,
    for epoch in range(start_epoch, epoch_num + 1):
        train_dataloader = build_dataloader(
            config, 'Train', device, logger, seed=epoch)
-        train_batch_cost = 0.0
        train_reader_cost = 0.0
-        batch_sum = 0
-        batch_start = time.time()
+        train_run_cost = 0.0
+        total_samples = 0
+        reader_start = time.time()
        max_iter = len(train_dataloader) - 1 if platform.system(
        ) == "Windows" else len(train_dataloader)
        for idx, batch in enumerate(train_dataloader):
            profiler.add_profiler_step(profiler_options)
-            train_reader_cost += time.time() - batch_start
+            train_reader_cost += time.time() - reader_start
            if idx >= max_iter:
                break
            lr = optimizer.get_lr()
@@ -228,6 +228,7 @@ def train(config,
            if use_srn:
                model_average = True

+            train_start = time.time()
            # use amp
            if scaler:
                with paddle.amp.auto_cast():
@@ -252,8 +253,8 @@ def train(config,
                optimizer.step()
            optimizer.clear_grad()

-            train_batch_cost += time.time() - batch_start
-            batch_sum += len(images)
+            train_run_cost += time.time() - train_start
+            total_samples += len(images)

            if not isinstance(lr_scheduler, float):
                lr_scheduler.step()
@@ -284,12 +285,13 @@ def train(config,
                logs = train_stats.log()
                strs = 'epoch: [{}/{}], iter: {}, {}, reader_cost: {:.5f} s, batch_cost: {:.5f} s, samples: {}, ips: {:.5f}'.format(
                    epoch, epoch_num, global_step, logs, train_reader_cost /
-                    print_batch_step, train_batch_cost / print_batch_step,
-                    batch_sum, batch_sum / train_batch_cost)
+                    print_batch_step, (train_reader_cost + train_run_cost) /
+                    print_batch_step, total_samples,
+                    total_samples / (train_reader_cost + train_run_cost))
                logger.info(strs)
-                train_batch_cost = 0.0
                train_reader_cost = 0.0
-                batch_sum = 0
+                train_run_cost = 0.0
+                total_samples = 0
            # eval
            if global_step > start_eval_step and \
                    (global_step - start_eval_step) % eval_batch_step == 0 and dist.get_rank() == 0:
@@ -342,7 +344,7 @@ def train(config,
                                          global_step)
            global_step += 1
            optimizer.clear_grad()
-            batch_start = time.time()
+            reader_start = time.time()
        if dist.get_rank() == 0:
            save_model(
                model,
@@ -383,7 +385,11 @@ def eval(model,
    with paddle.no_grad():
        total_frame = 0.0
        total_time = 0.0
-        pbar = tqdm(total=len(valid_dataloader), desc='eval model:')
+        pbar = tqdm(
+            total=len(valid_dataloader),
+            desc='eval model:',
+            position=0,
+            leave=True)
        max_iter = len(valid_dataloader) - 1 if platform.system(
        ) == "Windows" else len(valid_dataloader)
        for idx, batch in enumerate(valid_dataloader):