From 3919b4647964f0ef59fbef58121e26f3d9bf24e2 Mon Sep 17 00:00:00 2001
From: shippingwang <shipeng1108@163.com>
Date: Thu, 9 Jul 2020 07:13:51 +0000
Subject: [PATCH] add static debug tool

---
 configs/debug.yaml            | 92 +++++++++++++++++++++++++++++++++++
 ppcls/data/imaug/operators.py | 13 +++--
 ppcls/data/reader.py          |  8 ++-
 tools/program.py              | 30 +++++++++++-
 tools/run_debug.sh            |  8 +++
 tools/train.py                |  6 +++
 6 files changed, 151 insertions(+), 6 deletions(-)
 create mode 100644 configs/debug.yaml
 create mode 100644 tools/run_debug.sh

diff --git a/configs/debug.yaml b/configs/debug.yaml
new file mode 100644
index 00000000..f6ff9f2f
--- /dev/null
+++ b/configs/debug.yaml
@@ -0,0 +1,92 @@
+mode: 'train'
+ARCHITECTURE:
+    name: "MobileNetV1"
+
+pretrained_model: ""
+model_save_dir: "./output/"
+classes_num: 1000
+total_images: 1281167
+save_interval: 1
+validate: True
+valid_interval: 1
+epochs: 2
+topk: 5
+image_shape: [3, 224, 224]
+
+LEARNING_RATE:
+   function: 'Piecewise'
+   params:
+       lr: 0.1
+       decay_epochs: [30, 60, 90]
+       gamma: 0.1
+
+OPTIMIZER:
+    function: 'Momentum'
+    params:
+        momentum: 0.9
+    regularizer:
+        function: 'L2'
+        factor: 0.00003
+
+TRAIN:
+    batch_size: 4
+    num_workers: 4
+    file_list: "./dataset/ILSVRC2012/val_list_debug.txt"
+    data_dir: "./dataset/ILSVRC2012/"
+    shuffle_seed: 0
+    transforms:
+        - DecodeImage:
+            to_rgb: True
+            to_np: False
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1./255.
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+        - ToCHWImage:
+
+
+
+VALID:
+    batch_size: 64
+    num_workers: 4
+    file_list: "./dataset/ILSVRC2012/val_list.txt"
+    data_dir: "./dataset/ILSVRC2012/"
+    shuffle_seed: 0
+    transforms:
+        - DecodeImage:
+            to_rgb: True
+            to_np: False
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+        - ToCHWImage:
+# DEBUG is copied into the reader params only when PADDLECLAS_DEBUG is set.
+# The reader uses fake_input and repeat_times; this patch reads no other key.
+DEBUG:
+    fake_input: '1.jpg'
+    repeat_times: 8
+    epoch_num: 1
+    forward: True
+    backward: False
+    fetch_list: 'ALL'
+    disable_random:
+        drop_out:
+        random_preprocess:
+        init_function:
+            weights: 1
+            bias: 0
+    save_report: True
+    print_summary: True
diff --git a/ppcls/data/imaug/operators.py b/ppcls/data/imaug/operators.py
index 49c4d95c..2293269a 100644
--- a/ppcls/data/imaug/operators.py
+++ b/ppcls/data/imaug/operators.py
@@ -24,6 +24,7 @@ import math
 import random
 import cv2
 import numpy as np
+import os
 
 from .autoaugment import ImageNetPolicy
 
@@ -151,12 +152,16 @@ class RandCropImage(object):
 
         i = random.randint(0, img_w - w)
         j = random.randint(0, img_h - h)
+        # PADDLECLAS_DEBUG set to a non-zero integer skips the random crop and
+        # resizes the whole image; unset (treated as 0) keeps the crop.
+        img_adj = img[j:j + h, i:i + w, :]
 
-        img = img[j:j + h, i:i + w, :]
+        if int(os.environ.get('PADDLECLAS_DEBUG', '0')):
+            img_adj = img
         if self.interpolation is None:
-            return cv2.resize(img, size)
+            return cv2.resize(img_adj, size)
         else:
-            return cv2.resize(img, size, interpolation=self.interpolation)
+            return cv2.resize(img_adj, size, interpolation=self.interpolation)
 
 
 class RandFlipImage(object):
@@ -173,6 +178,8 @@ class RandFlipImage(object):
         self.flip_code = flip_code
 
     def __call__(self, img):
+        if int(os.environ.get('PADDLECLAS_DEBUG', '0')):  # unset means debug off
+            return img  # debug mode: never flip
         if random.randint(0, 1) == 1:
             return cv2.flip(img, self.flip_code)
         else:
diff --git a/ppcls/data/reader.py b/ppcls/data/reader.py
index 7433944c..93338512 100755
--- a/ppcls/data/reader.py
+++ b/ppcls/data/reader.py
@@ -192,7 +192,9 @@ def partial_reader(params, full_lines, part_id=0, part_num=1, batch_size=1):
         delimiter = params.get('delimiter', ' ')
         for line in full_lines:
             img_path, label = line.split(delimiter)
+
             img_path = os.path.join(params['data_dir'], img_path)
+
             with open(img_path, 'rb') as f:
                 img = f.read()
             yield (transform(img, ops), int(label))
@@ -259,6 +261,8 @@ class Reader:
         self.use_gpu = config.get("use_gpu", True)
         use_mix = config.get('use_mix')
         self.params['mode'] = mode
+        if int(os.environ.get('PADDLECLAS_DEBUG', '0')):  # unset means debug off
+            self.params['debug'] = config['DEBUG']  # read back by the batch wrapper
         if seed is not None:
             self.params['shuffle_seed'] = seed
         self.batch_ops = []
@@ -278,6 +282,9 @@ class Reader:
         def wrapper():
             reader = mp_reader(self.params, batch_size)
             batch = []
+            if int(os.environ.get('PADDLECLAS_DEBUG', '0')):  # unset means debug off
+                debug = self.params['debug']  # stored by __init__ under the same flag
+                reader = partial_reader(self.params, [debug['fake_input'] + ' 1'] * debug['repeat_times'])
             for idx, sample in enumerate(reader()):
                 img, label = sample
                 batch.append((img, label))
@@ -288,6 +295,5 @@ class Reader:
 
         return wrapper
 
-
 signal.signal(signal.SIGINT, term_mp)
 signal.signal(signal.SIGTERM, term_mp)
diff --git a/tools/program.py b/tools/program.py
index 12a1a6ed..0c492266 100644
--- a/tools/program.py
+++ b/tools/program.py
@@ -369,6 +369,14 @@ def build(config, main_prog, startup_prog, is_train=True, is_distributed=True):
                         config.get('ema_decay'), thres_steps=global_steps)
                     ema.update()
                     return dataloader, fetchs, ema
+            if int(os.environ.get('PADDLECLAS_DEBUG', '0')):
+                # Debug mode: also fetch variables named '*conv6_weights*',
+                # excluding the '*velocity*' ones (presumably optimizer state).
+                debug_vars = [
+                    var.name for var in main_prog.list_vars()
+                    if 'velocity' not in var.name and 'conv6_weights' in var.name
+                ]
+                fetchs['debug'] = debug_vars
 
     return dataloader, fetchs
 
@@ -425,16 +433,32 @@ def run(dataloader,
 
     Returns:
     """
-    fetch_list = [f[0] for f in fetchs.values()]
-    metric_list = [f[1] for f in fetchs.values()]
+    # 'debug' holds extra variable names; they always go after the metrics.
+    fetch_list = []
+    metric_list = []
+    debug_vars = []
+    for k, v in fetchs.items():
+        if k == 'debug':
+            debug_vars.extend(v)
+        else:
+            fetch_list.append(v[0])
+            metric_list.append(v[1])
+    debug_ops = len(debug_vars)
+    fetch_list.extend(debug_vars)
     for m in metric_list:
         m.reset()
     batch_time = AverageMeter('elapse', '.3f')
     tic = time.time()
     for idx, batch in enumerate(dataloader()):
+        # Results follow fetch_list order: metric fetches, then debug vars.
         metrics = exe.run(program=program, feed=batch, fetch_list=fetch_list)
+
         batch_time.update(time.time() - tic)
         tic = time.time()
+        if debug_ops:
+            # metrics[:-0] would be empty, so slice only when debug vars exist.
+            logger.info("DEBUG {} ".format(metrics))
+            metrics = metrics[:-debug_ops]
         for i, m in enumerate(metrics):
             metric_list[i].update(np.mean(m), len(batch[0]))
         fetchs_str = ''.join([str(m.value) + ' '
@@ -467,6 +491,8 @@ def run(dataloader,
                         logger.coloring(step_str, "PURPLE"),
                         logger.coloring(fetchs_str, 'OKGREEN')))
 
+        #sys.stdout.flush()
+
     end_str = ''.join([str(m.mean) + ' '
                        for m in metric_list] + [batch_time.total]) + 's'
     if mode == 'eval':
diff --git a/tools/run_debug.sh b/tools/run_debug.sh
new file mode 100644
index 00000000..21ae9adc
--- /dev/null
+++ b/tools/run_debug.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+# Debug entry point: PADDLECLAS_DEBUG gates the debug-only code paths.
+export PYTHONPATH=$PWD:$PYTHONPATH
+export PADDLECLAS_DEBUG=1
+python -m paddle.distributed.launch \
+    --selected_gpus="0" \
+    tools/train.py \
+        -c ./configs/debug.yaml
diff --git a/tools/train.py b/tools/train.py
index e9188eb0..d832d231 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -49,6 +49,12 @@ def parse_args():
         action='append',
         default=[],
         help='config options to be overridden')
+    parser.add_argument(
+        '-d',
+        '--debug',
+        action='store_true',  # plain flag; without it any value was a truthy str
+        default=False,
+        help='debug')
     args = parser.parse_args()
     return args
 
-- 
GitLab