From 3919b4647964f0ef59fbef58121e26f3d9bf24e2 Mon Sep 17 00:00:00 2001
From: shippingwang
Date: Thu, 9 Jul 2020 07:13:51 +0000
Subject: [PATCH] add static debug tool

Add an opt-in debug mode that is enabled by setting the PADDLECLAS_DEBUG
environment variable to a non-zero integer. When the variable is unset the
debug mode is off and the existing behaviour is unchanged.

- ppcls/data/imaug/operators.py: RandCropImage resizes the uncropped image
  and RandFlipImage returns the image unchanged.
- ppcls/data/reader.py: the reader yields DEBUG.fake_input (label 1)
  DEBUG.repeat_times times instead of reading the file list.
- tools/program.py: variables whose name contains 'conv6_weights' (and not
  'velocity') are appended to the fetch list and logged after every step.
- configs/debug.yaml, tools/run_debug.sh: sample config and launcher.
---
 configs/debug.yaml            | 92 +++++++++++++++++++++++++++++++++++
 ppcls/data/imaug/operators.py | 13 +++--
 ppcls/data/reader.py          |  8 ++-
 tools/program.py              | 30 +++++++++++-
 tools/run_debug.sh            |  8 +++
 tools/train.py                |  6 +++
 6 files changed, 151 insertions(+), 6 deletions(-)
 create mode 100644 configs/debug.yaml
 create mode 100644 tools/run_debug.sh

diff --git a/configs/debug.yaml b/configs/debug.yaml
new file mode 100644
index 00000000..f6ff9f2f
--- /dev/null
+++ b/configs/debug.yaml
@@ -0,0 +1,92 @@
+mode: 'train'
+ARCHITECTURE:
+    name: "MobileNetV1"
+
+pretrained_model: ""
+model_save_dir: "./output/"
+classes_num: 1000
+total_images: 1281167
+save_interval: 1
+validate: True
+valid_interval: 1
+epochs: 2
+topk: 5
+image_shape: [3, 224, 224]
+
+LEARNING_RATE:
+    function: 'Piecewise'
+    params:
+        lr: 0.1
+        decay_epochs: [30, 60, 90]
+        gamma: 0.1
+
+OPTIMIZER:
+    function: 'Momentum'
+    params:
+        momentum: 0.9
+    regularizer:
+        function: 'L2'
+        factor: 0.00003
+
+TRAIN:
+    batch_size: 4
+    num_workers: 4
+    file_list: "./dataset/ILSVRC2012/val_list_debug.txt"
+    data_dir: "./dataset/ILSVRC2012/"
+    shuffle_seed: 0
+    transforms:
+        - DecodeImage:
+            to_rgb: True
+            to_np: False
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1./255.
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+        - ToCHWImage:
+
+
+
+VALID:
+    batch_size: 64
+    num_workers: 4
+    file_list: "./dataset/ILSVRC2012/val_list.txt"
+    data_dir: "./dataset/ILSVRC2012/"
+    shuffle_seed: 0
+    transforms:
+        - DecodeImage:
+            to_rgb: True
+            to_np: False
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+        - ToCHWImage:
+
+# Used only when PADDLECLAS_DEBUG is set; this patch reads only fake_input and repeat_times.
+DEBUG:
+    fake_input: '1.jpg'
+    repeat_times: 8
+    epoch_num: 1
+    forward: True
+    backward: False
+    fetch_list: 'ALL'
+    disable_random:
+        drop_out:
+        random_preprocess:
+    init_function:
+        weights: 1
+        bias: 0
+    save_report: True
+    print_summary: True
diff --git a/ppcls/data/imaug/operators.py b/ppcls/data/imaug/operators.py
index 49c4d95c..2293269a 100644
--- a/ppcls/data/imaug/operators.py
+++ b/ppcls/data/imaug/operators.py
@@ -24,6 +24,7 @@ import math
 import random
 import cv2
 import numpy as np
+import os
 
 from .autoaugment import ImageNetPolicy
 
@@ -151,12 +152,16 @@ class RandCropImage(object):
 
         i = random.randint(0, img_w - w)
         j = random.randint(0, img_h - h)
+        # With PADDLECLAS_DEBUG set to a non-zero integer the random crop is
+        # skipped and the whole image is resized instead.
+        img_adj = img[j:j + h, i:i + w, :]
 
-        img = img[j:j + h, i:i + w, :]
+        if int(os.environ.get('PADDLECLAS_DEBUG', '0')):
+            img_adj = img
         if self.interpolation is None:
-            return cv2.resize(img, size)
+            return cv2.resize(img_adj, size)
         else:
-            return cv2.resize(img, size, interpolation=self.interpolation)
+            return cv2.resize(img_adj, size, interpolation=self.interpolation)
 
 
 class RandFlipImage(object):
@@ -173,6 +178,8 @@ class RandFlipImage(object):
         self.flip_code = flip_code
 
     def __call__(self, img):
+        if int(os.environ.get('PADDLECLAS_DEBUG', '0')):
+            return img
         if random.randint(0, 1) == 1:
             return cv2.flip(img, self.flip_code)
         else:
diff --git a/ppcls/data/reader.py b/ppcls/data/reader.py
index 7433944c..93338512 100755
--- a/ppcls/data/reader.py
+++ b/ppcls/data/reader.py
@@ -192,7 +192,9 @@ def partial_reader(params, full_lines, part_id=0, part_num=1, batch_size=1):
         delimiter = params.get('delimiter', ' ')
         for line in full_lines:
             img_path, label = line.split(delimiter)
+
             img_path = os.path.join(params['data_dir'], img_path)
+
             with open(img_path, 'rb') as f:
                 img = f.read()
             yield (transform(img, ops), int(label))
@@ -259,6 +261,8 @@ class Reader:
         self.use_gpu = config.get("use_gpu", True)
         use_mix = config.get('use_mix')
         self.params['mode'] = mode
+        if int(os.environ.get('PADDLECLAS_DEBUG', '0')):
+            self.params['debug'] = config['DEBUG']
         if seed is not None:
             self.params['shuffle_seed'] = seed
         self.batch_ops = []
@@ -278,6 +282,9 @@ class Reader:
         def wrapper():
             reader = mp_reader(self.params, batch_size)
             batch = []
+            if int(os.environ.get('PADDLECLAS_DEBUG', '0')):
+                debug_cfg = self.params['debug']
+                reader = partial_reader(self.params, [debug_cfg['fake_input'] + ' 1'] * debug_cfg['repeat_times'])
             for idx, sample in enumerate(reader()):
                 img, label = sample
                 batch.append((img, label))
@@ -288,6 +295,5 @@ class Reader:
 
         return wrapper
 
-
 signal.signal(signal.SIGINT, term_mp)
 signal.signal(signal.SIGTERM, term_mp)
diff --git a/tools/program.py b/tools/program.py
index 12a1a6ed..0c492266 100644
--- a/tools/program.py
+++ b/tools/program.py
@@ -369,6 +369,14 @@ def build(config, main_prog, startup_prog, is_train=True, is_distributed=True):
                         config.get('ema_decay'), thres_steps=global_steps)
                     ema.update()
                     return dataloader, fetchs, ema
 
+    if int(os.environ.get('PADDLECLAS_DEBUG', '0')):
+        debug_vars = []
+        for var in main_prog.list_vars():
+            # Fetch conv6 weight variables, skipping any 'velocity' variables.
+            if 'velocity' not in var.name and 'conv6_weights' in var.name:
+                debug_vars.append(var.name)
+        fetchs['debug'] = debug_vars
+
     return dataloader, fetchs
 
@@ -425,16 +433,32 @@ def run(dataloader,
 
     Returns:
     """
-    fetch_list = [f[0] for f in fetchs.values()]
-    metric_list = [f[1] for f in fetchs.values()]
+    # Metric variables go first in fetch_list; the optional 'debug' variables
+    # are appended last so they can be split off again after exe.run().
+    fetch_list = []
+    metric_list = []
+    debug_vars = []
+    for k, v in fetchs.items():
+        if k == 'debug':
+            debug_vars.extend(v)
+        else:
+            fetch_list.append(v[0])
+            metric_list.append(v[1])
+    fetch_list.extend(debug_vars)
     for m in metric_list:
         m.reset()
     batch_time = AverageMeter('elapse', '.3f')
     tic = time.time()
     for idx, batch in enumerate(dataloader()):
+
         metrics = exe.run(program=program, feed=batch, fetch_list=fetch_list)
+
         batch_time.update(time.time() - tic)
         tic = time.time()
+        if debug_vars:
+            logger.info("DEBUG {} ".format(metrics))
+        # Keep only the leading entries, which line up with metric_list.
+        metrics = metrics[:len(metric_list)]
         for i, m in enumerate(metrics):
             metric_list[i].update(np.mean(m), len(batch[0]))
         fetchs_str = ''.join([str(m.value) + ' '
@@ -467,6 +491,8 @@ def run(dataloader,
                 logger.coloring(step_str, "PURPLE"),
                 logger.coloring(fetchs_str, 'OKGREEN')))
 
+        #sys.stdout.flush()
+
     end_str = ''.join([str(m.mean) + ' '
                        for m in metric_list] + [batch_time.total]) + 's'
     if mode == 'eval':
diff --git a/tools/run_debug.sh b/tools/run_debug.sh
new file mode 100644
index 00000000..21ae9adc
--- /dev/null
+++ b/tools/run_debug.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+export PYTHONPATH=$PWD:$PYTHONPATH
+export PADDLECLAS_DEBUG=1
+python -m paddle.distributed.launch \
+    --selected_gpus="0" \
+    tools/train.py \
+        -c ./configs/debug.yaml
diff --git a/tools/train.py b/tools/train.py
index e9188eb0..d832d231 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -49,6 +49,12 @@ def parse_args():
         action='append',
         default=[],
         help='config options to be overridden')
+    parser.add_argument(
+        '-d',
+        '--debug',
+        default=False,
+        help='debug'
+    )
     args = parser.parse_args()
     return args
 
-- 
GitLab