未验证 提交 57705aa5 编写于 作者: R ruri 提交者: GitHub

refine image classification dataloader, optimizer, etc. (#4101)

上级 c889c814
......@@ -53,6 +53,9 @@ add_arg('same_feed', int, 0, "Whether to feed same im
add_arg('print_step', int, 1, "the batch step to print info")
# yapf: enable
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def eval(args):
model_list = [m for m in dir(models) if "__" not in m]
......@@ -159,7 +162,7 @@ def eval(args):
info = "Testbatch {0},loss {1}, acc1 {2},acc5 {3},time {4}".format(real_iter, \
"%.5f"%loss,"%.5f"%acc1, "%.5f"%acc5, \
"%2.2f sec" % period)
print(info)
logger.info(info)
sys.stdout.flush()
parallel_id = []
......@@ -179,7 +182,7 @@ def eval(args):
"test_acc5": test_acc5
}
save_json(info_dict, args.save_json_path)
print(info)
logger.info(info)
sys.stdout.flush()
......
......@@ -24,6 +24,7 @@ import numpy as np
import argparse
import functools
import re
import logging
import paddle
import paddle.fluid as fluid
......@@ -56,6 +57,9 @@ add_arg('batch_size', int, 8, "batch_size on all the d
add_arg('save_json_path', str, None, "save output to a json file")
# yapf: enable
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def infer(args):
model_list = [m for m in dir(models) if "__" not in m]
......@@ -117,7 +121,7 @@ def infer(args):
executor=exe,
model_filename='model',
params_filename='params')
print("model: ", args.model, " is already saved")
# Logger methods take a %-style format string followed by its arguments
# (unlike print); without a placeholder the record fails to format.
logger.info("model: %s is already saved", args.model)
exit(0)
imagenet_reader = reader.ImageNetReader()
......@@ -126,7 +130,8 @@ def infer(args):
TOPK = args.topk
if os.path.exists(args.class_map_path):
print("The map of readable label and numerical label has been found!")
logger.info(
"The map of readable label and numerical label has been found!")
with open(args.class_map_path) as f:
label_dict = {}
strinfo = re.compile(r"\d+ ")
......@@ -174,7 +179,7 @@ def infer(args):
info[real_id]['score'], info[real_id]['class'] = str(res[
pred_label]), str(pred_label)
print(real_id, info[real_id])
# The first argument of logger.info is the format string; give both values
# explicit placeholders so the prediction info is actually rendered.
logger.info("%s %s", real_id, info[real_id])
sys.stdout.flush()
if args.save_json_path:
......
......@@ -16,12 +16,16 @@ import argparse
import numpy as np
import cv2
import os
import logging
from paddle import fluid
from paddle.fluid.core import PaddleTensor
from paddle.fluid.core import AnalysisConfig
from paddle.fluid.core import create_paddle_predictor
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def resize_short(img, target_size, interpolation=None):
"""resize image
......@@ -116,8 +120,8 @@ def predict(args):
cls = np.argmax(output)
score = output[cls]
print("class: ", cls)
print("score: ", score)
# Values are passed as lazy %-style arguments; a message without a
# placeholder would raise a formatting error inside the logging handler.
logger.info("class: %s", cls)
logger.info("score: %s", score)
return
......
......@@ -16,6 +16,7 @@ import os
import math
import random
import functools
import logging
import numpy as np
import cv2
......@@ -26,6 +27,9 @@ from PIL import Image
policy = None
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
random.seed(0)
np.random.seed(0)
......@@ -257,7 +261,7 @@ def process_batch_data(input_data, settings, mode, color_jitter, rotate):
batch_data.append(
process_image(sample, settings, mode, color_jitter, rotate))
else:
print("File not exist : %s" % sample[0])
logger.info("File not exist : %s" % sample[0])
return batch_data
......@@ -272,17 +276,19 @@ class ImageNetReader:
def _get_single_card_bs(self, settings, mode):
    """Return the batch size handled by a single device.

    The global batch size is coerced to ``int`` and floor-divided by the
    device count: the value of ``paddle.fluid.core.get_cuda_device_count()``
    when ``settings.use_gpu`` is truthy, otherwise the ``CPU_NUM``
    environment variable (default 1).

    Args:
        settings: object exposing ``use_gpu``, ``batch_size`` and,
            optionally, ``test_batch_size``.
        mode: reader mode; only ``"val"`` selects ``test_batch_size``.

    Returns:
        int: per-device batch size (floor division result).
    """
    # Use the test batch size only in validation mode and only when it is
    # actually set; a missing or None value falls back to batch_size.
    test_bs = getattr(settings, "test_batch_size", None)
    if mode == "val" and test_bs is not None:
        total_bs = test_bs
    else:
        total_bs = settings.batch_size

    if settings.use_gpu:
        device_count = paddle.fluid.core.get_cuda_device_count()
    else:
        device_count = int(os.environ.get('CPU_NUM', 1))

    # int() guards against batch sizes that arrive as strings.
    return int(total_bs) // device_count
......@@ -303,8 +309,8 @@ class ImageNetReader:
with open(file_list) as flist:
full_lines = [line.strip() for line in flist]
if mode != "test" and len(full_lines) < settings.batch_size:
print(
"Warning: The number of the whole data ({}) is smaller than the batch_size ({}), and drop_last is turnning on, so nothing will feed in program, Terminated now. Please reset batch_size to a smaller number or feed more data!".
logger.error(
"Error: The number of the whole data ({}) is smaller than the batch_size ({}), and drop_last is turnning on, so nothing will feed in program, Terminated now. Please set the batch_size to a smaller number or feed more data!".
format(len(full_lines), settings.batch_size))
os._exit(1)
if num_trainers > 1 and mode == "train":
......@@ -318,8 +324,8 @@ class ImageNetReader:
batch_data = []
if (mode == "train" or mode == "val") and settings.same_feed:
temp_file = full_lines[0]
print("Same images({},nums:{}) will feed in the net".format(
str(temp_file), settings.same_feed))
logger.info("Same images({},nums:{}) will feed in the net".
format(str(temp_file), settings.same_feed))
full_lines = []
for i in range(settings.same_feed):
full_lines.append(temp_file)
......
......@@ -19,6 +19,7 @@ from __future__ import print_function
import os
import time
import sys
import logging
import numpy as np
import paddle
......@@ -29,6 +30,9 @@ from utils import *
import models
from build_model import create_model
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def build_program(is_train, main_prog, startup_prog, args):
"""build program, and add backward op in program accroding to different mode
......@@ -259,12 +263,12 @@ def train(args):
if trainer_id == 0 and args.validate:
if args.use_ema:
print('ExponentialMovingAverage validate start...')
logger.info('ExponentialMovingAverage validate start...')
with ema.apply(exe):
validate(args, test_iter, exe, test_prog, test_fetch_list,
pass_id, train_batch_metrics_record,
compiled_train_prog)
print('ExponentialMovingAverage validate over!')
logger.info('ExponentialMovingAverage validate over!')
validate(args, test_iter, exe, test_prog, test_fetch_list, pass_id,
train_batch_metrics_record, train_batch_time_record,
......
......@@ -18,6 +18,9 @@ from __future__ import print_function
import os
import paddle.fluid as fluid
# `logging` is not among the imports visible in this module's header, so it
# is imported here to keep the logger setup below from raising NameError.
import logging

# Module-level logger; INFO and above are emitted through the root handler.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def nccl2_prepare(args, startup_prog, main_prog):
config = fluid.DistributeTranspilerConfig()
......@@ -81,8 +84,8 @@ def prepare_for_multi_process(exe, build_strategy, train_prog):
trainer_id = int(os.environ.get('PADDLE_TRAINER_ID', 0))
num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
if num_trainers < 2: return
print("PADDLE_TRAINERS_NUM", num_trainers)
print("PADDLE_TRAINER_ID", trainer_id)
# Logger calls need a placeholder for every extra argument; without one the
# record fails to format and the trainer info is never shown.
logger.info("PADDLE_TRAINERS_NUM %s", num_trainers)
logger.info("PADDLE_TRAINER_ID %s", trainer_id)
build_strategy.num_trainers = num_trainers
build_strategy.trainer_id = trainer_id
# NOTE(zcd): use multi processes to train the model,
......
......@@ -16,8 +16,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import distutils.util
import numpy as np
import six
import argparse
import functools
......@@ -28,13 +26,21 @@ import warnings
import signal
import json
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.wrapped_decorator import signature_safe_contextmanager
from paddle.fluid.framework import Program, program_guard, name_scope, default_main_program
from paddle.fluid import unique_name, layers
import distutils.util
from utils import dist_utils
from utils.optimizer import Optimizer
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def print_arguments(args):
"""Print argparse's arguments.
......@@ -51,10 +57,11 @@ def print_arguments(args):
:param args: Input argparse.Namespace for printing.
:type args: argparse.Namespace
"""
print("------------- Configuration Arguments -------------")
logger.info("------------- Configuration Arguments -------------")
for arg, value in sorted(six.iteritems(vars(args))):
print("%25s : %s" % (arg, value))
print("----------------------------------------------------")
logger.info("%25s : %s" % (arg, value))
logger.info("----------------------------------------------------")
def add_arguments(argname, type, default, help, argparser, **kwargs):
......@@ -104,7 +111,7 @@ def parse_args():
add_arg('num_epochs', int, 120, "The number of total epochs.")
add_arg('class_dim', int, 1000, "The number of total classes.")
add_arg('batch_size', int, 8, "Minibatch size on all the devices.")
add_arg('test_batch_size', int, None, "Test batch size on all the devices.")
add_arg('test_batch_size', int, 8, "Test batch size on all the devices.")
add_arg('lr', float, 0.1, "The learning rate.")
add_arg('lr_strategy', str, "piecewise_decay", "The learning rate decay strategy.")
add_arg('l2_decay', float, 1e-4, "The l2_decay parameter.")
......@@ -174,7 +181,7 @@ def check_gpu():
try:
if args.use_gpu and not fluid.is_compiled_with_cuda():
print(err)
logger.error(err)
sys.exit(1)
except Exception as e:
pass
......@@ -192,7 +199,7 @@ def check_version():
try:
fluid.require_version('1.6.0')
except Exception as e:
print(err)
logger.error(err)
sys.exit(1)
......@@ -202,6 +209,8 @@ def check_args(args):
Args:
all arguments
"""
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# check models name
sys.path.append("..")
......@@ -211,29 +220,30 @@ def check_args(args):
args.model, model_list)
# check learning rate strategy
lr_strategy_list = [
"piecewise_decay", "cosine_decay", "linear_decay",
"cosine_decay_warmup", "exponential_decay_warmup"
]
lr_strategy_list = [l for l in dir(Optimizer) if not l.startswith('__')]
if args.lr_strategy not in lr_strategy_list:
warnings.warn(
"\n{} is not in lists: {}, \nUse default learning strategy now.".
logger.warning(
"\n{} is not in lists: {}, \nUse default learning strategy now!".
format(args.lr_strategy, lr_strategy_list))
args.lr_strategy = "default_decay"
# check conflict of GoogLeNet and mixup
if args.model == "GoogLeNet":
assert args.use_mixup == False, "Cannot use mixup processing in GoogLeNet, please set use_mixup = False."
# check interpolation of reader settings
if args.interpolation:
assert args.interpolation in [
0, 1, 2, 3, 4
], "Wrong interpolation, please set:\n0: cv2.INTER_NEAREST\n1: cv2.INTER_LINEAR\n2: cv2.INTER_CUBIC\n3: cv2.INTER_AREA\n4: cv2.INTER_LANCZOS4"
# check padding type
if args.padding_type:
assert args.padding_type in [
"SAME", "VALID", "DYNAMIC"
], "Wrong padding_type, please set:\nSAME\nVALID\nDYNAMIC"
# check checkpoint and pretrained_model
assert args.checkpoint is None or args.pretrained_model is None, "Do not init model by checkpoint and pretrained_model both."
# check pretrained_model path for loading
......@@ -250,14 +260,6 @@ def check_args(args):
args.checkpoint
), "please support available checkpoint path for initing model."
# check params for loading
"""
if args.save_params:
assert isinstance(args.save_params, str)
assert os.path.isdir(
args.save_params), "please support available save_params path."
"""
# check gpu: when using gpu, the number of visible cards should divide batch size
if args.use_gpu:
assert args.batch_size % fluid.core.get_cuda_device_count(
......@@ -269,14 +271,17 @@ def check_args(args):
args.data_dir
), "Data doesn't exist in {}, please load right path".format(args.data_dir)
# check CE
if args.enable_ce:
args.random_seed = 0
print("CE is running now!")
logger.warning("CE is running now! already set random seed to 0")
# check class_dim
assert args.class_dim > 1, "class_dim must greater than 1"
# check dali preprocess
if args.use_dali:
print(
logger.warning(
"DALI preprocessing is activated!!!\nWarning: 1. Please make sure paddlepaddle is compiled by GCC5.4 or later version!\n\t 2. Please make sure nightly builds DALI is installed correctly.\n----------------------------------------------------"
)
......@@ -291,7 +296,7 @@ def init_model(exe, args, program):
if args.checkpoint:
fluid.io.load_persistables(exe, args.checkpoint, main_program=program)
print("Finish initing model from %s" % (args.checkpoint))
logger.info("Finish initing model from %s" % (args.checkpoint))
if args.pretrained_model:
"""
......@@ -332,7 +337,7 @@ def init_model(exe, args, program):
Parameter) and not fc_exclude_flag and os.path.exists(
os.path.join(args.pretrained_model, var.name))
print("Load pretrain weights from {}, exclude params {}.".format(
logger.info("Load pretrain weights from {}, exclude params {}.".format(
args.pretrained_model, final_fc_name))
vars = filter(is_parameter, program.list_vars())
fluid.io.load_vars(
......@@ -347,7 +352,7 @@ def save_model(args, exe, train_prog, info):
if not os.path.isdir(model_path):
os.makedirs(model_path)
fluid.io.save_persistables(exe, model_path, main_program=train_prog)
print("Already save model in %s" % (model_path))
logger.info("Already save model in %s" % (model_path))
def save_json(info, path):
......@@ -382,6 +387,8 @@ def create_data_loader(is_train, args):
feed_y_a = fluid.data(
name="feed_y_a", shape=[None, 1], dtype="int64", lod_level=0)
capacity = 64 if int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) <= 1 else 8
if is_train and args.use_mixup:
feed_y_b = fluid.data(
name="feed_y_b", shape=[None, 1], dtype="int64", lod_level=0)
......@@ -390,7 +397,7 @@ def create_data_loader(is_train, args):
data_loader = fluid.io.DataLoader.from_generator(
feed_list=[feed_image, feed_y_a, feed_y_b, feed_lam],
capacity=64,
capacity=capacity,
use_double_buffer=True,
iterable=True)
return data_loader, [feed_image, feed_y_a, feed_y_b, feed_lam]
......@@ -400,7 +407,7 @@ def create_data_loader(is_train, args):
data_loader = fluid.io.DataLoader.from_generator(
feed_list=[feed_image, feed_label],
capacity=64,
capacity=capacity,
use_double_buffer=True,
iterable=True)
......@@ -432,14 +439,14 @@ def print_info(info_mode,
# train and mixup output
if len(metrics) == 2:
loss, lr = metrics
print(
logger.info(
"[Pass {0}, train batch {1}] \tloss {2}, lr {3}, elapse {4}".
format(pass_id, batch_id, "%.5f" % loss, "%.5f" % lr,
"%2.4f sec" % time_info))
# train and no mixup output
elif len(metrics) == 4:
loss, acc1, acc5, lr = metrics
print(
logger.info(
"[Pass {0}, train batch {1}] \tloss {2}, acc1 {3}, acc{7} {4}, lr {5}, elapse {6}".
format(pass_id, batch_id, "%.5f" % loss, "%.5f" % acc1,
"%.5f" % acc5, "%.5f" % lr, "%2.4f sec" % time_info,
......@@ -447,7 +454,7 @@ def print_info(info_mode,
# test output
elif len(metrics) == 3:
loss, acc1, acc5 = metrics
print(
logger.info(
"[Pass {0}, test batch {1}] \tloss {2}, acc1 {3}, acc{6} {4}, elapse {5}".
format(pass_id, batch_id, "%.5f" % loss, "%.5f" % acc1,
"%.5f" % acc5, "%2.4f sec" % time_info,
......@@ -462,13 +469,13 @@ def print_info(info_mode,
## TODO add time elapse
if len(metrics) == 5:
train_loss, _, test_loss, test_acc1, test_acc5 = metrics
print(
logger.info(
"[End pass {0}]\ttrain_loss {1}, test_loss {2}, test_acc1 {3}, test_acc{5} {4}".
format(pass_id, "%.5f" % train_loss, "%.5f" % test_loss, "%.5f"
% test_acc1, "%.5f" % test_acc5, min(class_dim, 5)))
elif len(metrics) == 7:
train_loss, train_acc1, train_acc5, _, test_loss, test_acc1, test_acc5 = metrics
print(
logger.info(
"[End pass {0}]\ttrain_loss {1}, train_acc1 {2}, train_acc{7} {3},test_loss {4}, test_acc1 {5}, test_acc{7} {6}".
format(pass_id, "%.5f" % train_loss, "%.5f" % train_acc1, "%.5f"
% train_acc5, "%.5f" % test_loss, "%.5f" % test_acc1,
......@@ -493,13 +500,13 @@ def print_ce(device_num, metrics, time_info):
train_speed = np.mean(np.array(time_info[10:]))
print("kpis\ttrain_cost_card{}\t{}".format(device_num, train_loss))
print("kpis\ttrain_acc1_card{}\t{}".format(device_num, train_acc1))
print("kpis\ttrain_acc5_card{}\t{}".format(device_num, train_acc5))
print("kpis\ttest_cost_card{}\t{}".format(device_num, test_loss))
print("kpis\ttest_acc1_card{}\t{}".format(device_num, test_acc1))
print("kpis\ttest_acc5_card{}\t{}".format(device_num, test_acc5))
print("kpis\ttrain_speed_card{}\t{}".format(device_num, train_speed))
logger.info("kpis\ttrain_cost_card{}\t{}".format(device_num, train_loss))
logger.info("kpis\ttrain_acc1_card{}\t{}".format(device_num, train_acc1))
logger.info("kpis\ttrain_acc5_card{}\t{}".format(device_num, train_acc5))
logger.info("kpis\ttest_cost_card{}\t{}".format(device_num, test_loss))
logger.info("kpis\ttest_acc1_card{}\t{}".format(device_num, test_acc1))
logger.info("kpis\ttest_acc5_card{}\t{}".format(device_num, test_acc5))
logger.info("kpis\ttrain_speed_card{}\t{}".format(device_num, train_speed))
def best_strategy_compiled(args,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册