未验证 提交 57705aa5 编写于 作者: R ruri 提交者: GitHub

refine image classification dataloader, optimizer, etc. (#4101)

上级 c889c814
...@@ -53,6 +53,9 @@ add_arg('same_feed', int, 0, "Whether to feed same im ...@@ -53,6 +53,9 @@ add_arg('same_feed', int, 0, "Whether to feed same im
add_arg('print_step', int, 1, "the batch step to print info") add_arg('print_step', int, 1, "the batch step to print info")
# yapf: enable # yapf: enable
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def eval(args): def eval(args):
model_list = [m for m in dir(models) if "__" not in m] model_list = [m for m in dir(models) if "__" not in m]
...@@ -159,7 +162,7 @@ def eval(args): ...@@ -159,7 +162,7 @@ def eval(args):
info = "Testbatch {0},loss {1}, acc1 {2},acc5 {3},time {4}".format(real_iter, \ info = "Testbatch {0},loss {1}, acc1 {2},acc5 {3},time {4}".format(real_iter, \
"%.5f"%loss,"%.5f"%acc1, "%.5f"%acc5, \ "%.5f"%loss,"%.5f"%acc1, "%.5f"%acc5, \
"%2.2f sec" % period) "%2.2f sec" % period)
print(info) logger.info(info)
sys.stdout.flush() sys.stdout.flush()
parallel_id = [] parallel_id = []
...@@ -179,7 +182,7 @@ def eval(args): ...@@ -179,7 +182,7 @@ def eval(args):
"test_acc5": test_acc5 "test_acc5": test_acc5
} }
save_json(info_dict, args.save_json_path) save_json(info_dict, args.save_json_path)
print(info) logger.info(info)
sys.stdout.flush() sys.stdout.flush()
......
...@@ -24,6 +24,7 @@ import numpy as np ...@@ -24,6 +24,7 @@ import numpy as np
import argparse import argparse
import functools import functools
import re import re
import logging
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -56,6 +57,9 @@ add_arg('batch_size', int, 8, "batch_size on all the d ...@@ -56,6 +57,9 @@ add_arg('batch_size', int, 8, "batch_size on all the d
add_arg('save_json_path', str, None, "save output to a json file") add_arg('save_json_path', str, None, "save output to a json file")
# yapf: enable # yapf: enable
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def infer(args): def infer(args):
model_list = [m for m in dir(models) if "__" not in m] model_list = [m for m in dir(models) if "__" not in m]
...@@ -117,7 +121,7 @@ def infer(args): ...@@ -117,7 +121,7 @@ def infer(args):
executor=exe, executor=exe,
model_filename='model', model_filename='model',
params_filename='params') params_filename='params')
print("model: ", args.model, " is already saved") logger.info("model: ", args.model, " is already saved")
exit(0) exit(0)
imagenet_reader = reader.ImageNetReader() imagenet_reader = reader.ImageNetReader()
...@@ -126,7 +130,8 @@ def infer(args): ...@@ -126,7 +130,8 @@ def infer(args):
TOPK = args.topk TOPK = args.topk
if os.path.exists(args.class_map_path): if os.path.exists(args.class_map_path):
print("The map of readable label and numerical label has been found!") logger.info(
"The map of readable label and numerical label has been found!")
with open(args.class_map_path) as f: with open(args.class_map_path) as f:
label_dict = {} label_dict = {}
strinfo = re.compile(r"\d+ ") strinfo = re.compile(r"\d+ ")
...@@ -174,7 +179,7 @@ def infer(args): ...@@ -174,7 +179,7 @@ def infer(args):
info[real_id]['score'], info[real_id]['class'] = str(res[ info[real_id]['score'], info[real_id]['class'] = str(res[
pred_label]), str(pred_label) pred_label]), str(pred_label)
print(real_id, info[real_id]) logger.info(real_id, info[real_id])
sys.stdout.flush() sys.stdout.flush()
if args.save_json_path: if args.save_json_path:
......
...@@ -16,12 +16,16 @@ import argparse ...@@ -16,12 +16,16 @@ import argparse
import numpy as np import numpy as np
import cv2 import cv2
import os import os
import logging
from paddle import fluid from paddle import fluid
from paddle.fluid.core import PaddleTensor from paddle.fluid.core import PaddleTensor
from paddle.fluid.core import AnalysisConfig from paddle.fluid.core import AnalysisConfig
from paddle.fluid.core import create_paddle_predictor from paddle.fluid.core import create_paddle_predictor
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def resize_short(img, target_size, interpolation=None): def resize_short(img, target_size, interpolation=None):
"""resize image """resize image
...@@ -116,8 +120,8 @@ def predict(args): ...@@ -116,8 +120,8 @@ def predict(args):
cls = np.argmax(output) cls = np.argmax(output)
score = output[cls] score = output[cls]
print("class: ", cls) logger.info("class: ", cls)
print("score: ", score) logger.info("score: ", score)
return return
......
...@@ -16,6 +16,7 @@ import os ...@@ -16,6 +16,7 @@ import os
import math import math
import random import random
import functools import functools
import logging
import numpy as np import numpy as np
import cv2 import cv2
...@@ -26,6 +27,9 @@ from PIL import Image ...@@ -26,6 +27,9 @@ from PIL import Image
policy = None policy = None
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
random.seed(0) random.seed(0)
np.random.seed(0) np.random.seed(0)
...@@ -257,7 +261,7 @@ def process_batch_data(input_data, settings, mode, color_jitter, rotate): ...@@ -257,7 +261,7 @@ def process_batch_data(input_data, settings, mode, color_jitter, rotate):
batch_data.append( batch_data.append(
process_image(sample, settings, mode, color_jitter, rotate)) process_image(sample, settings, mode, color_jitter, rotate))
else: else:
print("File not exist : %s" % sample[0]) logger.info("File not exist : %s" % sample[0])
return batch_data return batch_data
...@@ -272,17 +276,19 @@ class ImageNetReader: ...@@ -272,17 +276,19 @@ class ImageNetReader:
def _get_single_card_bs(self, settings, mode): def _get_single_card_bs(self, settings, mode):
if settings.use_gpu: if settings.use_gpu:
if mode == "val" and hasattr(settings, "test_batch_size"): if mode == "val" and hasattr(settings, "test_batch_size"):
single_card_bs = settings.test_batch_size // paddle.fluid.core.get_cuda_device_count( single_card_bs = int(
) settings.test_batch_size
) // paddle.fluid.core.get_cuda_device_count()
else: else:
single_card_bs = settings.batch_size // paddle.fluid.core.get_cuda_device_count( single_card_bs = int(
) settings.
batch_size) // paddle.fluid.core.get_cuda_device_count()
else: else:
if mode == "val" and hasattr(settings, "test_batch_size"): if mode == "val" and hasattr(settings, "test_batch_size"):
single_card_bs = settings.test_batch_size // int( single_card_bs = int(settings.test_batch_size) // int(
os.environ.get('CPU_NUM', 1)) os.environ.get('CPU_NUM', 1))
else: else:
single_card_bs = settings.batch_size // int( single_card_bs = int(settings.batch_size) // int(
os.environ.get('CPU_NUM', 1)) os.environ.get('CPU_NUM', 1))
return single_card_bs return single_card_bs
...@@ -303,8 +309,8 @@ class ImageNetReader: ...@@ -303,8 +309,8 @@ class ImageNetReader:
with open(file_list) as flist: with open(file_list) as flist:
full_lines = [line.strip() for line in flist] full_lines = [line.strip() for line in flist]
if mode != "test" and len(full_lines) < settings.batch_size: if mode != "test" and len(full_lines) < settings.batch_size:
print( logger.error(
"Warning: The number of the whole data ({}) is smaller than the batch_size ({}), and drop_last is turnning on, so nothing will feed in program, Terminated now. Please reset batch_size to a smaller number or feed more data!". "Error: The number of the whole data ({}) is smaller than the batch_size ({}), and drop_last is turnning on, so nothing will feed in program, Terminated now. Please set the batch_size to a smaller number or feed more data!".
format(len(full_lines), settings.batch_size)) format(len(full_lines), settings.batch_size))
os._exit(1) os._exit(1)
if num_trainers > 1 and mode == "train": if num_trainers > 1 and mode == "train":
...@@ -318,8 +324,8 @@ class ImageNetReader: ...@@ -318,8 +324,8 @@ class ImageNetReader:
batch_data = [] batch_data = []
if (mode == "train" or mode == "val") and settings.same_feed: if (mode == "train" or mode == "val") and settings.same_feed:
temp_file = full_lines[0] temp_file = full_lines[0]
print("Same images({},nums:{}) will feed in the net".format( logger.info("Same images({},nums:{}) will feed in the net".
str(temp_file), settings.same_feed)) format(str(temp_file), settings.same_feed))
full_lines = [] full_lines = []
for i in range(settings.same_feed): for i in range(settings.same_feed):
full_lines.append(temp_file) full_lines.append(temp_file)
......
...@@ -19,6 +19,7 @@ from __future__ import print_function ...@@ -19,6 +19,7 @@ from __future__ import print_function
import os import os
import time import time
import sys import sys
import logging
import numpy as np import numpy as np
import paddle import paddle
...@@ -29,6 +30,9 @@ from utils import * ...@@ -29,6 +30,9 @@ from utils import *
import models import models
from build_model import create_model from build_model import create_model
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def build_program(is_train, main_prog, startup_prog, args): def build_program(is_train, main_prog, startup_prog, args):
"""build program, and add backward op in program accroding to different mode """build program, and add backward op in program accroding to different mode
...@@ -259,12 +263,12 @@ def train(args): ...@@ -259,12 +263,12 @@ def train(args):
if trainer_id == 0 and args.validate: if trainer_id == 0 and args.validate:
if args.use_ema: if args.use_ema:
print('ExponentialMovingAverage validate start...') logger.info('ExponentialMovingAverage validate start...')
with ema.apply(exe): with ema.apply(exe):
validate(args, test_iter, exe, test_prog, test_fetch_list, validate(args, test_iter, exe, test_prog, test_fetch_list,
pass_id, train_batch_metrics_record, pass_id, train_batch_metrics_record,
compiled_train_prog) compiled_train_prog)
print('ExponentialMovingAverage validate over!') logger.info('ExponentialMovingAverage validate over!')
validate(args, test_iter, exe, test_prog, test_fetch_list, pass_id, validate(args, test_iter, exe, test_prog, test_fetch_list, pass_id,
train_batch_metrics_record, train_batch_time_record, train_batch_metrics_record, train_batch_time_record,
......
...@@ -18,6 +18,9 @@ from __future__ import print_function ...@@ -18,6 +18,9 @@ from __future__ import print_function
import os import os
import paddle.fluid as fluid import paddle.fluid as fluid
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def nccl2_prepare(args, startup_prog, main_prog): def nccl2_prepare(args, startup_prog, main_prog):
config = fluid.DistributeTranspilerConfig() config = fluid.DistributeTranspilerConfig()
...@@ -81,8 +84,8 @@ def prepare_for_multi_process(exe, build_strategy, train_prog): ...@@ -81,8 +84,8 @@ def prepare_for_multi_process(exe, build_strategy, train_prog):
trainer_id = int(os.environ.get('PADDLE_TRAINER_ID', 0)) trainer_id = int(os.environ.get('PADDLE_TRAINER_ID', 0))
num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
if num_trainers < 2: return if num_trainers < 2: return
print("PADDLE_TRAINERS_NUM", num_trainers) logger.info("PADDLE_TRAINERS_NUM", num_trainers)
print("PADDLE_TRAINER_ID", trainer_id) logger.info("PADDLE_TRAINER_ID", trainer_id)
build_strategy.num_trainers = num_trainers build_strategy.num_trainers = num_trainers
build_strategy.trainer_id = trainer_id build_strategy.trainer_id = trainer_id
# NOTE(zcd): use multi processes to train the model, # NOTE(zcd): use multi processes to train the model,
......
...@@ -16,8 +16,6 @@ from __future__ import absolute_import ...@@ -16,8 +16,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import distutils.util
import numpy as np
import six import six
import argparse import argparse
import functools import functools
...@@ -28,13 +26,21 @@ import warnings ...@@ -28,13 +26,21 @@ import warnings
import signal import signal
import json import json
import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.wrapped_decorator import signature_safe_contextmanager from paddle.fluid.wrapped_decorator import signature_safe_contextmanager
from paddle.fluid.framework import Program, program_guard, name_scope, default_main_program from paddle.fluid.framework import Program, program_guard, name_scope, default_main_program
from paddle.fluid import unique_name, layers from paddle.fluid import unique_name, layers
import distutils.util
from utils import dist_utils from utils import dist_utils
from utils.optimizer import Optimizer
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def print_arguments(args): def print_arguments(args):
"""Print argparse's arguments. """Print argparse's arguments.
...@@ -51,10 +57,11 @@ def print_arguments(args): ...@@ -51,10 +57,11 @@ def print_arguments(args):
:param args: Input argparse.Namespace for printing. :param args: Input argparse.Namespace for printing.
:type args: argparse.Namespace :type args: argparse.Namespace
""" """
print("------------- Configuration Arguments -------------")
logger.info("------------- Configuration Arguments -------------")
for arg, value in sorted(six.iteritems(vars(args))): for arg, value in sorted(six.iteritems(vars(args))):
print("%25s : %s" % (arg, value)) logger.info("%25s : %s" % (arg, value))
print("----------------------------------------------------") logger.info("----------------------------------------------------")
def add_arguments(argname, type, default, help, argparser, **kwargs): def add_arguments(argname, type, default, help, argparser, **kwargs):
...@@ -104,7 +111,7 @@ def parse_args(): ...@@ -104,7 +111,7 @@ def parse_args():
add_arg('num_epochs', int, 120, "The number of total epochs.") add_arg('num_epochs', int, 120, "The number of total epochs.")
add_arg('class_dim', int, 1000, "The number of total classes.") add_arg('class_dim', int, 1000, "The number of total classes.")
add_arg('batch_size', int, 8, "Minibatch size on all the devices.") add_arg('batch_size', int, 8, "Minibatch size on all the devices.")
add_arg('test_batch_size', int, None, "Test batch size on all the devices.") add_arg('test_batch_size', int, 8, "Test batch size on all the devices.")
add_arg('lr', float, 0.1, "The learning rate.") add_arg('lr', float, 0.1, "The learning rate.")
add_arg('lr_strategy', str, "piecewise_decay", "The learning rate decay strategy.") add_arg('lr_strategy', str, "piecewise_decay", "The learning rate decay strategy.")
add_arg('l2_decay', float, 1e-4, "The l2_decay parameter.") add_arg('l2_decay', float, 1e-4, "The l2_decay parameter.")
...@@ -174,7 +181,7 @@ def check_gpu(): ...@@ -174,7 +181,7 @@ def check_gpu():
try: try:
if args.use_gpu and not fluid.is_compiled_with_cuda(): if args.use_gpu and not fluid.is_compiled_with_cuda():
print(err) logger.error(err)
sys.exit(1) sys.exit(1)
except Exception as e: except Exception as e:
pass pass
...@@ -192,7 +199,7 @@ def check_version(): ...@@ -192,7 +199,7 @@ def check_version():
try: try:
fluid.require_version('1.6.0') fluid.require_version('1.6.0')
except Exception as e: except Exception as e:
print(err) logger.error(err)
sys.exit(1) sys.exit(1)
...@@ -202,6 +209,8 @@ def check_args(args): ...@@ -202,6 +209,8 @@ def check_args(args):
Args: Args:
all arguments all arguments
""" """
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# check models name # check models name
sys.path.append("..") sys.path.append("..")
...@@ -211,29 +220,30 @@ def check_args(args): ...@@ -211,29 +220,30 @@ def check_args(args):
args.model, model_list) args.model, model_list)
# check learning rate strategy # check learning rate strategy
lr_strategy_list = [ lr_strategy_list = [l for l in dir(Optimizer) if not l.startswith('__')]
"piecewise_decay", "cosine_decay", "linear_decay",
"cosine_decay_warmup", "exponential_decay_warmup"
]
if args.lr_strategy not in lr_strategy_list: if args.lr_strategy not in lr_strategy_list:
warnings.warn( logger.warning(
"\n{} is not in lists: {}, \nUse default learning strategy now.". "\n{} is not in lists: {}, \nUse default learning strategy now!".
format(args.lr_strategy, lr_strategy_list)) format(args.lr_strategy, lr_strategy_list))
args.lr_strategy = "default_decay" args.lr_strategy = "default_decay"
# check confict of GoogLeNet and mixup # check confict of GoogLeNet and mixup
if args.model == "GoogLeNet": if args.model == "GoogLeNet":
assert args.use_mixup == False, "Cannot use mixup processing in GoogLeNet, please set use_mixup = False." assert args.use_mixup == False, "Cannot use mixup processing in GoogLeNet, please set use_mixup = False."
# check interpolation of reader settings
if args.interpolation: if args.interpolation:
assert args.interpolation in [ assert args.interpolation in [
0, 1, 2, 3, 4 0, 1, 2, 3, 4
], "Wrong interpolation, please set:\n0: cv2.INTER_NEAREST\n1: cv2.INTER_LINEAR\n2: cv2.INTER_CUBIC\n3: cv2.INTER_AREA\n4: cv2.INTER_LANCZOS4" ], "Wrong interpolation, please set:\n0: cv2.INTER_NEAREST\n1: cv2.INTER_LINEAR\n2: cv2.INTER_CUBIC\n3: cv2.INTER_AREA\n4: cv2.INTER_LANCZOS4"
# check padding type
if args.padding_type: if args.padding_type:
assert args.padding_type in [ assert args.padding_type in [
"SAME", "VALID", "DYNAMIC" "SAME", "VALID", "DYNAMIC"
], "Wrong padding_type, please set:\nSAME\nVALID\nDYNAMIC" ], "Wrong padding_type, please set:\nSAME\nVALID\nDYNAMIC"
# check checkpint and pretrained_model
assert args.checkpoint is None or args.pretrained_model is None, "Do not init model by checkpoint and pretrained_model both." assert args.checkpoint is None or args.pretrained_model is None, "Do not init model by checkpoint and pretrained_model both."
# check pretrained_model path for loading # check pretrained_model path for loading
...@@ -250,14 +260,6 @@ def check_args(args): ...@@ -250,14 +260,6 @@ def check_args(args):
args.checkpoint args.checkpoint
), "please support available checkpoint path for initing model." ), "please support available checkpoint path for initing model."
# check params for loading
"""
if args.save_params:
assert isinstance(args.save_params, str)
assert os.path.isdir(
args.save_params), "please support available save_params path."
"""
# check gpu: when using gpu, the number of visible cards should divide batch size # check gpu: when using gpu, the number of visible cards should divide batch size
if args.use_gpu: if args.use_gpu:
assert args.batch_size % fluid.core.get_cuda_device_count( assert args.batch_size % fluid.core.get_cuda_device_count(
...@@ -269,14 +271,17 @@ def check_args(args): ...@@ -269,14 +271,17 @@ def check_args(args):
args.data_dir args.data_dir
), "Data doesn't exist in {}, please load right path".format(args.data_dir) ), "Data doesn't exist in {}, please load right path".format(args.data_dir)
# check CE
if args.enable_ce: if args.enable_ce:
args.random_seed = 0 args.random_seed = 0
print("CE is running now!") logger.warning("CE is running now! already set random seed to 0")
# check class_dim
assert args.class_dim > 1, "class_dim must greater than 1" assert args.class_dim > 1, "class_dim must greater than 1"
# check dali preprocess
if args.use_dali: if args.use_dali:
print( logger.warning(
"DALI preprocessing is activated!!!\nWarning: 1. Please make sure paddlepaddle is compiled by GCC5.4 or later version!\n\t 2. Please make sure nightly builds DALI is installed correctly.\n----------------------------------------------------" "DALI preprocessing is activated!!!\nWarning: 1. Please make sure paddlepaddle is compiled by GCC5.4 or later version!\n\t 2. Please make sure nightly builds DALI is installed correctly.\n----------------------------------------------------"
) )
...@@ -291,7 +296,7 @@ def init_model(exe, args, program): ...@@ -291,7 +296,7 @@ def init_model(exe, args, program):
if args.checkpoint: if args.checkpoint:
fluid.io.load_persistables(exe, args.checkpoint, main_program=program) fluid.io.load_persistables(exe, args.checkpoint, main_program=program)
print("Finish initing model from %s" % (args.checkpoint)) logger.info("Finish initing model from %s" % (args.checkpoint))
if args.pretrained_model: if args.pretrained_model:
""" """
...@@ -332,7 +337,7 @@ def init_model(exe, args, program): ...@@ -332,7 +337,7 @@ def init_model(exe, args, program):
Parameter) and not fc_exclude_flag and os.path.exists( Parameter) and not fc_exclude_flag and os.path.exists(
os.path.join(args.pretrained_model, var.name)) os.path.join(args.pretrained_model, var.name))
print("Load pretrain weights from {}, exclude params {}.".format( logger.info("Load pretrain weights from {}, exclude params {}.".format(
args.pretrained_model, final_fc_name)) args.pretrained_model, final_fc_name))
vars = filter(is_parameter, program.list_vars()) vars = filter(is_parameter, program.list_vars())
fluid.io.load_vars( fluid.io.load_vars(
...@@ -347,7 +352,7 @@ def save_model(args, exe, train_prog, info): ...@@ -347,7 +352,7 @@ def save_model(args, exe, train_prog, info):
if not os.path.isdir(model_path): if not os.path.isdir(model_path):
os.makedirs(model_path) os.makedirs(model_path)
fluid.io.save_persistables(exe, model_path, main_program=train_prog) fluid.io.save_persistables(exe, model_path, main_program=train_prog)
print("Already save model in %s" % (model_path)) logger.info("Already save model in %s" % (model_path))
def save_json(info, path): def save_json(info, path):
...@@ -382,6 +387,8 @@ def create_data_loader(is_train, args): ...@@ -382,6 +387,8 @@ def create_data_loader(is_train, args):
feed_y_a = fluid.data( feed_y_a = fluid.data(
name="feed_y_a", shape=[None, 1], dtype="int64", lod_level=0) name="feed_y_a", shape=[None, 1], dtype="int64", lod_level=0)
capacity = 64 if int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) <= 1 else 8
if is_train and args.use_mixup: if is_train and args.use_mixup:
feed_y_b = fluid.data( feed_y_b = fluid.data(
name="feed_y_b", shape=[None, 1], dtype="int64", lod_level=0) name="feed_y_b", shape=[None, 1], dtype="int64", lod_level=0)
...@@ -390,7 +397,7 @@ def create_data_loader(is_train, args): ...@@ -390,7 +397,7 @@ def create_data_loader(is_train, args):
data_loader = fluid.io.DataLoader.from_generator( data_loader = fluid.io.DataLoader.from_generator(
feed_list=[feed_image, feed_y_a, feed_y_b, feed_lam], feed_list=[feed_image, feed_y_a, feed_y_b, feed_lam],
capacity=64, capacity=capacity,
use_double_buffer=True, use_double_buffer=True,
iterable=True) iterable=True)
return data_loader, [feed_image, feed_y_a, feed_y_b, feed_lam] return data_loader, [feed_image, feed_y_a, feed_y_b, feed_lam]
...@@ -400,7 +407,7 @@ def create_data_loader(is_train, args): ...@@ -400,7 +407,7 @@ def create_data_loader(is_train, args):
data_loader = fluid.io.DataLoader.from_generator( data_loader = fluid.io.DataLoader.from_generator(
feed_list=[feed_image, feed_label], feed_list=[feed_image, feed_label],
capacity=64, capacity=capacity,
use_double_buffer=True, use_double_buffer=True,
iterable=True) iterable=True)
...@@ -432,14 +439,14 @@ def print_info(info_mode, ...@@ -432,14 +439,14 @@ def print_info(info_mode,
# train and mixup output # train and mixup output
if len(metrics) == 2: if len(metrics) == 2:
loss, lr = metrics loss, lr = metrics
print( logger.info(
"[Pass {0}, train batch {1}] \tloss {2}, lr {3}, elapse {4}". "[Pass {0}, train batch {1}] \tloss {2}, lr {3}, elapse {4}".
format(pass_id, batch_id, "%.5f" % loss, "%.5f" % lr, format(pass_id, batch_id, "%.5f" % loss, "%.5f" % lr,
"%2.4f sec" % time_info)) "%2.4f sec" % time_info))
# train and no mixup output # train and no mixup output
elif len(metrics) == 4: elif len(metrics) == 4:
loss, acc1, acc5, lr = metrics loss, acc1, acc5, lr = metrics
print( logger.info(
"[Pass {0}, train batch {1}] \tloss {2}, acc1 {3}, acc{7} {4}, lr {5}, elapse {6}". "[Pass {0}, train batch {1}] \tloss {2}, acc1 {3}, acc{7} {4}, lr {5}, elapse {6}".
format(pass_id, batch_id, "%.5f" % loss, "%.5f" % acc1, format(pass_id, batch_id, "%.5f" % loss, "%.5f" % acc1,
"%.5f" % acc5, "%.5f" % lr, "%2.4f sec" % time_info, "%.5f" % acc5, "%.5f" % lr, "%2.4f sec" % time_info,
...@@ -447,7 +454,7 @@ def print_info(info_mode, ...@@ -447,7 +454,7 @@ def print_info(info_mode,
# test output # test output
elif len(metrics) == 3: elif len(metrics) == 3:
loss, acc1, acc5 = metrics loss, acc1, acc5 = metrics
print( logger.info(
"[Pass {0}, test batch {1}] \tloss {2}, acc1 {3}, acc{6} {4}, elapse {5}". "[Pass {0}, test batch {1}] \tloss {2}, acc1 {3}, acc{6} {4}, elapse {5}".
format(pass_id, batch_id, "%.5f" % loss, "%.5f" % acc1, format(pass_id, batch_id, "%.5f" % loss, "%.5f" % acc1,
"%.5f" % acc5, "%2.4f sec" % time_info, "%.5f" % acc5, "%2.4f sec" % time_info,
...@@ -462,13 +469,13 @@ def print_info(info_mode, ...@@ -462,13 +469,13 @@ def print_info(info_mode,
## TODO add time elapse ## TODO add time elapse
if len(metrics) == 5: if len(metrics) == 5:
train_loss, _, test_loss, test_acc1, test_acc5 = metrics train_loss, _, test_loss, test_acc1, test_acc5 = metrics
print( logger.info(
"[End pass {0}]\ttrain_loss {1}, test_loss {2}, test_acc1 {3}, test_acc{5} {4}". "[End pass {0}]\ttrain_loss {1}, test_loss {2}, test_acc1 {3}, test_acc{5} {4}".
format(pass_id, "%.5f" % train_loss, "%.5f" % test_loss, "%.5f" format(pass_id, "%.5f" % train_loss, "%.5f" % test_loss, "%.5f"
% test_acc1, "%.5f" % test_acc5, min(class_dim, 5))) % test_acc1, "%.5f" % test_acc5, min(class_dim, 5)))
elif len(metrics) == 7: elif len(metrics) == 7:
train_loss, train_acc1, train_acc5, _, test_loss, test_acc1, test_acc5 = metrics train_loss, train_acc1, train_acc5, _, test_loss, test_acc1, test_acc5 = metrics
print( logger.info(
"[End pass {0}]\ttrain_loss {1}, train_acc1 {2}, train_acc{7} {3},test_loss {4}, test_acc1 {5}, test_acc{7} {6}". "[End pass {0}]\ttrain_loss {1}, train_acc1 {2}, train_acc{7} {3},test_loss {4}, test_acc1 {5}, test_acc{7} {6}".
format(pass_id, "%.5f" % train_loss, "%.5f" % train_acc1, "%.5f" format(pass_id, "%.5f" % train_loss, "%.5f" % train_acc1, "%.5f"
% train_acc5, "%.5f" % test_loss, "%.5f" % test_acc1, % train_acc5, "%.5f" % test_loss, "%.5f" % test_acc1,
...@@ -493,13 +500,13 @@ def print_ce(device_num, metrics, time_info): ...@@ -493,13 +500,13 @@ def print_ce(device_num, metrics, time_info):
train_speed = np.mean(np.array(time_info[10:])) train_speed = np.mean(np.array(time_info[10:]))
print("kpis\ttrain_cost_card{}\t{}".format(device_num, train_loss)) logger.info("kpis\ttrain_cost_card{}\t{}".format(device_num, train_loss))
print("kpis\ttrain_acc1_card{}\t{}".format(device_num, train_acc1)) logger.info("kpis\ttrain_acc1_card{}\t{}".format(device_num, train_acc1))
print("kpis\ttrain_acc5_card{}\t{}".format(device_num, train_acc5)) logger.info("kpis\ttrain_acc5_card{}\t{}".format(device_num, train_acc5))
print("kpis\ttest_cost_card{}\t{}".format(device_num, test_loss)) logger.info("kpis\ttest_cost_card{}\t{}".format(device_num, test_loss))
print("kpis\ttest_acc1_card{}\t{}".format(device_num, test_acc1)) logger.info("kpis\ttest_acc1_card{}\t{}".format(device_num, test_acc1))
print("kpis\ttest_acc5_card{}\t{}".format(device_num, test_acc5)) logger.info("kpis\ttest_acc5_card{}\t{}".format(device_num, test_acc5))
print("kpis\ttrain_speed_card{}\t{}".format(device_num, train_speed)) logger.info("kpis\ttrain_speed_card{}\t{}".format(device_num, train_speed))
def best_strategy_compiled(args, def best_strategy_compiled(args,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册