未验证 提交 020d1072 编写于 作者: W wuyefeilin 提交者: GitHub

Save load update (#257)

* update model save load

* first add

* update model save and load

* update train.py

* update LaneNet model saving and loading

* adapt slim to paddle-1.8

* update distillation save and load

* update nas model save and load

* update model load op

* update utils.py

* update load_model_utils.py

* update model saving and loading
上级 1b36f1ab
......@@ -205,11 +205,9 @@ def load_pretrained_weights(exe, main_prog, weights_dir, fuse_bn=False):
vars_to_load.append(var)
logging.debug("Weight {} will be load".format(var.name))
fluid.io.load_vars(
executor=exe,
dirname=weights_dir,
main_program=main_prog,
vars=vars_to_load)
params_dict = fluid.io.load_program_state(
weights_dir, var_list=vars_to_load)
fluid.io.set_program_state(main_prog, params_dict)
if len(vars_to_load) == 0:
logging.warning(
"There is no pretrain weights loaded, maybe you should check you pretrain model!"
......
......@@ -122,7 +122,10 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
if ckpt_dir is not None:
print('load test model:', ckpt_dir)
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
try:
fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
except:
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
# Use streaming confusion matrix to calculate mean_iou
np.set_printoptions(
......
......@@ -40,10 +40,10 @@ from pdseg.utils.timer import Timer, calculate_eta
from reader import LaneNetDataset
from models.model_builder import build_model
from models.model_builder import ModelPhase
from models.model_builder import parse_shape_from_file
from eval import evaluate
from vis import visualize
from utils import dist_utils
from utils.load_model_utils import load_pretrained_weights
def parse_args():
......@@ -101,37 +101,6 @@ def parse_args():
return parser.parse_args()
def save_vars(executor, dirname, program=None, vars=None):
    """
    Temporary resolution for Win save variables compatability.
    Will fix in PaddlePaddle v1.5.2

    Builds a throwaway program holding one `save` op per variable in
    ``vars`` and runs it, writing each variable to ``dirname/<name>``.
    """
    saver_prog = fluid.Program()
    target_block = saver_prog.global_block()

    raw_type = fluid.core.VarDesc.VarType.RAW
    for src_var in vars:
        # NOTE: RAW variables carry no tensor data, so skip saving them.
        if src_var.type == raw_type:
            continue
        mirror = target_block.create_var(
            name=src_var.name,
            shape=src_var.shape,
            dtype=src_var.dtype,
            type=src_var.type,
            lod_level=src_var.lod_level,
            persistable=True)
        out_path = os.path.normpath(os.path.join(dirname, mirror.name))
        target_block.append_op(
            type='save',
            inputs={'X': [mirror]},
            outputs={},
            attrs={'file_path': out_path})

    executor.run(saver_prog)
def save_checkpoint(exe, program, ckpt_name):
"""
Save checkpoint for evaluation or resume training
......@@ -141,29 +110,22 @@ def save_checkpoint(exe, program, ckpt_name):
if not os.path.isdir(ckpt_dir):
os.makedirs(ckpt_dir)
save_vars(
exe,
ckpt_dir,
program,
vars=list(filter(fluid.io.is_persistable, program.list_vars())))
fluid.save(program, os.path.join(ckpt_dir, 'model'))
return ckpt_dir
def load_checkpoint(exe, program):
"""
Load checkpoiont from pretrained model directory for resume training
Load checkpoiont for resuming training
"""
print('Resume model training from:', cfg.TRAIN.RESUME_MODEL_DIR)
if not os.path.exists(cfg.TRAIN.RESUME_MODEL_DIR):
raise ValueError("TRAIN.PRETRAIN_MODEL {} not exist!".format(
cfg.TRAIN.RESUME_MODEL_DIR))
fluid.io.load_persistables(
exe, cfg.TRAIN.RESUME_MODEL_DIR, main_program=program)
model_path = cfg.TRAIN.RESUME_MODEL_DIR
print('Resume model training from:', model_path)
if not os.path.exists(model_path):
raise ValueError(
"TRAIN.PRETRAIN_MODEL {} not exist!".format(model_path))
fluid.load(program, os.path.join(model_path, 'model'), exe)
# Check is path ended by path spearator
if model_path[-1] == os.sep:
model_path = model_path[0:-1]
......@@ -178,7 +140,6 @@ def load_checkpoint(exe, program):
else:
raise ValueError("Resume model path is not valid!")
print("Model checkpoint loaded successfully!")
return begin_epoch
......@@ -271,44 +232,7 @@ def train(cfg):
begin_epoch = load_checkpoint(exe, train_prog)
# Load pretrained model
elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
load_vars = []
load_fail_vars = []
def var_shape_matched(var, shape):
"""
Check whehter persitable variable shape is match with current network
"""
var_exist = os.path.exists(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
if var_exist:
var_shape = parse_shape_from_file(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
if var_shape != shape:
print(var.name, var_shape, shape)
return var_shape == shape
return False
for x in train_prog.list_vars():
if isinstance(x, fluid.framework.Parameter):
shape = tuple(fluid.global_scope().find_var(
x.name).get_tensor().shape())
if var_shape_matched(x, shape):
load_vars.append(x)
else:
load_fail_vars.append(x)
fluid.io.load_vars(
exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
for var in load_vars:
print_info("Parameter[{}] loaded sucessfully!".format(var.name))
for var in load_fail_vars:
print_info(
"Parameter[{}] don't exist or shape does not match current network, skip"
" to load it.".format(var.name))
print_info("{}/{} pretrained parameters loaded successfully!".format(
len(load_vars),
len(load_vars) + len(load_fail_vars)))
load_pretrained_weights(exe, train_prog, cfg.TRAIN.PRETRAINED_MODEL_DIR)
else:
print_info(
'Pretrained model dir {} not exists, training from scratch...'.
......@@ -393,8 +317,7 @@ def train(cfg):
avg_emb_loss, avg_acc, avg_fp, avg_fn, speed,
calculate_eta(all_step - step, speed)))
if args.use_vdl:
log_writer.add_scalar('Train/loss', avg_loss,
step)
log_writer.add_scalar('Train/loss', avg_loss, step)
log_writer.add_scalar('Train/lr', lr[0], step)
log_writer.add_scalar('Train/speed', speed, step)
sys.stdout.flush()
......@@ -423,8 +346,7 @@ def train(cfg):
use_gpu=args.use_gpu,
use_mpio=args.use_mpio)
if args.use_vdl:
log_writer.add_scalar('Evaluate/accuracy', accuracy,
step)
log_writer.add_scalar('Evaluate/accuracy', accuracy, step)
log_writer.add_scalar('Evaluate/fp', fp, step)
log_writer.add_scalar('Evaluate/fn', fn, step)
......
# coding: utf8
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path as osp
import six
import numpy as np
def parse_param_file(param_file, return_shape=True):
    """
    Parse a raw PaddlePaddle parameter file.

    On-disk layout (as consumed below): a 4-byte version, an 8-byte LoD
    level followed by that many (8-byte size + payload) LoD entries, a
    second 4-byte version, a 4-byte tensor-descriptor size, the serialized
    ``VarType.TensorDesc`` protobuf, then the raw float32 tensor data.

    Args:
        param_file (str): path to the parameter file.
        return_shape (bool): when True, only the tensor shape is parsed
            and returned; otherwise the weights are read as well.

    Returns:
        tuple: the tensor shape, when ``return_shape`` is True.
        np.ndarray: the float32 weights reshaped to the tensor shape.

    Raises:
        Exception: if the tensor data type is not float32 (proto enum 5).
    """
    from paddle.fluid.proto.framework_pb2 import VarType

    # Fixes vs. the previous version: `with` guarantees the handle is
    # closed even on the raise path below; np.frombuffer replaces the
    # deprecated np.fromstring; sizes passed to f.read() are plain ints
    # instead of 1-element ndarrays.
    with open(param_file, 'rb') as f:
        version = np.frombuffer(f.read(4), dtype='int32')
        lod_level = int(np.frombuffer(f.read(8), dtype='int64'))
        for i in range(lod_level):
            # Skip each LoD entry: an 8-byte size then its payload.
            _size = int(np.frombuffer(f.read(8), dtype='int64'))
            _ = f.read(_size)
        version = np.frombuffer(f.read(4), dtype='int32')
        tensor_desc = VarType.TensorDesc()
        tensor_desc_size = int(np.frombuffer(f.read(4), dtype='int32'))
        tensor_desc.ParseFromString(f.read(tensor_desc_size))
        tensor_shape = tuple(tensor_desc.dims)
        if return_shape:
            return tensor_shape
        if tensor_desc.data_type != 5:
            # 5 is the proto enum value for FP32; nothing else is supported.
            raise Exception(
                "Unexpected data type while parse {}".format(param_file))
        data_size = 4
        for i in range(len(tensor_shape)):
            data_size *= tensor_shape[i]
        # .copy() keeps the array writable, matching np.fromstring's
        # copy semantics (frombuffer alone returns a read-only view).
        weight = np.frombuffer(f.read(data_size), dtype='float32').copy()
    return np.reshape(weight, tensor_shape)
def load_pdparams(exe, main_prog, model_dir):
    """
    Restore parameters of ``main_prog`` from ``model_dir/model.pdparams``.

    Only parameters whose name exists in the saved state dict and whose
    shape matches the network are loaded; the rest are reported and
    skipped. Mismatching entries are deleted from the dict before calling
    ``fluid.io.set_program_state`` so it never assigns them.

    Args:
        exe: executor; unused here, kept so the signature matches the
            raw-file loading path in ``load_pretrained_weights``.
        main_prog: the fluid Program whose parameters are restored.
        model_dir (str): directory containing ``model.pdparams``.
    """
    # Fix: dropped the unused `VarType` and `Program` imports that the
    # previous version pulled in.
    import pickle

    import paddle.fluid as fluid

    vars_to_load = list()
    vars_not_load = list()
    with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f:
        # The checkpoint is a pickled {name: ndarray} dict; on PY3,
        # latin1 is needed to decode PY2-era pickles.
        params_dict = pickle.load(f) if six.PY2 else pickle.load(
            f, encoding='latin1')
    unused_vars = list()
    for var in main_prog.list_vars():
        if not isinstance(var, fluid.framework.Parameter):
            continue
        if var.name not in params_dict:
            print("{} is not in saved model".format(var.name))
            vars_not_load.append(var.name)
            continue
        if var.shape != params_dict[var.name].shape:
            unused_vars.append(var.name)
            vars_not_load.append(var.name)
            print(
                "[SKIP] Shape of pretrained weight {} doesn't match.(Pretrained: {}, Actual: {})"
                .format(var.name, params_dict[var.name].shape, var.shape))
            continue
        vars_to_load.append(var)
    for var_name in unused_vars:
        del params_dict[var_name]
    fluid.io.set_program_state(main_prog, params_dict)

    if len(vars_to_load) == 0:
        print(
            "There is no pretrain weights loaded, maybe you should check you pretrain model!"
        )
    else:
        print("There are {}/{} varaibles in {} are loaded.".format(
            len(vars_to_load),
            len(vars_to_load) + len(vars_not_load), model_dir))
def load_pretrained_weights(exe, main_prog, weights_dir):
    """
    Load pretrained weights from ``weights_dir`` into ``main_prog``.

    Dispatches to ``load_pdparams`` when a ``model.pdparams`` file is
    present; otherwise loads the per-variable raw files, skipping any
    parameter whose file is missing or whose stored shape differs from
    the current network's.
    """
    if not osp.exists(weights_dir):
        raise Exception("Path {} not exists.".format(weights_dir))
    if osp.exists(osp.join(weights_dir, "model.pdparams")):
        return load_pdparams(exe, main_prog, weights_dir)

    import paddle.fluid as fluid

    loaded, skipped = list(), list()
    for param in main_prog.list_vars():
        if not isinstance(param, fluid.framework.Parameter):
            continue
        weight_file = osp.join(weights_dir, param.name)
        if not osp.exists(weight_file):
            print("[SKIP] Pretrained weight {}/{} doesn't exist".format(
                weights_dir, param.name))
            skipped.append(param)
            continue
        saved_shape = parse_param_file(weight_file)
        net_shape = tuple(param.shape)
        if saved_shape != net_shape:
            print(
                "[SKIP] Shape of pretrained weight {}/{} doesn't match.(Pretrained: {}, Actual: {})"
                .format(weights_dir, param.name, saved_shape, net_shape))
            skipped.append(param)
            continue
        loaded.append(param)

    state = fluid.io.load_program_state(weights_dir, var_list=loaded)
    fluid.io.set_program_state(main_prog, state)

    if len(loaded) == 0:
        print(
            "There is no pretrain weights loaded, maybe you should check you pretrain model!"
        )
    else:
        print("There are {}/{} varaibles in {} are loaded.".format(
            len(loaded),
            len(loaded) + len(skipped), weights_dir))
......@@ -45,6 +45,7 @@ from models.model_builder import ModelPhase
from utils import lanenet_postprocess
import matplotlib.pyplot as plt
def parse_args():
parser = argparse.ArgumentParser(description='PaddeSeg visualization tools')
parser.add_argument(
......@@ -106,7 +107,6 @@ def minmax_scale(input_arr):
return output_arr
def visualize(cfg,
vis_file_list=None,
use_gpu=False,
......@@ -119,7 +119,6 @@ def visualize(cfg,
if vis_file_list is None:
vis_file_list = cfg.DATASET.TEST_FILE_LIST
dataset = LaneNetDataset(
file_list=vis_file_list,
mode=ModelPhase.VISUAL,
......@@ -139,7 +138,12 @@ def visualize(cfg,
ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
if ckpt_dir is not None:
print('load test model:', ckpt_dir)
try:
fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
except:
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
save_dir = os.path.join(vis_dir, 'visual_results')
makedirs(save_dir)
......@@ -161,22 +165,26 @@ def visualize(cfg,
for i in range(num_imgs):
gt_image = org_imgs[i]
binary_seg_image, instance_seg_image = segLogits[i].squeeze(-1), emLogits[i].transpose((1,2,0))
binary_seg_image, instance_seg_image = segLogits[i].squeeze(
-1), emLogits[i].transpose((1, 2, 0))
postprocess_result = postprocessor.postprocess(
binary_seg_result=binary_seg_image,
instance_seg_result=instance_seg_image,
source_image=gt_image
)
pred_binary_fn = os.path.join(save_dir, to_png_fn(img_names[i], name='_pred_binary'))
pred_lane_fn = os.path.join(save_dir, to_png_fn(img_names[i], name='_pred_lane'))
pred_instance_fn = os.path.join(save_dir, to_png_fn(img_names[i], name='_pred_instance'))
source_image=gt_image)
pred_binary_fn = os.path.join(
save_dir, to_png_fn(img_names[i], name='_pred_binary'))
pred_lane_fn = os.path.join(
save_dir, to_png_fn(img_names[i], name='_pred_lane'))
pred_instance_fn = os.path.join(
save_dir, to_png_fn(img_names[i], name='_pred_instance'))
dirname = os.path.dirname(pred_binary_fn)
makedirs(dirname)
mask_image = postprocess_result['mask_image']
for i in range(4):
instance_seg_image[:, :, i] = minmax_scale(instance_seg_image[:, :, i])
instance_seg_image[:, :, i] = minmax_scale(
instance_seg_image[:, :, i])
embedding_image = np.array(instance_seg_image).astype(np.uint8)
plt.figure('mask_image')
......@@ -189,13 +197,13 @@ def visualize(cfg,
plt.imshow(binary_seg_image * 255, cmap='gray')
plt.show()
cv2.imwrite(pred_binary_fn, np.array(binary_seg_image * 255).astype(np.uint8))
cv2.imwrite(pred_binary_fn,
np.array(binary_seg_image * 255).astype(np.uint8))
cv2.imwrite(pred_lane_fn, postprocess_result['source_image'])
cv2.imwrite(pred_instance_fn, mask_image)
print(pred_lane_fn, 'saved!')
if __name__ == '__main__':
args = parse_args()
if args.cfg_file is not None:
......
......@@ -201,11 +201,9 @@ def load_pretrain_weights(exe, main_prog, weights_dir, fuse_bn=False):
vars_to_load.append(var)
logging.debug("Weight {} will be load".format(var.name))
fluid.io.load_vars(
executor=exe,
dirname=weights_dir,
main_program=main_prog,
vars=vars_to_load)
params_dict = fluid.io.load_program_state(
weights_dir, var_list=vars_to_load)
fluid.io.set_program_state(main_prog, params_dict)
if len(vars_to_load) == 0:
logging.warning(
"There is no pretrain weights loaded, maybe you should check you pretrain model!"
......
......@@ -22,13 +22,9 @@ import os
os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0"
import sys
import time
import argparse
import functools
import pprint
import cv2
import numpy as np
import paddle
import paddle.fluid as fluid
from utils.config import cfg
......@@ -116,7 +112,10 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
if ckpt_dir is not None:
print('load test model:', ckpt_dir)
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
try:
fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
except:
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
# Use streaming confusion matrix to calculate mean_iou
np.set_printoptions(
......
......@@ -49,6 +49,7 @@ def parse_args():
sys.exit(1)
return parser.parse_args()
def export_inference_config():
deploy_cfg = '''DEPLOY:
USE_GPU : 1
......@@ -66,9 +67,8 @@ def export_inference_config():
PREDICTOR_MODE : "ANALYSIS"
BATCH_SIZE : 1
''' % (cfg.FREEZE.SAVE_DIR, cfg.FREEZE.MODEL_FILENAME,
cfg.FREEZE.PARAMS_FILENAME, cfg.EVAL_CROP_SIZE,
cfg.MEAN, cfg.STD, cfg.DATASET.IMAGE_TYPE,
cfg.DATASET.NUM_CLASSES, len(cfg.STD))
cfg.FREEZE.PARAMS_FILENAME, cfg.EVAL_CROP_SIZE, cfg.MEAN, cfg.STD,
cfg.DATASET.IMAGE_TYPE, cfg.DATASET.NUM_CLASSES, len(cfg.STD))
if not os.path.exists(cfg.FREEZE.SAVE_DIR):
os.mkdir(cfg.FREEZE.SAVE_DIR)
yaml_path = os.path.join(cfg.FREEZE.SAVE_DIR, 'deploy.yaml')
......@@ -94,7 +94,13 @@ def export_inference_model(args):
infer_prog = infer_prog.clone(for_test=True)
if os.path.exists(cfg.TEST.TEST_MODEL):
fluid.io.load_params(exe, cfg.TEST.TEST_MODEL, main_program=infer_prog)
print('load test model:', cfg.TEST.TEST_MODEL)
try:
fluid.load(infer_prog, os.path.join(cfg.TEST.TEST_MODEL, 'model'),
exe)
except:
fluid.io.load_params(
exe, cfg.TEST.TEST_MODEL, main_program=infer_prog)
else:
print("TEST.TEST_MODEL diretory is empty!")
exit(-1)
......
......@@ -26,9 +26,7 @@ import argparse
import pprint
import random
import shutil
import functools
import paddle
import numpy as np
import paddle.fluid as fluid
from paddle.fluid import profiler
......@@ -39,10 +37,10 @@ from metrics import ConfusionMatrix
from reader import SegDataset
from models.model_builder import build_model
from models.model_builder import ModelPhase
from models.model_builder import parse_shape_from_file
from eval import evaluate
from vis import visualize
from utils import dist_utils
from utils.load_model_utils import load_pretrained_weights
def parse_args():
......@@ -118,38 +116,7 @@ def parse_args():
return parser.parse_args()
def save_vars(executor, dirname, program=None, vars=None):
    """
    Temporary resolution for Win save variables compatability.
    Will fix in PaddlePaddle v1.5.2

    Builds a throwaway program holding one `save` op per variable in
    ``vars`` and runs it, writing each variable to ``dirname/<name>``.
    """
    saver_prog = fluid.Program()
    target_block = saver_prog.global_block()

    raw_type = fluid.core.VarDesc.VarType.RAW
    for src_var in vars:
        # NOTE: RAW variables carry no tensor data, so skip saving them.
        if src_var.type == raw_type:
            continue
        mirror = target_block.create_var(
            name=src_var.name,
            shape=src_var.shape,
            dtype=src_var.dtype,
            type=src_var.type,
            lod_level=src_var.lod_level,
            persistable=True)
        out_path = os.path.normpath(os.path.join(dirname, mirror.name))
        target_block.append_op(
            type='save',
            inputs={'X': [mirror]},
            outputs={},
            attrs={'file_path': out_path})

    executor.run(saver_prog)
def save_checkpoint(exe, program, ckpt_name):
def save_checkpoint(program, ckpt_name):
"""
Save checkpoint for evaluation or resume training
"""
......@@ -158,29 +125,22 @@ def save_checkpoint(exe, program, ckpt_name):
if not os.path.isdir(ckpt_dir):
os.makedirs(ckpt_dir)
save_vars(
exe,
ckpt_dir,
program,
vars=list(filter(fluid.io.is_persistable, program.list_vars())))
fluid.save(program, os.path.join(ckpt_dir, 'model'))
return ckpt_dir
def load_checkpoint(exe, program):
"""
Load checkpoiont from pretrained model directory for resume training
Load checkpoiont for resuming training
"""
print('Resume model training from:', cfg.TRAIN.RESUME_MODEL_DIR)
if not os.path.exists(cfg.TRAIN.RESUME_MODEL_DIR):
raise ValueError("TRAIN.PRETRAIN_MODEL {} not exist!".format(
cfg.TRAIN.RESUME_MODEL_DIR))
fluid.io.load_persistables(
exe, cfg.TRAIN.RESUME_MODEL_DIR, main_program=program)
model_path = cfg.TRAIN.RESUME_MODEL_DIR
print('Resume model training from:', model_path)
if not os.path.exists(model_path):
raise ValueError(
"TRAIN.PRETRAIN_MODEL {} not exist!".format(model_path))
fluid.load(program, os.path.join(model_path, 'model'), exe)
# Check is path ended by path spearator
if model_path[-1] == os.sep:
model_path = model_path[0:-1]
......@@ -195,7 +155,6 @@ def load_checkpoint(exe, program):
else:
raise ValueError("Resume model path is not valid!")
print("Model checkpoint loaded successfully!")
return begin_epoch
......@@ -247,8 +206,6 @@ def train(cfg):
yield item[0], item[1], item[2]
# Get device environment
# places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
# place = places[0]
gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
......@@ -304,42 +261,7 @@ def train(cfg):
begin_epoch = load_checkpoint(exe, train_prog)
# Load pretrained model
elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
load_vars = []
load_fail_vars = []
def var_shape_matched(var, shape):
"""
Check whehter persitable variable shape is match with current network
"""
var_exist = os.path.exists(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
if var_exist:
var_shape = parse_shape_from_file(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
return var_shape == shape
return False
for x in train_prog.list_vars():
if isinstance(x, fluid.framework.Parameter):
shape = tuple(fluid.global_scope().find_var(
x.name).get_tensor().shape())
if var_shape_matched(x, shape):
load_vars.append(x)
else:
load_fail_vars.append(x)
fluid.io.load_vars(
exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
for var in load_vars:
print_info("Parameter[{}] loaded sucessfully!".format(var.name))
for var in load_fail_vars:
print_info(
"Parameter[{}] don't exist or shape does not match current network, skip"
" to load it.".format(var.name))
print_info("{}/{} pretrained parameters loaded successfully!".format(
len(load_vars),
len(load_vars) + len(load_fail_vars)))
load_pretrained_weights(exe, train_prog, cfg.TRAIN.PRETRAINED_MODEL_DIR)
else:
print_info(
'Pretrained model dir {} not exists, training from scratch...'.
......@@ -418,12 +340,9 @@ def train(cfg):
step)
log_writer.add_scalar('Train/mean_acc', mean_acc,
step)
log_writer.add_scalar('Train/loss', avg_loss,
step)
log_writer.add_scalar('Train/lr', lr[0],
step)
log_writer.add_scalar('Train/step/sec', speed,
step)
log_writer.add_scalar('Train/loss', avg_loss, step)
log_writer.add_scalar('Train/lr', lr[0], step)
log_writer.add_scalar('Train/step/sec', speed, step)
sys.stdout.flush()
avg_loss = 0.0
cm.zero_matrix()
......@@ -445,12 +364,9 @@ def train(cfg):
).format(epoch, step, lr[0], avg_loss, speed,
calculate_eta(all_step - step, speed)))
if args.use_vdl:
log_writer.add_scalar('Train/loss', avg_loss,
step)
log_writer.add_scalar('Train/lr', lr[0],
step)
log_writer.add_scalar('Train/speed', speed,
step)
log_writer.add_scalar('Train/loss', avg_loss, step)
log_writer.add_scalar('Train/lr', lr[0], step)
log_writer.add_scalar('Train/speed', speed, step)
sys.stdout.flush()
avg_loss = 0.0
timer.restart()
......@@ -470,7 +386,7 @@ def train(cfg):
if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
ckpt_dir = save_checkpoint(exe, train_prog, epoch)
ckpt_dir = save_checkpoint(train_prog, epoch)
if args.do_eval:
print("Evaluation start")
......@@ -480,10 +396,8 @@ def train(cfg):
use_gpu=args.use_gpu,
use_mpio=args.use_mpio)
if args.use_vdl:
log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
step)
log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
step)
log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step)
log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step)
if mean_iou > best_mIoU:
best_mIoU = mean_iou
......@@ -505,7 +419,7 @@ def train(cfg):
# save final model
if cfg.TRAINER_ID == 0:
save_checkpoint(exe, train_prog, 'final')
save_checkpoint(train_prog, 'final')
def main(args):
......
# coding: utf8
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path as osp
import six
import numpy as np
def parse_param_file(param_file, return_shape=True):
    """
    Parse a raw PaddlePaddle parameter file.

    On-disk layout (as consumed below): a 4-byte version, an 8-byte LoD
    level followed by that many (8-byte size + payload) LoD entries, a
    second 4-byte version, a 4-byte tensor-descriptor size, the serialized
    ``VarType.TensorDesc`` protobuf, then the raw float32 tensor data.

    Args:
        param_file (str): path to the parameter file.
        return_shape (bool): when True, only the tensor shape is parsed
            and returned; otherwise the weights are read as well.

    Returns:
        tuple: the tensor shape, when ``return_shape`` is True.
        np.ndarray: the float32 weights reshaped to the tensor shape.

    Raises:
        Exception: if the tensor data type is not float32 (proto enum 5).
    """
    from paddle.fluid.proto.framework_pb2 import VarType

    # Fixes vs. the previous version: `with` guarantees the handle is
    # closed even on the raise path below; np.frombuffer replaces the
    # deprecated np.fromstring; sizes passed to f.read() are plain ints
    # instead of 1-element ndarrays.
    with open(param_file, 'rb') as f:
        version = np.frombuffer(f.read(4), dtype='int32')
        lod_level = int(np.frombuffer(f.read(8), dtype='int64'))
        for i in range(lod_level):
            # Skip each LoD entry: an 8-byte size then its payload.
            _size = int(np.frombuffer(f.read(8), dtype='int64'))
            _ = f.read(_size)
        version = np.frombuffer(f.read(4), dtype='int32')
        tensor_desc = VarType.TensorDesc()
        tensor_desc_size = int(np.frombuffer(f.read(4), dtype='int32'))
        tensor_desc.ParseFromString(f.read(tensor_desc_size))
        tensor_shape = tuple(tensor_desc.dims)
        if return_shape:
            return tensor_shape
        if tensor_desc.data_type != 5:
            # 5 is the proto enum value for FP32; nothing else is supported.
            raise Exception(
                "Unexpected data type while parse {}".format(param_file))
        data_size = 4
        for i in range(len(tensor_shape)):
            data_size *= tensor_shape[i]
        # .copy() keeps the array writable, matching np.fromstring's
        # copy semantics (frombuffer alone returns a read-only view).
        weight = np.frombuffer(f.read(data_size), dtype='float32').copy()
    return np.reshape(weight, tensor_shape)
def load_pdparams(exe, main_prog, model_dir):
    """
    Restore parameters of ``main_prog`` from ``model_dir/model.pdparams``.

    Only parameters whose name exists in the saved state dict and whose
    shape matches the network are loaded; the rest are reported and
    skipped. Mismatching entries are deleted from the dict before calling
    ``fluid.io.set_program_state`` so it never assigns them.

    Args:
        exe: executor; unused here, kept so the signature matches the
            raw-file loading path in ``load_pretrained_weights``.
        main_prog: the fluid Program whose parameters are restored.
        model_dir (str): directory containing ``model.pdparams``.
    """
    # Fix: dropped the unused `VarType` and `Program` imports that the
    # previous version pulled in.
    import pickle

    import paddle.fluid as fluid

    vars_to_load = list()
    vars_not_load = list()
    with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f:
        # The checkpoint is a pickled {name: ndarray} dict; on PY3,
        # latin1 is needed to decode PY2-era pickles.
        params_dict = pickle.load(f) if six.PY2 else pickle.load(
            f, encoding='latin1')
    unused_vars = list()
    for var in main_prog.list_vars():
        if not isinstance(var, fluid.framework.Parameter):
            continue
        if var.name not in params_dict:
            print("{} is not in saved model".format(var.name))
            vars_not_load.append(var.name)
            continue
        if var.shape != params_dict[var.name].shape:
            unused_vars.append(var.name)
            vars_not_load.append(var.name)
            print(
                "[SKIP] Shape of pretrained weight {} doesn't match.(Pretrained: {}, Actual: {})"
                .format(var.name, params_dict[var.name].shape, var.shape))
            continue
        vars_to_load.append(var)
    for var_name in unused_vars:
        del params_dict[var_name]
    fluid.io.set_program_state(main_prog, params_dict)

    if len(vars_to_load) == 0:
        print(
            "There is no pretrain weights loaded, maybe you should check you pretrain model!"
        )
    else:
        print("There are {}/{} varaibles in {} are loaded.".format(
            len(vars_to_load),
            len(vars_to_load) + len(vars_not_load), model_dir))
def load_pretrained_weights(exe, main_prog, weights_dir):
    """
    Load pretrained weights from ``weights_dir`` into ``main_prog``.

    Dispatches to ``load_pdparams`` when a ``model.pdparams`` file is
    present; otherwise loads the per-variable raw files, skipping any
    parameter whose file is missing or whose stored shape differs from
    the current network's.
    """
    if not osp.exists(weights_dir):
        raise Exception("Path {} not exists.".format(weights_dir))
    if osp.exists(osp.join(weights_dir, "model.pdparams")):
        return load_pdparams(exe, main_prog, weights_dir)

    import paddle.fluid as fluid

    loaded, skipped = list(), list()
    for param in main_prog.list_vars():
        if not isinstance(param, fluid.framework.Parameter):
            continue
        weight_file = osp.join(weights_dir, param.name)
        if not osp.exists(weight_file):
            print("[SKIP] Pretrained weight {}/{} doesn't exist".format(
                weights_dir, param.name))
            skipped.append(param)
            continue
        saved_shape = parse_param_file(weight_file)
        net_shape = tuple(param.shape)
        if saved_shape != net_shape:
            print(
                "[SKIP] Shape of pretrained weight {}/{} doesn't match.(Pretrained: {}, Actual: {})"
                .format(weights_dir, param.name, saved_shape, net_shape))
            skipped.append(param)
            continue
        loaded.append(param)

    state = fluid.io.load_program_state(weights_dir, var_list=loaded)
    fluid.io.set_program_state(main_prog, state)

    if len(loaded) == 0:
        print(
            "There is no pretrain weights loaded, maybe you should check you pretrain model!"
        )
    else:
        print("There are {}/{} varaibles in {} are loaded.".format(
            len(loaded),
            len(loaded) + len(skipped), weights_dir))
......@@ -115,7 +115,12 @@ def visualize(cfg,
ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
if ckpt_dir is not None:
print('load test model:', ckpt_dir)
try:
fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
except:
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
save_dir = vis_dir
makedirs(save_dir)
......@@ -169,18 +174,13 @@ def visualize(cfg,
print("VisualDL visualization epoch", epoch)
pred_mask_np = np.array(pred_mask.convert("RGB"))
log_writer.add_image(
"Predict/{}".format(img_name),
pred_mask_np,
epoch)
log_writer.add_image("Predict/{}".format(img_name),
pred_mask_np, epoch)
# Original image
# BGR->RGB
img = cv2.imread(
os.path.join(cfg.DATASET.DATA_DIR, img_name))[..., ::-1]
log_writer.add_image(
"Images/{}".format(img_name),
img,
epoch)
img = cv2.imread(os.path.join(cfg.DATASET.DATA_DIR,
img_name))[..., ::-1]
log_writer.add_image("Images/{}".format(img_name), img, epoch)
# add ground truth (label) images
grt = grts[i]
if grt is not None:
......@@ -189,10 +189,8 @@ def visualize(cfg,
grt_pil.putpalette(color_map)
grt_pil = grt_pil.resize((org_shape[1], org_shape[0]))
grt = np.array(grt_pil.convert("RGB"))
log_writer.add_image(
"Label/{}".format(img_name),
grt,
epoch)
log_writer.add_image("Label/{}".format(img_name), grt,
epoch)
# If in local_test mode, only visualize 5 images just for testing
# procedure
......
......@@ -44,6 +44,7 @@ from model_builder import parse_shape_from_file
from eval import evaluate
from vis import visualize
from utils import dist_utils
from utils.load_model_utils import load_pretrained_weights
import solver
from paddleslim.dist.single_distiller import merge, l2_loss
......@@ -116,38 +117,7 @@ def parse_args():
return parser.parse_args()
def save_vars(executor, dirname, program=None, vars=None):
    """
    Temporary resolution for Win save variables compatability.
    Will fix in PaddlePaddle v1.5.2

    Builds a throwaway program holding one `save` op per variable in
    ``vars`` and runs it, writing each variable to ``dirname/<name>``.
    """
    saver_prog = fluid.Program()
    target_block = saver_prog.global_block()

    raw_type = fluid.core.VarDesc.VarType.RAW
    for src_var in vars:
        # NOTE: RAW variables carry no tensor data, so skip saving them.
        if src_var.type == raw_type:
            continue
        mirror = target_block.create_var(
            name=src_var.name,
            shape=src_var.shape,
            dtype=src_var.dtype,
            type=src_var.type,
            lod_level=src_var.lod_level,
            persistable=True)
        out_path = os.path.normpath(os.path.join(dirname, mirror.name))
        target_block.append_op(
            type='save',
            inputs={'X': [mirror]},
            outputs={},
            attrs={'file_path': out_path})

    executor.run(saver_prog)
def save_checkpoint(exe, program, ckpt_name):
def save_checkpoint(program, ckpt_name):
"""
Save checkpoint for evaluation or resume training
"""
......@@ -156,29 +126,22 @@ def save_checkpoint(exe, program, ckpt_name):
if not os.path.isdir(ckpt_dir):
os.makedirs(ckpt_dir)
save_vars(
exe,
ckpt_dir,
program,
vars=list(filter(fluid.io.is_persistable, program.list_vars())))
fluid.save(program, os.path.join(ckpt_dir, 'model'))
return ckpt_dir
def load_checkpoint(exe, program):
"""
Load checkpoiont from pretrained model directory for resume training
Load checkpoiont for resuming training
"""
print('Resume model training from:', cfg.TRAIN.RESUME_MODEL_DIR)
if not os.path.exists(cfg.TRAIN.RESUME_MODEL_DIR):
raise ValueError("TRAIN.PRETRAIN_MODEL {} not exist!".format(
cfg.TRAIN.RESUME_MODEL_DIR))
fluid.io.load_persistables(
exe, cfg.TRAIN.RESUME_MODEL_DIR, main_program=program)
model_path = cfg.TRAIN.RESUME_MODEL_DIR
print('Resume model training from:', model_path)
if not os.path.exists(model_path):
raise ValueError(
"TRAIN.PRETRAIN_MODEL {} not exist!".format(model_path))
fluid.load(program, os.path.join(model_path, 'model'), exe)
# Check is path ended by path spearator
if model_path[-1] == os.sep:
model_path = model_path[0:-1]
......@@ -193,7 +156,6 @@ def load_checkpoint(exe, program):
else:
raise ValueError("Resume model path is not valid!")
print("Model checkpoint loaded successfully!")
return begin_epoch
......@@ -289,7 +251,11 @@ def train(cfg):
ckpt_dir = cfg.SLIM.KNOWLEDGE_DISTILL_TEACHER_MODEL_DIR
assert ckpt_dir is not None
print('load teacher model:', ckpt_dir)
fluid.io.load_params(exe, ckpt_dir, main_program=teacher_program)
if os.path.exists(ckpt_dir):
try:
fluid.load(teacher_program, os.path.join(ckpt_dir, 'model'), exe)
except:
fluid.io.load_params(exe, ckpt_dir, main_program=teacher_program)
# cfg = load_config(FLAGS.config)
cfg.update_from_file(args.cfg_file)
......@@ -355,42 +321,8 @@ def train(cfg):
begin_epoch = load_checkpoint(exe, fluid.default_main_program())
# Load pretrained model
elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
load_vars = []
load_fail_vars = []
def var_shape_matched(var, shape):
"""
Check whehter persitable variable shape is match with current network
"""
var_exist = os.path.exists(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
if var_exist:
var_shape = parse_shape_from_file(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
return var_shape == shape
return False
for x in fluid.default_main_program().list_vars():
if isinstance(x, fluid.framework.Parameter):
shape = tuple(fluid.global_scope().find_var(
x.name).get_tensor().shape())
if var_shape_matched(x, shape):
load_vars.append(x)
else:
load_fail_vars.append(x)
fluid.io.load_vars(
exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
for var in load_vars:
print_info("Parameter[{}] loaded sucessfully!".format(var.name))
for var in load_fail_vars:
print_info(
"Parameter[{}] don't exist or shape does not match current network, skip"
" to load it.".format(var.name))
print_info("{}/{} pretrained parameters loaded successfully!".format(
len(load_vars),
len(load_vars) + len(load_fail_vars)))
load_pretrained_weights(exe, fluid.default_main_program(),
cfg.TRAIN.PRETRAINED_MODEL_DIR)
else:
print_info(
'Pretrained model dir {} not exists, training from scratch...'.
......@@ -475,12 +407,9 @@ def train(cfg):
step)
log_writer.add_scalar('Train/mean_acc', mean_acc,
step)
log_writer.add_scalar('Train/loss', avg_loss,
step)
log_writer.add_scalar('Train/lr', lr[0],
step)
log_writer.add_scalar('Train/step/sec', speed,
step)
log_writer.add_scalar('Train/loss', avg_loss, step)
log_writer.add_scalar('Train/lr', lr[0], step)
log_writer.add_scalar('Train/step/sec', speed, step)
sys.stdout.flush()
avg_loss = 0.0
cm.zero_matrix()
......@@ -503,16 +432,13 @@ def train(cfg):
speed = args.log_steps / timer.elapsed_time()
print((
"epoch={} step={} lr={:.5f} loss={:.4f} teacher loss={:.4f} distill loss={:.4f} step/sec={:.3f} | ETA {}"
).format(epoch, step, lr[0], avg_loss,
avg_t_loss, avg_d_loss, speed,
).format(epoch, step, lr[0], avg_loss, avg_t_loss,
avg_d_loss, speed,
calculate_eta(all_step - step, speed)))
if args.use_vdl:
log_writer.add_scalar('Train/loss', avg_loss,
step)
log_writer.add_scalar('Train/lr', lr[0],
step)
log_writer.add_scalar('Train/speed', speed,
step)
log_writer.add_scalar('Train/loss', avg_loss, step)
log_writer.add_scalar('Train/lr', lr[0], step)
log_writer.add_scalar('Train/speed', speed, step)
sys.stdout.flush()
avg_loss = 0.0
avg_t_loss = 0.0
......@@ -527,7 +453,7 @@ def train(cfg):
if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
ckpt_dir = save_checkpoint(exe, fluid.default_main_program(), epoch)
ckpt_dir = save_checkpoint(fluid.default_main_program(), epoch)
if args.do_eval:
print("Evaluation start")
......@@ -537,10 +463,8 @@ def train(cfg):
use_gpu=args.use_gpu,
use_mpio=args.use_mpio)
if args.use_vdl:
log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
step)
log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
step)
log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step)
log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step)
if mean_iou > best_mIoU:
best_mIoU = mean_iou
......@@ -560,11 +484,11 @@ def train(cfg):
ckpt_dir=ckpt_dir,
log_writer=log_writer)
if cfg.TRAINER_ID == 0:
ckpt_dir = save_checkpoint(exe, fluid.default_main_program(), epoch)
ckpt_dir = save_checkpoint(fluid.default_main_program(), epoch)
# save final model
if cfg.TRAINER_ID == 0:
save_checkpoint(exe, fluid.default_main_program(), 'final')
save_checkpoint(fluid.default_main_program(), 'final')
def main(args):
......
......@@ -123,7 +123,10 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
if ckpt_dir is not None:
print('load test model:', ckpt_dir)
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
try:
fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
except:
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
# Use streaming confusion matrix to calculate mean_iou
np.set_printoptions(
......
......@@ -47,6 +47,7 @@ from model_builder import parse_shape_from_file
from eval_nas import evaluate
from vis import visualize
from utils import dist_utils
from utils.load_model_utils import load_pretrained_weights
from mobilenetv2_search_space import MobileNetV2SpaceSeg
from paddleslim.nas.search_space.search_space_factory import SearchSpaceFactory
......@@ -116,38 +117,7 @@ def parse_args():
return parser.parse_args()
def save_vars(executor, dirname, program=None, vars=None):
    """
    Save the given variables to ``dirname``, one file per variable.

    Temporary resolution for Windows save-variables compatibility.
    Will fix in PaddlePaddle v1.5.2.

    Args:
        executor: fluid Executor used to run the generated save program.
        dirname (str): Directory the variable files are written into.
        program: Unused here; kept for signature compatibility with
            ``fluid.io.save_vars``.
        vars (list): Variables to persist; RAW-typed variables are skipped.
    """

    # Build a fresh program containing only 'save' ops, then run it once.
    save_program = fluid.Program()
    save_block = save_program.global_block()

    for each_var in vars:
        # NOTE: don't save the variable which type is RAW
        if each_var.type == fluid.core.VarDesc.VarType.RAW:
            continue
        # Mirror the variable's metadata into the save program's block so
        # the 'save' op can reference it.
        new_var = save_block.create_var(
            name=each_var.name,
            shape=each_var.shape,
            dtype=each_var.dtype,
            type=each_var.type,
            lod_level=each_var.lod_level,
            persistable=True)
        # normpath avoids mixed separators on Windows — the reason this
        # helper exists at all.
        file_path = os.path.join(dirname, new_var.name)
        file_path = os.path.normpath(file_path)
        save_block.append_op(
            type='save',
            inputs={'X': [new_var]},
            outputs={},
            attrs={'file_path': file_path})
    executor.run(save_program)
def save_checkpoint(exe, program, ckpt_name):
def save_checkpoint(program, ckpt_name):
"""
Save checkpoint for evaluation or resume training
"""
......@@ -156,29 +126,22 @@ def save_checkpoint(exe, program, ckpt_name):
if not os.path.isdir(ckpt_dir):
os.makedirs(ckpt_dir)
save_vars(
exe,
ckpt_dir,
program,
vars=list(filter(fluid.io.is_persistable, program.list_vars())))
fluid.save(program, os.path.join(ckpt_dir, 'model'))
return ckpt_dir
def load_checkpoint(exe, program):
"""
Load checkpoiont from pretrained model directory for resume training
Load checkpoiont for resuming training
"""
print('Resume model training from:', cfg.TRAIN.RESUME_MODEL_DIR)
if not os.path.exists(cfg.TRAIN.RESUME_MODEL_DIR):
raise ValueError("TRAIN.PRETRAIN_MODEL {} not exist!".format(
cfg.TRAIN.RESUME_MODEL_DIR))
fluid.io.load_persistables(
exe, cfg.TRAIN.RESUME_MODEL_DIR, main_program=program)
model_path = cfg.TRAIN.RESUME_MODEL_DIR
print('Resume model training from:', model_path)
if not os.path.exists(model_path):
raise ValueError(
"TRAIN.PRETRAIN_MODEL {} not exist!".format(model_path))
fluid.load(program, os.path.join(model_path, 'model'), exe)
# Check is path ended by path spearator
if model_path[-1] == os.sep:
model_path = model_path[0:-1]
......@@ -193,7 +156,6 @@ def load_checkpoint(exe, program):
else:
raise ValueError("Resume model path is not valid!")
print("Model checkpoint loaded successfully!")
return begin_epoch
......@@ -245,8 +207,6 @@ def train(cfg):
yield item[0], item[1], item[2]
# Get device environment
# places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
# place = places[0]
gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
......@@ -326,43 +286,8 @@ def train(cfg):
begin_epoch = load_checkpoint(exe, train_prog)
# Load pretrained model
elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
load_vars = []
load_fail_vars = []
def var_shape_matched(var, shape):
"""
Check whehter persitable variable shape is match with current network
"""
var_exist = os.path.exists(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
if var_exist:
var_shape = parse_shape_from_file(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
return var_shape == shape
return False
for x in train_prog.list_vars():
if isinstance(x, fluid.framework.Parameter):
shape = tuple(fluid.global_scope().find_var(
x.name).get_tensor().shape())
if var_shape_matched(x, shape):
load_vars.append(x)
else:
load_fail_vars.append(x)
fluid.io.load_vars(
exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
for var in load_vars:
print_info("Parameter[{}] loaded sucessfully!".format(var.name))
for var in load_fail_vars:
print_info(
"Parameter[{}] don't exist or shape does not match current network, skip"
" to load it.".format(var.name))
print_info(
"{}/{} pretrained parameters loaded successfully!".format(
len(load_vars),
len(load_vars) + len(load_fail_vars)))
load_pretrained_weights(exe, train_prog,
cfg.TRAIN.PRETRAINED_MODEL_DIR)
else:
print_info(
'Pretrained model dir {} not exists, training from scratch...'.
......@@ -419,8 +344,7 @@ def train(cfg):
except Exception as e:
print(e)
if epoch > cfg.SLIM.NAS_START_EVAL_EPOCH:
ckpt_dir = save_checkpoint(exe, train_prog,
'{}_tmp'.format(port))
ckpt_dir = save_checkpoint(train_prog, '{}_tmp'.format(port))
_, mean_iou, _, mean_acc = evaluate(
cfg=cfg,
arch=arch,
......
......@@ -46,6 +46,7 @@ from models.model_builder import parse_shape_from_file
from eval_prune import evaluate
from vis import visualize
from utils import dist_utils
from utils.load_model_utils import load_pretrained_weights
from paddleslim.prune import Pruner, save_model
from paddleslim.analysis import flops
......@@ -285,42 +286,7 @@ def train(cfg):
begin_epoch = load_checkpoint(exe, train_prog)
# Load pretrained model
elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
load_vars = []
load_fail_vars = []
def var_shape_matched(var, shape):
"""
Check whehter persitable variable shape is match with current network
"""
var_exist = os.path.exists(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
if var_exist:
var_shape = parse_shape_from_file(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
return var_shape == shape
return False
for x in train_prog.list_vars():
if isinstance(x, fluid.framework.Parameter):
shape = tuple(fluid.global_scope().find_var(
x.name).get_tensor().shape())
if var_shape_matched(x, shape):
load_vars.append(x)
else:
load_fail_vars.append(x)
fluid.io.load_vars(
exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
for var in load_vars:
print_info("Parameter[{}] loaded sucessfully!".format(var.name))
for var in load_fail_vars:
print_info(
"Parameter[{}] don't exist or shape does not match current network, skip"
" to load it.".format(var.name))
print_info("{}/{} pretrained parameters loaded successfully!".format(
len(load_vars),
len(load_vars) + len(load_fail_vars)))
load_pretrained_weights(exe, train_prog, cfg.TRAIN.PRETRAINED_MODEL_DIR)
else:
print_info(
'Pretrained model dir {} not exists, training from scratch...'.
......@@ -409,12 +375,9 @@ def train(cfg):
step)
log_writer.add_scalar('Train/mean_acc', mean_acc,
step)
log_writer.add_scalar('Train/loss', avg_loss,
step)
log_writer.add_scalar('Train/lr', lr[0],
step)
log_writer.add_scalar('Train/step/sec', speed,
step)
log_writer.add_scalar('Train/loss', avg_loss, step)
log_writer.add_scalar('Train/lr', lr[0], step)
log_writer.add_scalar('Train/step/sec', speed, step)
sys.stdout.flush()
avg_loss = 0.0
cm.zero_matrix()
......@@ -436,12 +399,9 @@ def train(cfg):
).format(epoch, step, lr[0], avg_loss, speed,
calculate_eta(all_step - step, speed)))
if args.use_vdl:
log_writer.add_scalar('Train/loss', avg_loss,
step)
log_writer.add_scalar('Train/lr', lr[0],
step)
log_writer.add_scalar('Train/speed', speed,
step)
log_writer.add_scalar('Train/loss', avg_loss, step)
log_writer.add_scalar('Train/lr', lr[0], step)
log_writer.add_scalar('Train/speed', speed, step)
sys.stdout.flush()
avg_loss = 0.0
timer.restart()
......@@ -464,10 +424,8 @@ def train(cfg):
use_gpu=args.use_gpu,
use_mpio=args.use_mpio)
if args.use_vdl:
log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
step)
log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
step)
log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step)
log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step)
# Use VisualDL to visualize results
if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None:
......
......@@ -40,7 +40,8 @@ from models.model_builder import parse_shape_from_file
from eval_quant import evaluate
from vis import visualize
from utils import dist_utils
from train import save_vars, save_checkpoint, load_checkpoint, update_best_model, print_info
from utils.load_model_utils import load_pretrained_weights
from train import update_best_model, print_info
from paddleslim.quant import quant_aware
......@@ -103,6 +104,55 @@ def parse_args():
return parser.parse_args()
def save_checkpoint(exe, program, ckpt_name):
    """Persist every persistable variable of ``program`` as a checkpoint.

    The checkpoint directory is ``cfg.TRAIN.MODEL_SAVE_DIR/<ckpt_name>``;
    ``ckpt_name`` is typically an epoch number or the string 'final'.

    Returns:
        str: The directory the checkpoint was written to.
    """
    ckpt_dir = os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, str(ckpt_name))
    print("Save model checkpoint to {}".format(ckpt_dir))
    if not os.path.isdir(ckpt_dir):
        os.makedirs(ckpt_dir)

    # Only persistable variables (parameters, optimizer state, ...) need
    # to be written out for evaluation or resuming.
    persistables = [
        v for v in program.list_vars() if fluid.io.is_persistable(v)
    ]
    fluid.io.save_vars(exe, ckpt_dir, program, vars=persistables)

    return ckpt_dir
def load_checkpoint(exe, program):
    """
    Load a checkpoint from ``cfg.TRAIN.RESUME_MODEL_DIR`` to resume training.

    Args:
        exe: fluid Executor used to run the load ops.
        program: fluid Program whose persistable variables are restored.

    Returns:
        int: The epoch number training should resume from.

    Raises:
        ValueError: If the resume directory does not exist, or its basename
            is neither 'final' nor a digit string.
    """
    model_path = cfg.TRAIN.RESUME_MODEL_DIR
    print('Resume model training from:', model_path)
    if not os.path.exists(model_path):
        # Name the config key actually consulted so users know what to fix
        # (the message previously said TRAIN.PRETRAIN_MODEL by mistake).
        raise ValueError(
            "TRAIN.RESUME_MODEL_DIR {} not exist!".format(model_path))

    fluid.io.load_persistables(exe, model_path, main_program=program)

    # Strip a trailing path separator so basename() yields the epoch name.
    if model_path[-1] == os.sep:
        model_path = model_path[0:-1]
    epoch_name = os.path.basename(model_path)
    # If the resume model is the final model, start past the last epoch.
    if epoch_name == 'final':
        begin_epoch = cfg.SOLVER.NUM_EPOCHS
    # If the directory name is a digit, resume from the following epoch.
    elif epoch_name.isdigit():
        begin_epoch = int(epoch_name) + 1
    else:
        raise ValueError("Resume model path is not valid!")

    print("Model checkpoint loaded successfully!")
    return begin_epoch
def train_quant(cfg):
startup_prog = fluid.Program()
train_prog = fluid.Program()
......@@ -182,42 +232,7 @@ def train_quant(cfg):
begin_epoch = load_checkpoint(exe, train_prog)
# Load pretrained model
elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
load_vars = []
load_fail_vars = []
def var_shape_matched(var, shape):
"""
Check whehter persitable variable shape is match with current network
"""
var_exist = os.path.exists(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
if var_exist:
var_shape = parse_shape_from_file(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
return var_shape == shape
return False
for x in train_prog.list_vars():
if isinstance(x, fluid.framework.Parameter):
shape = tuple(fluid.global_scope().find_var(
x.name).get_tensor().shape())
if var_shape_matched(x, shape):
load_vars.append(x)
else:
load_fail_vars.append(x)
fluid.io.load_vars(
exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
for var in load_vars:
print_info("Parameter[{}] loaded sucessfully!".format(var.name))
for var in load_fail_vars:
print_info(
"Parameter[{}] don't exist or shape does not match current network, skip"
" to load it.".format(var.name))
print_info("{}/{} pretrained parameters loaded successfully!".format(
len(load_vars),
len(load_vars) + len(load_fail_vars)))
load_pretrained_weights(exe, train_prog, cfg.TRAIN.PRETRAINED_MODEL_DIR)
else:
print_info(
'Pretrained model dir {} not exists, training from scratch...'.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册