未验证 提交 020d1072 编写于 作者: W wuyefeilin 提交者: GitHub

Save load update (#257)

* update model save load

* first add

* update model save and load

* update train.py

* update LaneNet model saving and loading

* adapt slim to paddle-1.8

* update distillation save and load

* update nas model save and load

* update model load op

* update utils.py

* update load_model_utils.py

* update model saving and loading
上级 1b36f1ab
...@@ -205,11 +205,9 @@ def load_pretrained_weights(exe, main_prog, weights_dir, fuse_bn=False): ...@@ -205,11 +205,9 @@ def load_pretrained_weights(exe, main_prog, weights_dir, fuse_bn=False):
vars_to_load.append(var) vars_to_load.append(var)
logging.debug("Weight {} will be load".format(var.name)) logging.debug("Weight {} will be load".format(var.name))
fluid.io.load_vars( params_dict = fluid.io.load_program_state(
executor=exe, weights_dir, var_list=vars_to_load)
dirname=weights_dir, fluid.io.set_program_state(main_prog, params_dict)
main_program=main_prog,
vars=vars_to_load)
if len(vars_to_load) == 0: if len(vars_to_load) == 0:
logging.warning( logging.warning(
"There is no pretrain weights loaded, maybe you should check you pretrain model!" "There is no pretrain weights loaded, maybe you should check you pretrain model!"
......
...@@ -122,6 +122,9 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs): ...@@ -122,6 +122,9 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
if ckpt_dir is not None: if ckpt_dir is not None:
print('load test model:', ckpt_dir) print('load test model:', ckpt_dir)
try:
fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
except:
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
# Use streaming confusion matrix to calculate mean_iou # Use streaming confusion matrix to calculate mean_iou
......
...@@ -40,10 +40,10 @@ from pdseg.utils.timer import Timer, calculate_eta ...@@ -40,10 +40,10 @@ from pdseg.utils.timer import Timer, calculate_eta
from reader import LaneNetDataset from reader import LaneNetDataset
from models.model_builder import build_model from models.model_builder import build_model
from models.model_builder import ModelPhase from models.model_builder import ModelPhase
from models.model_builder import parse_shape_from_file
from eval import evaluate from eval import evaluate
from vis import visualize from vis import visualize
from utils import dist_utils from utils import dist_utils
from utils.load_model_utils import load_pretrained_weights
def parse_args(): def parse_args():
...@@ -101,37 +101,6 @@ def parse_args(): ...@@ -101,37 +101,6 @@ def parse_args():
return parser.parse_args() return parser.parse_args()
def save_vars(executor, dirname, program=None, vars=None):
"""
Temporary resolution for Win save variables compatability.
Will fix in PaddlePaddle v1.5.2
"""
save_program = fluid.Program()
save_block = save_program.global_block()
for each_var in vars:
# NOTE: don't save the variable which type is RAW
if each_var.type == fluid.core.VarDesc.VarType.RAW:
continue
new_var = save_block.create_var(
name=each_var.name,
shape=each_var.shape,
dtype=each_var.dtype,
type=each_var.type,
lod_level=each_var.lod_level,
persistable=True)
file_path = os.path.join(dirname, new_var.name)
file_path = os.path.normpath(file_path)
save_block.append_op(
type='save',
inputs={'X': [new_var]},
outputs={},
attrs={'file_path': file_path})
executor.run(save_program)
def save_checkpoint(exe, program, ckpt_name): def save_checkpoint(exe, program, ckpt_name):
""" """
Save checkpoint for evaluation or resume training Save checkpoint for evaluation or resume training
...@@ -141,29 +110,22 @@ def save_checkpoint(exe, program, ckpt_name): ...@@ -141,29 +110,22 @@ def save_checkpoint(exe, program, ckpt_name):
if not os.path.isdir(ckpt_dir): if not os.path.isdir(ckpt_dir):
os.makedirs(ckpt_dir) os.makedirs(ckpt_dir)
save_vars( fluid.save(program, os.path.join(ckpt_dir, 'model'))
exe,
ckpt_dir,
program,
vars=list(filter(fluid.io.is_persistable, program.list_vars())))
return ckpt_dir return ckpt_dir
def load_checkpoint(exe, program): def load_checkpoint(exe, program):
""" """
Load checkpoiont from pretrained model directory for resume training Load checkpoiont for resuming training
""" """
print('Resume model training from:', cfg.TRAIN.RESUME_MODEL_DIR)
if not os.path.exists(cfg.TRAIN.RESUME_MODEL_DIR):
raise ValueError("TRAIN.PRETRAIN_MODEL {} not exist!".format(
cfg.TRAIN.RESUME_MODEL_DIR))
fluid.io.load_persistables(
exe, cfg.TRAIN.RESUME_MODEL_DIR, main_program=program)
model_path = cfg.TRAIN.RESUME_MODEL_DIR model_path = cfg.TRAIN.RESUME_MODEL_DIR
print('Resume model training from:', model_path)
if not os.path.exists(model_path):
raise ValueError(
"TRAIN.PRETRAIN_MODEL {} not exist!".format(model_path))
fluid.load(program, os.path.join(model_path, 'model'), exe)
# Check is path ended by path spearator # Check is path ended by path spearator
if model_path[-1] == os.sep: if model_path[-1] == os.sep:
model_path = model_path[0:-1] model_path = model_path[0:-1]
...@@ -178,7 +140,6 @@ def load_checkpoint(exe, program): ...@@ -178,7 +140,6 @@ def load_checkpoint(exe, program):
else: else:
raise ValueError("Resume model path is not valid!") raise ValueError("Resume model path is not valid!")
print("Model checkpoint loaded successfully!") print("Model checkpoint loaded successfully!")
return begin_epoch return begin_epoch
...@@ -271,44 +232,7 @@ def train(cfg): ...@@ -271,44 +232,7 @@ def train(cfg):
begin_epoch = load_checkpoint(exe, train_prog) begin_epoch = load_checkpoint(exe, train_prog)
# Load pretrained model # Load pretrained model
elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR): elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR) load_pretrained_weights(exe, train_prog, cfg.TRAIN.PRETRAINED_MODEL_DIR)
load_vars = []
load_fail_vars = []
def var_shape_matched(var, shape):
"""
Check whehter persitable variable shape is match with current network
"""
var_exist = os.path.exists(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
if var_exist:
var_shape = parse_shape_from_file(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
if var_shape != shape:
print(var.name, var_shape, shape)
return var_shape == shape
return False
for x in train_prog.list_vars():
if isinstance(x, fluid.framework.Parameter):
shape = tuple(fluid.global_scope().find_var(
x.name).get_tensor().shape())
if var_shape_matched(x, shape):
load_vars.append(x)
else:
load_fail_vars.append(x)
fluid.io.load_vars(
exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
for var in load_vars:
print_info("Parameter[{}] loaded sucessfully!".format(var.name))
for var in load_fail_vars:
print_info(
"Parameter[{}] don't exist or shape does not match current network, skip"
" to load it.".format(var.name))
print_info("{}/{} pretrained parameters loaded successfully!".format(
len(load_vars),
len(load_vars) + len(load_fail_vars)))
else: else:
print_info( print_info(
'Pretrained model dir {} not exists, training from scratch...'. 'Pretrained model dir {} not exists, training from scratch...'.
...@@ -393,8 +317,7 @@ def train(cfg): ...@@ -393,8 +317,7 @@ def train(cfg):
avg_emb_loss, avg_acc, avg_fp, avg_fn, speed, avg_emb_loss, avg_acc, avg_fp, avg_fn, speed,
calculate_eta(all_step - step, speed))) calculate_eta(all_step - step, speed)))
if args.use_vdl: if args.use_vdl:
log_writer.add_scalar('Train/loss', avg_loss, log_writer.add_scalar('Train/loss', avg_loss, step)
step)
log_writer.add_scalar('Train/lr', lr[0], step) log_writer.add_scalar('Train/lr', lr[0], step)
log_writer.add_scalar('Train/speed', speed, step) log_writer.add_scalar('Train/speed', speed, step)
sys.stdout.flush() sys.stdout.flush()
...@@ -423,8 +346,7 @@ def train(cfg): ...@@ -423,8 +346,7 @@ def train(cfg):
use_gpu=args.use_gpu, use_gpu=args.use_gpu,
use_mpio=args.use_mpio) use_mpio=args.use_mpio)
if args.use_vdl: if args.use_vdl:
log_writer.add_scalar('Evaluate/accuracy', accuracy, log_writer.add_scalar('Evaluate/accuracy', accuracy, step)
step)
log_writer.add_scalar('Evaluate/fp', fp, step) log_writer.add_scalar('Evaluate/fp', fp, step)
log_writer.add_scalar('Evaluate/fn', fn, step) log_writer.add_scalar('Evaluate/fn', fn, step)
......
# coding: utf8
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path as osp
import six
import numpy as np
def parse_param_file(param_file, return_shape=True):
    """Parse a Paddle persistable-variable file saved by the ``save`` op.

    The on-disk layout this reads is:
        version(int32) | lod_level(int64) | per-level LoD data
        | version(int32) | desc_size(int32) | TensorDesc protobuf | raw data

    Args:
        param_file (str): Path to the saved variable file.
        return_shape (bool): If True, stop after the tensor descriptor and
            return only the shape; otherwise also read the weight data.

    Returns:
        tuple: The tensor shape, when ``return_shape`` is True.
        np.ndarray: float32 weights reshaped to the tensor shape, otherwise.

    Raises:
        Exception: If the stored data type is not float32
            (``data_type != 5`` in the framework proto).
    """
    from paddle.fluid.proto.framework_pb2 import VarType

    # Use a context manager so the file is closed on every path,
    # including the early return and the data-type exception.
    with open(param_file, 'rb') as f:
        # Skip the header: file version plus LoD information.
        _ = np.frombuffer(f.read(4), dtype='int32')  # file version, unused
        lod_level = np.frombuffer(f.read(8), dtype='int64')
        for _i in range(int(lod_level)):
            lod_size = np.frombuffer(f.read(8), dtype='int64')
            # f.read() requires a plain int, not a size-1 ndarray.
            f.read(int(lod_size))
        _ = np.frombuffer(f.read(4), dtype='int32')  # tensor version, unused

        tensor_desc = VarType.TensorDesc()
        tensor_desc_size = np.frombuffer(f.read(4), dtype='int32')
        tensor_desc.ParseFromString(f.read(int(tensor_desc_size)))
        tensor_shape = tuple(tensor_desc.dims)
        if return_shape:
            return tensor_shape
        if tensor_desc.data_type != 5:
            raise Exception(
                "Unexpected data type while parse {}".format(param_file))
        # float32 -> 4 bytes per element.
        data_size = 4
        for dim in tensor_shape:
            data_size *= dim
        # Copy so the returned array is writable (frombuffer views are
        # read-only), matching the behavior of the old np.fromstring.
        weight = np.frombuffer(f.read(data_size), dtype='float32').copy()
    return np.reshape(weight, tensor_shape)
def load_pdparams(exe, main_prog, model_dir):
    """Load weights from ``model_dir/model.pdparams`` into ``main_prog``.

    Parameters whose names are missing from the saved file, or whose shapes
    do not match the current network, are skipped with a message; everything
    else is assigned via ``fluid.io.set_program_state``.

    Args:
        exe: Executor (kept for interface compatibility; not used here).
        main_prog: The Program whose parameters should be populated.
        model_dir (str): Directory containing ``model.pdparams``.
    """
    import paddle.fluid as fluid
    import pickle

    with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f:
        # NOTE: pickle is only safe on trusted checkpoint files.
        params_dict = pickle.load(f) if six.PY2 else pickle.load(
            f, encoding='latin1')

    vars_to_load = list()
    vars_not_load = list()
    unused_vars = list()
    for var in main_prog.list_vars():
        if not isinstance(var, fluid.framework.Parameter):
            continue
        if var.name not in params_dict:
            print("{} is not in saved model".format(var.name))
            vars_not_load.append(var.name)
            continue
        if var.shape != params_dict[var.name].shape:
            unused_vars.append(var.name)
            vars_not_load.append(var.name)
            print(
                "[SKIP] Shape of pretrained weight {} doesn't match.(Pretrained: {}, Actual: {})"
                .format(var.name, params_dict[var.name].shape, var.shape))
            continue
        vars_to_load.append(var)

    # Drop shape-mismatched entries so set_program_state does not try to
    # assign them to incompatible variables.
    for var_name in unused_vars:
        del params_dict[var_name]
    fluid.io.set_program_state(main_prog, params_dict)

    if len(vars_to_load) == 0:
        print(
            "There is no pretrain weights loaded, maybe you should check you pretrain model!"
        )
    else:
        print("There are {}/{} variables in {} loaded.".format(
            len(vars_to_load),
            len(vars_to_load) + len(vars_not_load), model_dir))
def load_pretrained_weights(exe, main_prog, weights_dir):
    """Load pretrained weights from ``weights_dir`` into ``main_prog``.

    If ``weights_dir`` contains a ``model.pdparams`` file, delegate to
    :func:`load_pdparams`. Otherwise treat the directory as a set of
    per-variable files: each parameter whose file exists and whose stored
    shape matches the network is loaded via ``fluid.io.load_program_state``
    / ``set_program_state``; the rest are skipped with a message.

    Args:
        exe: Executor (kept for interface compatibility).
        main_prog: The Program whose parameters should be populated.
        weights_dir (str): Directory with pretrained weights.

    Raises:
        Exception: If ``weights_dir`` does not exist.
    """
    if not osp.exists(weights_dir):
        raise Exception("Path {} not exists.".format(weights_dir))
    if osp.exists(osp.join(weights_dir, "model.pdparams")):
        return load_pdparams(exe, main_prog, weights_dir)

    import paddle.fluid as fluid
    vars_to_load = list()
    vars_not_load = list()
    for var in main_prog.list_vars():
        if not isinstance(var, fluid.framework.Parameter):
            continue
        if not osp.exists(osp.join(weights_dir, var.name)):
            print("[SKIP] Pretrained weight {}/{} doesn't exist".format(
                weights_dir, var.name))
            vars_not_load.append(var)
            continue
        # Compare the shape recorded in the weight file with the network's.
        pretrained_shape = parse_param_file(osp.join(weights_dir, var.name))
        actual_shape = tuple(var.shape)
        if pretrained_shape != actual_shape:
            print(
                "[SKIP] Shape of pretrained weight {}/{} doesn't match.(Pretrained: {}, Actual: {})"
                .format(weights_dir, var.name, pretrained_shape, actual_shape))
            vars_not_load.append(var)
            continue
        vars_to_load.append(var)

    params_dict = fluid.io.load_program_state(
        weights_dir, var_list=vars_to_load)
    fluid.io.set_program_state(main_prog, params_dict)
    if len(vars_to_load) == 0:
        print(
            "There is no pretrain weights loaded, maybe you should check you pretrain model!"
        )
    else:
        print("There are {}/{} variables in {} loaded.".format(
            len(vars_to_load),
            len(vars_to_load) + len(vars_not_load), weights_dir))
...@@ -45,6 +45,7 @@ from models.model_builder import ModelPhase ...@@ -45,6 +45,7 @@ from models.model_builder import ModelPhase
from utils import lanenet_postprocess from utils import lanenet_postprocess
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
def parse_args(): def parse_args():
parser = argparse.ArgumentParser(description='PaddeSeg visualization tools') parser = argparse.ArgumentParser(description='PaddeSeg visualization tools')
parser.add_argument( parser.add_argument(
...@@ -106,7 +107,6 @@ def minmax_scale(input_arr): ...@@ -106,7 +107,6 @@ def minmax_scale(input_arr):
return output_arr return output_arr
def visualize(cfg, def visualize(cfg,
vis_file_list=None, vis_file_list=None,
use_gpu=False, use_gpu=False,
...@@ -119,7 +119,6 @@ def visualize(cfg, ...@@ -119,7 +119,6 @@ def visualize(cfg,
if vis_file_list is None: if vis_file_list is None:
vis_file_list = cfg.DATASET.TEST_FILE_LIST vis_file_list = cfg.DATASET.TEST_FILE_LIST
dataset = LaneNetDataset( dataset = LaneNetDataset(
file_list=vis_file_list, file_list=vis_file_list,
mode=ModelPhase.VISUAL, mode=ModelPhase.VISUAL,
...@@ -139,6 +138,11 @@ def visualize(cfg, ...@@ -139,6 +138,11 @@ def visualize(cfg,
ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir
if ckpt_dir is not None:
print('load test model:', ckpt_dir)
try:
fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
except:
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
save_dir = os.path.join(vis_dir, 'visual_results') save_dir = os.path.join(vis_dir, 'visual_results')
...@@ -161,22 +165,26 @@ def visualize(cfg, ...@@ -161,22 +165,26 @@ def visualize(cfg,
for i in range(num_imgs): for i in range(num_imgs):
gt_image = org_imgs[i] gt_image = org_imgs[i]
binary_seg_image, instance_seg_image = segLogits[i].squeeze(-1), emLogits[i].transpose((1,2,0)) binary_seg_image, instance_seg_image = segLogits[i].squeeze(
-1), emLogits[i].transpose((1, 2, 0))
postprocess_result = postprocessor.postprocess( postprocess_result = postprocessor.postprocess(
binary_seg_result=binary_seg_image, binary_seg_result=binary_seg_image,
instance_seg_result=instance_seg_image, instance_seg_result=instance_seg_image,
source_image=gt_image source_image=gt_image)
) pred_binary_fn = os.path.join(
pred_binary_fn = os.path.join(save_dir, to_png_fn(img_names[i], name='_pred_binary')) save_dir, to_png_fn(img_names[i], name='_pred_binary'))
pred_lane_fn = os.path.join(save_dir, to_png_fn(img_names[i], name='_pred_lane')) pred_lane_fn = os.path.join(
pred_instance_fn = os.path.join(save_dir, to_png_fn(img_names[i], name='_pred_instance')) save_dir, to_png_fn(img_names[i], name='_pred_lane'))
pred_instance_fn = os.path.join(
save_dir, to_png_fn(img_names[i], name='_pred_instance'))
dirname = os.path.dirname(pred_binary_fn) dirname = os.path.dirname(pred_binary_fn)
makedirs(dirname) makedirs(dirname)
mask_image = postprocess_result['mask_image'] mask_image = postprocess_result['mask_image']
for i in range(4): for i in range(4):
instance_seg_image[:, :, i] = minmax_scale(instance_seg_image[:, :, i]) instance_seg_image[:, :, i] = minmax_scale(
instance_seg_image[:, :, i])
embedding_image = np.array(instance_seg_image).astype(np.uint8) embedding_image = np.array(instance_seg_image).astype(np.uint8)
plt.figure('mask_image') plt.figure('mask_image')
...@@ -189,13 +197,13 @@ def visualize(cfg, ...@@ -189,13 +197,13 @@ def visualize(cfg,
plt.imshow(binary_seg_image * 255, cmap='gray') plt.imshow(binary_seg_image * 255, cmap='gray')
plt.show() plt.show()
cv2.imwrite(pred_binary_fn, np.array(binary_seg_image * 255).astype(np.uint8)) cv2.imwrite(pred_binary_fn,
np.array(binary_seg_image * 255).astype(np.uint8))
cv2.imwrite(pred_lane_fn, postprocess_result['source_image']) cv2.imwrite(pred_lane_fn, postprocess_result['source_image'])
cv2.imwrite(pred_instance_fn, mask_image) cv2.imwrite(pred_instance_fn, mask_image)
print(pred_lane_fn, 'saved!') print(pred_lane_fn, 'saved!')
if __name__ == '__main__': if __name__ == '__main__':
args = parse_args() args = parse_args()
if args.cfg_file is not None: if args.cfg_file is not None:
......
...@@ -201,11 +201,9 @@ def load_pretrain_weights(exe, main_prog, weights_dir, fuse_bn=False): ...@@ -201,11 +201,9 @@ def load_pretrain_weights(exe, main_prog, weights_dir, fuse_bn=False):
vars_to_load.append(var) vars_to_load.append(var)
logging.debug("Weight {} will be load".format(var.name)) logging.debug("Weight {} will be load".format(var.name))
fluid.io.load_vars( params_dict = fluid.io.load_program_state(
executor=exe, weights_dir, var_list=vars_to_load)
dirname=weights_dir, fluid.io.set_program_state(main_prog, params_dict)
main_program=main_prog,
vars=vars_to_load)
if len(vars_to_load) == 0: if len(vars_to_load) == 0:
logging.warning( logging.warning(
"There is no pretrain weights loaded, maybe you should check you pretrain model!" "There is no pretrain weights loaded, maybe you should check you pretrain model!"
......
...@@ -22,13 +22,9 @@ import os ...@@ -22,13 +22,9 @@ import os
os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0"
import sys import sys
import time
import argparse import argparse
import functools
import pprint import pprint
import cv2
import numpy as np import numpy as np
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from utils.config import cfg from utils.config import cfg
...@@ -116,6 +112,9 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs): ...@@ -116,6 +112,9 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
if ckpt_dir is not None: if ckpt_dir is not None:
print('load test model:', ckpt_dir) print('load test model:', ckpt_dir)
try:
fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
except:
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
# Use streaming confusion matrix to calculate mean_iou # Use streaming confusion matrix to calculate mean_iou
......
...@@ -49,6 +49,7 @@ def parse_args(): ...@@ -49,6 +49,7 @@ def parse_args():
sys.exit(1) sys.exit(1)
return parser.parse_args() return parser.parse_args()
def export_inference_config(): def export_inference_config():
deploy_cfg = '''DEPLOY: deploy_cfg = '''DEPLOY:
USE_GPU : 1 USE_GPU : 1
...@@ -66,9 +67,8 @@ def export_inference_config(): ...@@ -66,9 +67,8 @@ def export_inference_config():
PREDICTOR_MODE : "ANALYSIS" PREDICTOR_MODE : "ANALYSIS"
BATCH_SIZE : 1 BATCH_SIZE : 1
''' % (cfg.FREEZE.SAVE_DIR, cfg.FREEZE.MODEL_FILENAME, ''' % (cfg.FREEZE.SAVE_DIR, cfg.FREEZE.MODEL_FILENAME,
cfg.FREEZE.PARAMS_FILENAME, cfg.EVAL_CROP_SIZE, cfg.FREEZE.PARAMS_FILENAME, cfg.EVAL_CROP_SIZE, cfg.MEAN, cfg.STD,
cfg.MEAN, cfg.STD, cfg.DATASET.IMAGE_TYPE, cfg.DATASET.IMAGE_TYPE, cfg.DATASET.NUM_CLASSES, len(cfg.STD))
cfg.DATASET.NUM_CLASSES, len(cfg.STD))
if not os.path.exists(cfg.FREEZE.SAVE_DIR): if not os.path.exists(cfg.FREEZE.SAVE_DIR):
os.mkdir(cfg.FREEZE.SAVE_DIR) os.mkdir(cfg.FREEZE.SAVE_DIR)
yaml_path = os.path.join(cfg.FREEZE.SAVE_DIR, 'deploy.yaml') yaml_path = os.path.join(cfg.FREEZE.SAVE_DIR, 'deploy.yaml')
...@@ -94,7 +94,13 @@ def export_inference_model(args): ...@@ -94,7 +94,13 @@ def export_inference_model(args):
infer_prog = infer_prog.clone(for_test=True) infer_prog = infer_prog.clone(for_test=True)
if os.path.exists(cfg.TEST.TEST_MODEL): if os.path.exists(cfg.TEST.TEST_MODEL):
fluid.io.load_params(exe, cfg.TEST.TEST_MODEL, main_program=infer_prog) print('load test model:', cfg.TEST.TEST_MODEL)
try:
fluid.load(infer_prog, os.path.join(cfg.TEST.TEST_MODEL, 'model'),
exe)
except:
fluid.io.load_params(
exe, cfg.TEST.TEST_MODEL, main_program=infer_prog)
else: else:
print("TEST.TEST_MODEL diretory is empty!") print("TEST.TEST_MODEL diretory is empty!")
exit(-1) exit(-1)
......
...@@ -26,9 +26,7 @@ import argparse ...@@ -26,9 +26,7 @@ import argparse
import pprint import pprint
import random import random
import shutil import shutil
import functools
import paddle
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import profiler from paddle.fluid import profiler
...@@ -39,10 +37,10 @@ from metrics import ConfusionMatrix ...@@ -39,10 +37,10 @@ from metrics import ConfusionMatrix
from reader import SegDataset from reader import SegDataset
from models.model_builder import build_model from models.model_builder import build_model
from models.model_builder import ModelPhase from models.model_builder import ModelPhase
from models.model_builder import parse_shape_from_file
from eval import evaluate from eval import evaluate
from vis import visualize from vis import visualize
from utils import dist_utils from utils import dist_utils
from utils.load_model_utils import load_pretrained_weights
def parse_args(): def parse_args():
...@@ -118,38 +116,7 @@ def parse_args(): ...@@ -118,38 +116,7 @@ def parse_args():
return parser.parse_args() return parser.parse_args()
def save_vars(executor, dirname, program=None, vars=None): def save_checkpoint(program, ckpt_name):
"""
Temporary resolution for Win save variables compatability.
Will fix in PaddlePaddle v1.5.2
"""
save_program = fluid.Program()
save_block = save_program.global_block()
for each_var in vars:
# NOTE: don't save the variable which type is RAW
if each_var.type == fluid.core.VarDesc.VarType.RAW:
continue
new_var = save_block.create_var(
name=each_var.name,
shape=each_var.shape,
dtype=each_var.dtype,
type=each_var.type,
lod_level=each_var.lod_level,
persistable=True)
file_path = os.path.join(dirname, new_var.name)
file_path = os.path.normpath(file_path)
save_block.append_op(
type='save',
inputs={'X': [new_var]},
outputs={},
attrs={'file_path': file_path})
executor.run(save_program)
def save_checkpoint(exe, program, ckpt_name):
""" """
Save checkpoint for evaluation or resume training Save checkpoint for evaluation or resume training
""" """
...@@ -158,29 +125,22 @@ def save_checkpoint(exe, program, ckpt_name): ...@@ -158,29 +125,22 @@ def save_checkpoint(exe, program, ckpt_name):
if not os.path.isdir(ckpt_dir): if not os.path.isdir(ckpt_dir):
os.makedirs(ckpt_dir) os.makedirs(ckpt_dir)
save_vars( fluid.save(program, os.path.join(ckpt_dir, 'model'))
exe,
ckpt_dir,
program,
vars=list(filter(fluid.io.is_persistable, program.list_vars())))
return ckpt_dir return ckpt_dir
def load_checkpoint(exe, program): def load_checkpoint(exe, program):
""" """
Load checkpoiont from pretrained model directory for resume training Load checkpoiont for resuming training
""" """
print('Resume model training from:', cfg.TRAIN.RESUME_MODEL_DIR)
if not os.path.exists(cfg.TRAIN.RESUME_MODEL_DIR):
raise ValueError("TRAIN.PRETRAIN_MODEL {} not exist!".format(
cfg.TRAIN.RESUME_MODEL_DIR))
fluid.io.load_persistables(
exe, cfg.TRAIN.RESUME_MODEL_DIR, main_program=program)
model_path = cfg.TRAIN.RESUME_MODEL_DIR model_path = cfg.TRAIN.RESUME_MODEL_DIR
print('Resume model training from:', model_path)
if not os.path.exists(model_path):
raise ValueError(
"TRAIN.PRETRAIN_MODEL {} not exist!".format(model_path))
fluid.load(program, os.path.join(model_path, 'model'), exe)
# Check is path ended by path spearator # Check is path ended by path spearator
if model_path[-1] == os.sep: if model_path[-1] == os.sep:
model_path = model_path[0:-1] model_path = model_path[0:-1]
...@@ -195,7 +155,6 @@ def load_checkpoint(exe, program): ...@@ -195,7 +155,6 @@ def load_checkpoint(exe, program):
else: else:
raise ValueError("Resume model path is not valid!") raise ValueError("Resume model path is not valid!")
print("Model checkpoint loaded successfully!") print("Model checkpoint loaded successfully!")
return begin_epoch return begin_epoch
...@@ -247,8 +206,6 @@ def train(cfg): ...@@ -247,8 +206,6 @@ def train(cfg):
yield item[0], item[1], item[2] yield item[0], item[1], item[2]
# Get device environment # Get device environment
# places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
# place = places[0]
gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace() place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places() places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
...@@ -304,42 +261,7 @@ def train(cfg): ...@@ -304,42 +261,7 @@ def train(cfg):
begin_epoch = load_checkpoint(exe, train_prog) begin_epoch = load_checkpoint(exe, train_prog)
# Load pretrained model # Load pretrained model
elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR): elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR) load_pretrained_weights(exe, train_prog, cfg.TRAIN.PRETRAINED_MODEL_DIR)
load_vars = []
load_fail_vars = []
def var_shape_matched(var, shape):
"""
Check whehter persitable variable shape is match with current network
"""
var_exist = os.path.exists(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
if var_exist:
var_shape = parse_shape_from_file(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
return var_shape == shape
return False
for x in train_prog.list_vars():
if isinstance(x, fluid.framework.Parameter):
shape = tuple(fluid.global_scope().find_var(
x.name).get_tensor().shape())
if var_shape_matched(x, shape):
load_vars.append(x)
else:
load_fail_vars.append(x)
fluid.io.load_vars(
exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
for var in load_vars:
print_info("Parameter[{}] loaded sucessfully!".format(var.name))
for var in load_fail_vars:
print_info(
"Parameter[{}] don't exist or shape does not match current network, skip"
" to load it.".format(var.name))
print_info("{}/{} pretrained parameters loaded successfully!".format(
len(load_vars),
len(load_vars) + len(load_fail_vars)))
else: else:
print_info( print_info(
'Pretrained model dir {} not exists, training from scratch...'. 'Pretrained model dir {} not exists, training from scratch...'.
...@@ -418,12 +340,9 @@ def train(cfg): ...@@ -418,12 +340,9 @@ def train(cfg):
step) step)
log_writer.add_scalar('Train/mean_acc', mean_acc, log_writer.add_scalar('Train/mean_acc', mean_acc,
step) step)
log_writer.add_scalar('Train/loss', avg_loss, log_writer.add_scalar('Train/loss', avg_loss, step)
step) log_writer.add_scalar('Train/lr', lr[0], step)
log_writer.add_scalar('Train/lr', lr[0], log_writer.add_scalar('Train/step/sec', speed, step)
step)
log_writer.add_scalar('Train/step/sec', speed,
step)
sys.stdout.flush() sys.stdout.flush()
avg_loss = 0.0 avg_loss = 0.0
cm.zero_matrix() cm.zero_matrix()
...@@ -445,12 +364,9 @@ def train(cfg): ...@@ -445,12 +364,9 @@ def train(cfg):
).format(epoch, step, lr[0], avg_loss, speed, ).format(epoch, step, lr[0], avg_loss, speed,
calculate_eta(all_step - step, speed))) calculate_eta(all_step - step, speed)))
if args.use_vdl: if args.use_vdl:
log_writer.add_scalar('Train/loss', avg_loss, log_writer.add_scalar('Train/loss', avg_loss, step)
step) log_writer.add_scalar('Train/lr', lr[0], step)
log_writer.add_scalar('Train/lr', lr[0], log_writer.add_scalar('Train/speed', speed, step)
step)
log_writer.add_scalar('Train/speed', speed,
step)
sys.stdout.flush() sys.stdout.flush()
avg_loss = 0.0 avg_loss = 0.0
timer.restart() timer.restart()
...@@ -470,7 +386,7 @@ def train(cfg): ...@@ -470,7 +386,7 @@ def train(cfg):
if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0: or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
ckpt_dir = save_checkpoint(exe, train_prog, epoch) ckpt_dir = save_checkpoint(train_prog, epoch)
if args.do_eval: if args.do_eval:
print("Evaluation start") print("Evaluation start")
...@@ -480,10 +396,8 @@ def train(cfg): ...@@ -480,10 +396,8 @@ def train(cfg):
use_gpu=args.use_gpu, use_gpu=args.use_gpu,
use_mpio=args.use_mpio) use_mpio=args.use_mpio)
if args.use_vdl: if args.use_vdl:
log_writer.add_scalar('Evaluate/mean_iou', mean_iou, log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step)
step) log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step)
log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
step)
if mean_iou > best_mIoU: if mean_iou > best_mIoU:
best_mIoU = mean_iou best_mIoU = mean_iou
...@@ -505,7 +419,7 @@ def train(cfg): ...@@ -505,7 +419,7 @@ def train(cfg):
# save final model # save final model
if cfg.TRAINER_ID == 0: if cfg.TRAINER_ID == 0:
save_checkpoint(exe, train_prog, 'final') save_checkpoint(train_prog, 'final')
def main(args): def main(args):
......
# coding: utf8
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path as osp
import six
import numpy as np
def parse_param_file(param_file, return_shape=True):
    """Parse a Paddle persistable-variable file saved by the ``save`` op.

    The on-disk layout this reads is:
        version(int32) | lod_level(int64) | per-level LoD data
        | version(int32) | desc_size(int32) | TensorDesc protobuf | raw data

    Args:
        param_file (str): Path to the saved variable file.
        return_shape (bool): If True, stop after the tensor descriptor and
            return only the shape; otherwise also read the weight data.

    Returns:
        tuple: The tensor shape, when ``return_shape`` is True.
        np.ndarray: float32 weights reshaped to the tensor shape, otherwise.

    Raises:
        Exception: If the stored data type is not float32
            (``data_type != 5`` in the framework proto).
    """
    from paddle.fluid.proto.framework_pb2 import VarType

    # Use a context manager so the file is closed on every path,
    # including the early return and the data-type exception.
    with open(param_file, 'rb') as f:
        # Skip the header: file version plus LoD information.
        _ = np.frombuffer(f.read(4), dtype='int32')  # file version, unused
        lod_level = np.frombuffer(f.read(8), dtype='int64')
        for _i in range(int(lod_level)):
            lod_size = np.frombuffer(f.read(8), dtype='int64')
            # f.read() requires a plain int, not a size-1 ndarray.
            f.read(int(lod_size))
        _ = np.frombuffer(f.read(4), dtype='int32')  # tensor version, unused

        tensor_desc = VarType.TensorDesc()
        tensor_desc_size = np.frombuffer(f.read(4), dtype='int32')
        tensor_desc.ParseFromString(f.read(int(tensor_desc_size)))
        tensor_shape = tuple(tensor_desc.dims)
        if return_shape:
            return tensor_shape
        if tensor_desc.data_type != 5:
            raise Exception(
                "Unexpected data type while parse {}".format(param_file))
        # float32 -> 4 bytes per element.
        data_size = 4
        for dim in tensor_shape:
            data_size *= dim
        # Copy so the returned array is writable (frombuffer views are
        # read-only), matching the behavior of the old np.fromstring.
        weight = np.frombuffer(f.read(data_size), dtype='float32').copy()
    return np.reshape(weight, tensor_shape)
def load_pdparams(exe, main_prog, model_dir):
    """Load weights from ``model_dir/model.pdparams`` into ``main_prog``.

    Parameters whose names are missing from the saved file, or whose shapes
    do not match the current network, are skipped with a message; everything
    else is assigned via ``fluid.io.set_program_state``.

    Args:
        exe: Executor (kept for interface compatibility; not used here).
        main_prog: The Program whose parameters should be populated.
        model_dir (str): Directory containing ``model.pdparams``.
    """
    import paddle.fluid as fluid
    import pickle

    with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f:
        # NOTE: pickle is only safe on trusted checkpoint files.
        params_dict = pickle.load(f) if six.PY2 else pickle.load(
            f, encoding='latin1')

    vars_to_load = list()
    vars_not_load = list()
    unused_vars = list()
    for var in main_prog.list_vars():
        if not isinstance(var, fluid.framework.Parameter):
            continue
        if var.name not in params_dict:
            print("{} is not in saved model".format(var.name))
            vars_not_load.append(var.name)
            continue
        if var.shape != params_dict[var.name].shape:
            unused_vars.append(var.name)
            vars_not_load.append(var.name)
            print(
                "[SKIP] Shape of pretrained weight {} doesn't match.(Pretrained: {}, Actual: {})"
                .format(var.name, params_dict[var.name].shape, var.shape))
            continue
        vars_to_load.append(var)

    # Drop shape-mismatched entries so set_program_state does not try to
    # assign them to incompatible variables.
    for var_name in unused_vars:
        del params_dict[var_name]
    fluid.io.set_program_state(main_prog, params_dict)

    if len(vars_to_load) == 0:
        print(
            "There is no pretrain weights loaded, maybe you should check you pretrain model!"
        )
    else:
        print("There are {}/{} variables in {} loaded.".format(
            len(vars_to_load),
            len(vars_to_load) + len(vars_not_load), model_dir))
def load_pretrained_weights(exe, main_prog, weights_dir):
    """Load pretrained weights from ``weights_dir`` into ``main_prog``.

    Supports both checkpoint layouts:
      * consolidated ``model.pdparams`` file -> delegated to ``load_pdparams``;
      * one-file-per-variable directory -> each parameter is loaded only if a
        file with its name exists and its stored shape matches the network.

    Args:
        exe: Executor (passed through to ``load_pdparams``).
        main_prog: The fluid Program whose parameters will be set.
        weights_dir: Directory holding the pretrained weights.

    Raises:
        Exception: If ``weights_dir`` does not exist.
    """
    if not osp.exists(weights_dir):
        raise Exception("Path {} not exists.".format(weights_dir))
    # Consolidated-format checkpoints are handled by the pdparams loader.
    if osp.exists(osp.join(weights_dir, "model.pdparams")):
        return load_pdparams(exe, main_prog, weights_dir)
    import paddle.fluid as fluid

    vars_to_load = list()
    vars_not_load = list()
    for var in main_prog.list_vars():
        if not isinstance(var, fluid.framework.Parameter):
            continue
        if not osp.exists(osp.join(weights_dir, var.name)):
            print("[SKIP] Pretrained weight {}/{} doesn't exist".format(
                weights_dir, var.name))
            vars_not_load.append(var)
            continue
        pretrained_shape = parse_param_file(osp.join(weights_dir, var.name))
        actual_shape = tuple(var.shape)
        if pretrained_shape != actual_shape:
            print(
                "[SKIP] Shape of pretrained weight {}/{} doesn't match.(Pretrained: {}, Actual: {})"
                .format(weights_dir, var.name, pretrained_shape, actual_shape))
            vars_not_load.append(var)
            continue
        vars_to_load.append(var)

    if len(vars_to_load) == 0:
        print(
            "There is no pretrain weights loaded, maybe you should check your pretrained model!"
        )
    else:
        # Only touch program state when there is actually something to load.
        params_dict = fluid.io.load_program_state(
            weights_dir, var_list=vars_to_load)
        fluid.io.set_program_state(main_prog, params_dict)
        print("There are {}/{} variables in {} are loaded.".format(
            len(vars_to_load),
            len(vars_to_load) + len(vars_not_load), weights_dir))
...@@ -115,6 +115,11 @@ def visualize(cfg, ...@@ -115,6 +115,11 @@ def visualize(cfg,
ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir
if ckpt_dir is not None:
print('load test model:', ckpt_dir)
try:
fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
except:
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
save_dir = vis_dir save_dir = vis_dir
...@@ -169,18 +174,13 @@ def visualize(cfg, ...@@ -169,18 +174,13 @@ def visualize(cfg,
print("VisualDL visualization epoch", epoch) print("VisualDL visualization epoch", epoch)
pred_mask_np = np.array(pred_mask.convert("RGB")) pred_mask_np = np.array(pred_mask.convert("RGB"))
log_writer.add_image( log_writer.add_image("Predict/{}".format(img_name),
"Predict/{}".format(img_name), pred_mask_np, epoch)
pred_mask_np,
epoch)
# Original image # Original image
# BGR->RGB # BGR->RGB
img = cv2.imread( img = cv2.imread(os.path.join(cfg.DATASET.DATA_DIR,
os.path.join(cfg.DATASET.DATA_DIR, img_name))[..., ::-1] img_name))[..., ::-1]
log_writer.add_image( log_writer.add_image("Images/{}".format(img_name), img, epoch)
"Images/{}".format(img_name),
img,
epoch)
# add ground truth (label) images # add ground truth (label) images
grt = grts[i] grt = grts[i]
if grt is not None: if grt is not None:
...@@ -189,9 +189,7 @@ def visualize(cfg, ...@@ -189,9 +189,7 @@ def visualize(cfg,
grt_pil.putpalette(color_map) grt_pil.putpalette(color_map)
grt_pil = grt_pil.resize((org_shape[1], org_shape[0])) grt_pil = grt_pil.resize((org_shape[1], org_shape[0]))
grt = np.array(grt_pil.convert("RGB")) grt = np.array(grt_pil.convert("RGB"))
log_writer.add_image( log_writer.add_image("Label/{}".format(img_name), grt,
"Label/{}".format(img_name),
grt,
epoch) epoch)
# If in local_test mode, only visualize 5 images just for testing # If in local_test mode, only visualize 5 images just for testing
......
...@@ -44,6 +44,7 @@ from model_builder import parse_shape_from_file ...@@ -44,6 +44,7 @@ from model_builder import parse_shape_from_file
from eval import evaluate from eval import evaluate
from vis import visualize from vis import visualize
from utils import dist_utils from utils import dist_utils
from utils.load_model_utils import load_pretrained_weights
import solver import solver
from paddleslim.dist.single_distiller import merge, l2_loss from paddleslim.dist.single_distiller import merge, l2_loss
...@@ -116,38 +117,7 @@ def parse_args(): ...@@ -116,38 +117,7 @@ def parse_args():
return parser.parse_args() return parser.parse_args()
def save_vars(executor, dirname, program=None, vars=None): def save_checkpoint(program, ckpt_name):
"""
Temporary resolution for Win save variables compatability.
Will fix in PaddlePaddle v1.5.2
"""
save_program = fluid.Program()
save_block = save_program.global_block()
for each_var in vars:
# NOTE: don't save the variable which type is RAW
if each_var.type == fluid.core.VarDesc.VarType.RAW:
continue
new_var = save_block.create_var(
name=each_var.name,
shape=each_var.shape,
dtype=each_var.dtype,
type=each_var.type,
lod_level=each_var.lod_level,
persistable=True)
file_path = os.path.join(dirname, new_var.name)
file_path = os.path.normpath(file_path)
save_block.append_op(
type='save',
inputs={'X': [new_var]},
outputs={},
attrs={'file_path': file_path})
executor.run(save_program)
def save_checkpoint(exe, program, ckpt_name):
""" """
Save checkpoint for evaluation or resume training Save checkpoint for evaluation or resume training
""" """
...@@ -156,29 +126,22 @@ def save_checkpoint(exe, program, ckpt_name): ...@@ -156,29 +126,22 @@ def save_checkpoint(exe, program, ckpt_name):
if not os.path.isdir(ckpt_dir): if not os.path.isdir(ckpt_dir):
os.makedirs(ckpt_dir) os.makedirs(ckpt_dir)
save_vars( fluid.save(program, os.path.join(ckpt_dir, 'model'))
exe,
ckpt_dir,
program,
vars=list(filter(fluid.io.is_persistable, program.list_vars())))
return ckpt_dir return ckpt_dir
def load_checkpoint(exe, program): def load_checkpoint(exe, program):
""" """
Load checkpoiont from pretrained model directory for resume training Load checkpoiont for resuming training
""" """
print('Resume model training from:', cfg.TRAIN.RESUME_MODEL_DIR)
if not os.path.exists(cfg.TRAIN.RESUME_MODEL_DIR):
raise ValueError("TRAIN.PRETRAIN_MODEL {} not exist!".format(
cfg.TRAIN.RESUME_MODEL_DIR))
fluid.io.load_persistables(
exe, cfg.TRAIN.RESUME_MODEL_DIR, main_program=program)
model_path = cfg.TRAIN.RESUME_MODEL_DIR model_path = cfg.TRAIN.RESUME_MODEL_DIR
print('Resume model training from:', model_path)
if not os.path.exists(model_path):
raise ValueError(
"TRAIN.PRETRAIN_MODEL {} not exist!".format(model_path))
fluid.load(program, os.path.join(model_path, 'model'), exe)
# Check is path ended by path spearator # Check is path ended by path spearator
if model_path[-1] == os.sep: if model_path[-1] == os.sep:
model_path = model_path[0:-1] model_path = model_path[0:-1]
...@@ -193,7 +156,6 @@ def load_checkpoint(exe, program): ...@@ -193,7 +156,6 @@ def load_checkpoint(exe, program):
else: else:
raise ValueError("Resume model path is not valid!") raise ValueError("Resume model path is not valid!")
print("Model checkpoint loaded successfully!") print("Model checkpoint loaded successfully!")
return begin_epoch return begin_epoch
...@@ -289,6 +251,10 @@ def train(cfg): ...@@ -289,6 +251,10 @@ def train(cfg):
ckpt_dir = cfg.SLIM.KNOWLEDGE_DISTILL_TEACHER_MODEL_DIR ckpt_dir = cfg.SLIM.KNOWLEDGE_DISTILL_TEACHER_MODEL_DIR
assert ckpt_dir is not None assert ckpt_dir is not None
print('load teacher model:', ckpt_dir) print('load teacher model:', ckpt_dir)
if os.path.exists(ckpt_dir):
try:
fluid.load(teacher_program, os.path.join(ckpt_dir, 'model'), exe)
except:
fluid.io.load_params(exe, ckpt_dir, main_program=teacher_program) fluid.io.load_params(exe, ckpt_dir, main_program=teacher_program)
# cfg = load_config(FLAGS.config) # cfg = load_config(FLAGS.config)
...@@ -355,42 +321,8 @@ def train(cfg): ...@@ -355,42 +321,8 @@ def train(cfg):
begin_epoch = load_checkpoint(exe, fluid.default_main_program()) begin_epoch = load_checkpoint(exe, fluid.default_main_program())
# Load pretrained model # Load pretrained model
elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR): elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR) load_pretrained_weights(exe, fluid.default_main_program(),
load_vars = [] cfg.TRAIN.PRETRAINED_MODEL_DIR)
load_fail_vars = []
def var_shape_matched(var, shape):
"""
Check whehter persitable variable shape is match with current network
"""
var_exist = os.path.exists(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
if var_exist:
var_shape = parse_shape_from_file(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
return var_shape == shape
return False
for x in fluid.default_main_program().list_vars():
if isinstance(x, fluid.framework.Parameter):
shape = tuple(fluid.global_scope().find_var(
x.name).get_tensor().shape())
if var_shape_matched(x, shape):
load_vars.append(x)
else:
load_fail_vars.append(x)
fluid.io.load_vars(
exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
for var in load_vars:
print_info("Parameter[{}] loaded sucessfully!".format(var.name))
for var in load_fail_vars:
print_info(
"Parameter[{}] don't exist or shape does not match current network, skip"
" to load it.".format(var.name))
print_info("{}/{} pretrained parameters loaded successfully!".format(
len(load_vars),
len(load_vars) + len(load_fail_vars)))
else: else:
print_info( print_info(
'Pretrained model dir {} not exists, training from scratch...'. 'Pretrained model dir {} not exists, training from scratch...'.
...@@ -475,12 +407,9 @@ def train(cfg): ...@@ -475,12 +407,9 @@ def train(cfg):
step) step)
log_writer.add_scalar('Train/mean_acc', mean_acc, log_writer.add_scalar('Train/mean_acc', mean_acc,
step) step)
log_writer.add_scalar('Train/loss', avg_loss, log_writer.add_scalar('Train/loss', avg_loss, step)
step) log_writer.add_scalar('Train/lr', lr[0], step)
log_writer.add_scalar('Train/lr', lr[0], log_writer.add_scalar('Train/step/sec', speed, step)
step)
log_writer.add_scalar('Train/step/sec', speed,
step)
sys.stdout.flush() sys.stdout.flush()
avg_loss = 0.0 avg_loss = 0.0
cm.zero_matrix() cm.zero_matrix()
...@@ -503,16 +432,13 @@ def train(cfg): ...@@ -503,16 +432,13 @@ def train(cfg):
speed = args.log_steps / timer.elapsed_time() speed = args.log_steps / timer.elapsed_time()
print(( print((
"epoch={} step={} lr={:.5f} loss={:.4f} teacher loss={:.4f} distill loss={:.4f} step/sec={:.3f} | ETA {}" "epoch={} step={} lr={:.5f} loss={:.4f} teacher loss={:.4f} distill loss={:.4f} step/sec={:.3f} | ETA {}"
).format(epoch, step, lr[0], avg_loss, ).format(epoch, step, lr[0], avg_loss, avg_t_loss,
avg_t_loss, avg_d_loss, speed, avg_d_loss, speed,
calculate_eta(all_step - step, speed))) calculate_eta(all_step - step, speed)))
if args.use_vdl: if args.use_vdl:
log_writer.add_scalar('Train/loss', avg_loss, log_writer.add_scalar('Train/loss', avg_loss, step)
step) log_writer.add_scalar('Train/lr', lr[0], step)
log_writer.add_scalar('Train/lr', lr[0], log_writer.add_scalar('Train/speed', speed, step)
step)
log_writer.add_scalar('Train/speed', speed,
step)
sys.stdout.flush() sys.stdout.flush()
avg_loss = 0.0 avg_loss = 0.0
avg_t_loss = 0.0 avg_t_loss = 0.0
...@@ -527,7 +453,7 @@ def train(cfg): ...@@ -527,7 +453,7 @@ def train(cfg):
if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0: or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
ckpt_dir = save_checkpoint(exe, fluid.default_main_program(), epoch) ckpt_dir = save_checkpoint(fluid.default_main_program(), epoch)
if args.do_eval: if args.do_eval:
print("Evaluation start") print("Evaluation start")
...@@ -537,10 +463,8 @@ def train(cfg): ...@@ -537,10 +463,8 @@ def train(cfg):
use_gpu=args.use_gpu, use_gpu=args.use_gpu,
use_mpio=args.use_mpio) use_mpio=args.use_mpio)
if args.use_vdl: if args.use_vdl:
log_writer.add_scalar('Evaluate/mean_iou', mean_iou, log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step)
step) log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step)
log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
step)
if mean_iou > best_mIoU: if mean_iou > best_mIoU:
best_mIoU = mean_iou best_mIoU = mean_iou
...@@ -560,11 +484,11 @@ def train(cfg): ...@@ -560,11 +484,11 @@ def train(cfg):
ckpt_dir=ckpt_dir, ckpt_dir=ckpt_dir,
log_writer=log_writer) log_writer=log_writer)
if cfg.TRAINER_ID == 0: if cfg.TRAINER_ID == 0:
ckpt_dir = save_checkpoint(exe, fluid.default_main_program(), epoch) ckpt_dir = save_checkpoint(fluid.default_main_program(), epoch)
# save final model # save final model
if cfg.TRAINER_ID == 0: if cfg.TRAINER_ID == 0:
save_checkpoint(exe, fluid.default_main_program(), 'final') save_checkpoint(fluid.default_main_program(), 'final')
def main(args): def main(args):
......
...@@ -123,6 +123,9 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs): ...@@ -123,6 +123,9 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
if ckpt_dir is not None: if ckpt_dir is not None:
print('load test model:', ckpt_dir) print('load test model:', ckpt_dir)
try:
fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
except:
fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
# Use streaming confusion matrix to calculate mean_iou # Use streaming confusion matrix to calculate mean_iou
......
...@@ -47,6 +47,7 @@ from model_builder import parse_shape_from_file ...@@ -47,6 +47,7 @@ from model_builder import parse_shape_from_file
from eval_nas import evaluate from eval_nas import evaluate
from vis import visualize from vis import visualize
from utils import dist_utils from utils import dist_utils
from utils.load_model_utils import load_pretrained_weights
from mobilenetv2_search_space import MobileNetV2SpaceSeg from mobilenetv2_search_space import MobileNetV2SpaceSeg
from paddleslim.nas.search_space.search_space_factory import SearchSpaceFactory from paddleslim.nas.search_space.search_space_factory import SearchSpaceFactory
...@@ -116,38 +117,7 @@ def parse_args(): ...@@ -116,38 +117,7 @@ def parse_args():
return parser.parse_args() return parser.parse_args()
def save_vars(executor, dirname, program=None, vars=None): def save_checkpoint(program, ckpt_name):
"""
Temporary resolution for Win save variables compatability.
Will fix in PaddlePaddle v1.5.2
"""
save_program = fluid.Program()
save_block = save_program.global_block()
for each_var in vars:
# NOTE: don't save the variable which type is RAW
if each_var.type == fluid.core.VarDesc.VarType.RAW:
continue
new_var = save_block.create_var(
name=each_var.name,
shape=each_var.shape,
dtype=each_var.dtype,
type=each_var.type,
lod_level=each_var.lod_level,
persistable=True)
file_path = os.path.join(dirname, new_var.name)
file_path = os.path.normpath(file_path)
save_block.append_op(
type='save',
inputs={'X': [new_var]},
outputs={},
attrs={'file_path': file_path})
executor.run(save_program)
def save_checkpoint(exe, program, ckpt_name):
""" """
Save checkpoint for evaluation or resume training Save checkpoint for evaluation or resume training
""" """
...@@ -156,29 +126,22 @@ def save_checkpoint(exe, program, ckpt_name): ...@@ -156,29 +126,22 @@ def save_checkpoint(exe, program, ckpt_name):
if not os.path.isdir(ckpt_dir): if not os.path.isdir(ckpt_dir):
os.makedirs(ckpt_dir) os.makedirs(ckpt_dir)
save_vars( fluid.save(program, os.path.join(ckpt_dir, 'model'))
exe,
ckpt_dir,
program,
vars=list(filter(fluid.io.is_persistable, program.list_vars())))
return ckpt_dir return ckpt_dir
def load_checkpoint(exe, program): def load_checkpoint(exe, program):
""" """
Load checkpoiont from pretrained model directory for resume training Load checkpoiont for resuming training
""" """
print('Resume model training from:', cfg.TRAIN.RESUME_MODEL_DIR)
if not os.path.exists(cfg.TRAIN.RESUME_MODEL_DIR):
raise ValueError("TRAIN.PRETRAIN_MODEL {} not exist!".format(
cfg.TRAIN.RESUME_MODEL_DIR))
fluid.io.load_persistables(
exe, cfg.TRAIN.RESUME_MODEL_DIR, main_program=program)
model_path = cfg.TRAIN.RESUME_MODEL_DIR model_path = cfg.TRAIN.RESUME_MODEL_DIR
print('Resume model training from:', model_path)
if not os.path.exists(model_path):
raise ValueError(
"TRAIN.PRETRAIN_MODEL {} not exist!".format(model_path))
fluid.load(program, os.path.join(model_path, 'model'), exe)
# Check is path ended by path spearator # Check is path ended by path spearator
if model_path[-1] == os.sep: if model_path[-1] == os.sep:
model_path = model_path[0:-1] model_path = model_path[0:-1]
...@@ -193,7 +156,6 @@ def load_checkpoint(exe, program): ...@@ -193,7 +156,6 @@ def load_checkpoint(exe, program):
else: else:
raise ValueError("Resume model path is not valid!") raise ValueError("Resume model path is not valid!")
print("Model checkpoint loaded successfully!") print("Model checkpoint loaded successfully!")
return begin_epoch return begin_epoch
...@@ -245,8 +207,6 @@ def train(cfg): ...@@ -245,8 +207,6 @@ def train(cfg):
yield item[0], item[1], item[2] yield item[0], item[1], item[2]
# Get device environment # Get device environment
# places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
# place = places[0]
gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace() place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places() places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
...@@ -326,43 +286,8 @@ def train(cfg): ...@@ -326,43 +286,8 @@ def train(cfg):
begin_epoch = load_checkpoint(exe, train_prog) begin_epoch = load_checkpoint(exe, train_prog)
# Load pretrained model # Load pretrained model
elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR): elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR) load_pretrained_weights(exe, train_prog,
load_vars = [] cfg.TRAIN.PRETRAINED_MODEL_DIR)
load_fail_vars = []
def var_shape_matched(var, shape):
"""
Check whehter persitable variable shape is match with current network
"""
var_exist = os.path.exists(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
if var_exist:
var_shape = parse_shape_from_file(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
return var_shape == shape
return False
for x in train_prog.list_vars():
if isinstance(x, fluid.framework.Parameter):
shape = tuple(fluid.global_scope().find_var(
x.name).get_tensor().shape())
if var_shape_matched(x, shape):
load_vars.append(x)
else:
load_fail_vars.append(x)
fluid.io.load_vars(
exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
for var in load_vars:
print_info("Parameter[{}] loaded sucessfully!".format(var.name))
for var in load_fail_vars:
print_info(
"Parameter[{}] don't exist or shape does not match current network, skip"
" to load it.".format(var.name))
print_info(
"{}/{} pretrained parameters loaded successfully!".format(
len(load_vars),
len(load_vars) + len(load_fail_vars)))
else: else:
print_info( print_info(
'Pretrained model dir {} not exists, training from scratch...'. 'Pretrained model dir {} not exists, training from scratch...'.
...@@ -419,8 +344,7 @@ def train(cfg): ...@@ -419,8 +344,7 @@ def train(cfg):
except Exception as e: except Exception as e:
print(e) print(e)
if epoch > cfg.SLIM.NAS_START_EVAL_EPOCH: if epoch > cfg.SLIM.NAS_START_EVAL_EPOCH:
ckpt_dir = save_checkpoint(exe, train_prog, ckpt_dir = save_checkpoint(train_prog, '{}_tmp'.format(port))
'{}_tmp'.format(port))
_, mean_iou, _, mean_acc = evaluate( _, mean_iou, _, mean_acc = evaluate(
cfg=cfg, cfg=cfg,
arch=arch, arch=arch,
......
...@@ -46,6 +46,7 @@ from models.model_builder import parse_shape_from_file ...@@ -46,6 +46,7 @@ from models.model_builder import parse_shape_from_file
from eval_prune import evaluate from eval_prune import evaluate
from vis import visualize from vis import visualize
from utils import dist_utils from utils import dist_utils
from utils.load_model_utils import load_pretrained_weights
from paddleslim.prune import Pruner, save_model from paddleslim.prune import Pruner, save_model
from paddleslim.analysis import flops from paddleslim.analysis import flops
...@@ -285,42 +286,7 @@ def train(cfg): ...@@ -285,42 +286,7 @@ def train(cfg):
begin_epoch = load_checkpoint(exe, train_prog) begin_epoch = load_checkpoint(exe, train_prog)
# Load pretrained model # Load pretrained model
elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR): elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR) load_pretrained_weights(exe, train_prog, cfg.TRAIN.PRETRAINED_MODEL_DIR)
load_vars = []
load_fail_vars = []
def var_shape_matched(var, shape):
"""
Check whehter persitable variable shape is match with current network
"""
var_exist = os.path.exists(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
if var_exist:
var_shape = parse_shape_from_file(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
return var_shape == shape
return False
for x in train_prog.list_vars():
if isinstance(x, fluid.framework.Parameter):
shape = tuple(fluid.global_scope().find_var(
x.name).get_tensor().shape())
if var_shape_matched(x, shape):
load_vars.append(x)
else:
load_fail_vars.append(x)
fluid.io.load_vars(
exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
for var in load_vars:
print_info("Parameter[{}] loaded sucessfully!".format(var.name))
for var in load_fail_vars:
print_info(
"Parameter[{}] don't exist or shape does not match current network, skip"
" to load it.".format(var.name))
print_info("{}/{} pretrained parameters loaded successfully!".format(
len(load_vars),
len(load_vars) + len(load_fail_vars)))
else: else:
print_info( print_info(
'Pretrained model dir {} not exists, training from scratch...'. 'Pretrained model dir {} not exists, training from scratch...'.
...@@ -409,12 +375,9 @@ def train(cfg): ...@@ -409,12 +375,9 @@ def train(cfg):
step) step)
log_writer.add_scalar('Train/mean_acc', mean_acc, log_writer.add_scalar('Train/mean_acc', mean_acc,
step) step)
log_writer.add_scalar('Train/loss', avg_loss, log_writer.add_scalar('Train/loss', avg_loss, step)
step) log_writer.add_scalar('Train/lr', lr[0], step)
log_writer.add_scalar('Train/lr', lr[0], log_writer.add_scalar('Train/step/sec', speed, step)
step)
log_writer.add_scalar('Train/step/sec', speed,
step)
sys.stdout.flush() sys.stdout.flush()
avg_loss = 0.0 avg_loss = 0.0
cm.zero_matrix() cm.zero_matrix()
...@@ -436,12 +399,9 @@ def train(cfg): ...@@ -436,12 +399,9 @@ def train(cfg):
).format(epoch, step, lr[0], avg_loss, speed, ).format(epoch, step, lr[0], avg_loss, speed,
calculate_eta(all_step - step, speed))) calculate_eta(all_step - step, speed)))
if args.use_vdl: if args.use_vdl:
log_writer.add_scalar('Train/loss', avg_loss, log_writer.add_scalar('Train/loss', avg_loss, step)
step) log_writer.add_scalar('Train/lr', lr[0], step)
log_writer.add_scalar('Train/lr', lr[0], log_writer.add_scalar('Train/speed', speed, step)
step)
log_writer.add_scalar('Train/speed', speed,
step)
sys.stdout.flush() sys.stdout.flush()
avg_loss = 0.0 avg_loss = 0.0
timer.restart() timer.restart()
...@@ -464,10 +424,8 @@ def train(cfg): ...@@ -464,10 +424,8 @@ def train(cfg):
use_gpu=args.use_gpu, use_gpu=args.use_gpu,
use_mpio=args.use_mpio) use_mpio=args.use_mpio)
if args.use_vdl: if args.use_vdl:
log_writer.add_scalar('Evaluate/mean_iou', mean_iou, log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step)
step) log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step)
log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
step)
# Use VisualDL to visualize results # Use VisualDL to visualize results
if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None: if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None:
......
...@@ -40,7 +40,8 @@ from models.model_builder import parse_shape_from_file ...@@ -40,7 +40,8 @@ from models.model_builder import parse_shape_from_file
from eval_quant import evaluate from eval_quant import evaluate
from vis import visualize from vis import visualize
from utils import dist_utils from utils import dist_utils
from train import save_vars, save_checkpoint, load_checkpoint, update_best_model, print_info from utils.load_model_utils import load_pretrained_weights
from train import update_best_model, print_info
from paddleslim.quant import quant_aware from paddleslim.quant import quant_aware
...@@ -103,6 +104,55 @@ def parse_args(): ...@@ -103,6 +104,55 @@ def parse_args():
return parser.parse_args() return parser.parse_args()
def save_checkpoint(exe, program, ckpt_name):
    """Persist all persistable variables of ``program`` as a checkpoint.

    The checkpoint is written under ``cfg.TRAIN.MODEL_SAVE_DIR/<ckpt_name>``
    and the directory path is returned so callers can evaluate from it.
    """
    save_dir = os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, str(ckpt_name))
    print("Save model checkpoint to {}".format(save_dir))
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    persistables = [
        v for v in program.list_vars() if fluid.io.is_persistable(v)
    ]
    fluid.io.save_vars(exe, save_dir, program, vars=persistables)
    return save_dir
def load_checkpoint(exe, program):
    """Restore a training checkpoint and derive the epoch to resume from.

    Loads all persistable variables from ``cfg.TRAIN.RESUME_MODEL_DIR`` into
    ``program`` and infers the next epoch index from the checkpoint
    directory's basename ('final' or a digit string).

    Returns:
        int: The epoch number training should continue from.

    Raises:
        ValueError: If the resume directory does not exist, or its basename
            is neither 'final' nor a digit string.
    """
    model_path = cfg.TRAIN.RESUME_MODEL_DIR
    print('Resume model training from:', model_path)
    if not os.path.exists(model_path):
        # Message fixed to name the option actually checked here
        # (RESUME_MODEL_DIR, not PRETRAIN_MODEL).
        raise ValueError(
            "TRAIN.RESUME_MODEL_DIR {} not exist!".format(model_path))
    fluid.io.load_persistables(exe, model_path, main_program=program)

    # Strip a trailing path separator so basename() yields the epoch name.
    if model_path[-1] == os.sep:
        model_path = model_path[0:-1]
    epoch_name = os.path.basename(model_path)
    # If the resume model is the final snapshot, all epochs are done.
    if epoch_name == 'final':
        begin_epoch = cfg.SOLVER.NUM_EPOCHS
    # If the path ends in a digit string, resume after that epoch.
    elif epoch_name.isdigit():
        begin_epoch = int(epoch_name) + 1
    else:
        raise ValueError("Resume model path is not valid!")

    print("Model checkpoint loaded successfully!")
    return begin_epoch
def train_quant(cfg): def train_quant(cfg):
startup_prog = fluid.Program() startup_prog = fluid.Program()
train_prog = fluid.Program() train_prog = fluid.Program()
...@@ -182,42 +232,7 @@ def train_quant(cfg): ...@@ -182,42 +232,7 @@ def train_quant(cfg):
begin_epoch = load_checkpoint(exe, train_prog) begin_epoch = load_checkpoint(exe, train_prog)
# Load pretrained model # Load pretrained model
elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR): elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR) load_pretrained_weights(exe, train_prog, cfg.TRAIN.PRETRAINED_MODEL_DIR)
load_vars = []
load_fail_vars = []
def var_shape_matched(var, shape):
"""
Check whehter persitable variable shape is match with current network
"""
var_exist = os.path.exists(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
if var_exist:
var_shape = parse_shape_from_file(
os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
return var_shape == shape
return False
for x in train_prog.list_vars():
if isinstance(x, fluid.framework.Parameter):
shape = tuple(fluid.global_scope().find_var(
x.name).get_tensor().shape())
if var_shape_matched(x, shape):
load_vars.append(x)
else:
load_fail_vars.append(x)
fluid.io.load_vars(
exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
for var in load_vars:
print_info("Parameter[{}] loaded sucessfully!".format(var.name))
for var in load_fail_vars:
print_info(
"Parameter[{}] don't exist or shape does not match current network, skip"
" to load it.".format(var.name))
print_info("{}/{} pretrained parameters loaded successfully!".format(
len(load_vars),
len(load_vars) + len(load_fail_vars)))
else: else:
print_info( print_info(
'Pretrained model dir {} not exists, training from scratch...'. 'Pretrained model dir {} not exists, training from scratch...'.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册