未验证 提交 c0b6a1d1 编写于 作者: Z zhengya01 提交者: GitHub

Merge pull request #2 from PaddlePaddle/develop

merge from upstream
......@@ -26,6 +26,7 @@ def add_arguments():
add_argument('dataset_path', str, None, "Cityscape dataset path.")
add_argument('verbose', bool, False, "Print mIoU for each step if verbose.")
add_argument('use_gpu', bool, True, "Whether use GPU or CPU.")
add_argument('num_classes', int, 19, "Number of classes.")
def mean_iou(pred, label):
......@@ -69,7 +70,7 @@ tp = fluid.Program()
batch_size = 1
reader.default_config['crop_size'] = -1
reader.default_config['shuffle'] = False
num_classes = 19
num_classes = args.num_classes
with fluid.program_guard(tp, sp):
img = fluid.layers.data(name='img', shape=[3, 0, 0], dtype='float32')
......@@ -84,7 +85,7 @@ tp = tp.clone(True)
fluid.memory_optimize(
tp,
print_log=False,
skip_opt_set=[pred.name, miou, out_wrong, out_correct],
skip_opt_set=set([pred.name, miou, out_wrong, out_correct]),
level=1)
place = fluid.CPUPlace()
......
......@@ -20,6 +20,11 @@ op_results = {}
default_epsilon = 1e-3
default_norm_type = 'bn'
default_group_number = 32
depthwise_use_cudnn = False
bn_regularizer = fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0)
depthwise_regularizer = fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0)
@contextlib.contextmanager
......@@ -52,20 +57,39 @@ def append_op_result(result, name):
def conv(*args, **kargs):
    """Append a ``fluid.layers.conv2d`` op named after the current ``name_scope``.

    The weight and bias attributes are always rebuilt here from
    ``name_scope``; whatever the caller passed as ``param_attr`` is replaced,
    and ``bias_attr`` is only consulted for its truthiness.

    Args:
        *args: Positional arguments forwarded unchanged to
            ``fluid.layers.conv2d``.
        **kargs: Keyword arguments forwarded to ``fluid.layers.conv2d`` after
            ``param_attr``, ``bias_attr`` and ``name`` have been overwritten.

    Returns:
        The value returned by ``append_op_result`` for the conv output,
        registered under the key ``'conv'``.
    """
    # Standard deviation of the truncated-normal weight initializer is picked
    # from the scope name; the first matching rule wins.
    if "xception" in name_scope:
        init_std = 0.09
    elif "logit" in name_scope:
        init_std = 0.01
    elif name_scope.endswith('depthwise/'):
        init_std = 0.33
    else:
        init_std = 0.06

    # Only depthwise convolutions get an explicit regularizer; every other
    # scope passes None.
    if name_scope.endswith('depthwise/'):
        regularizer = depthwise_regularizer
    else:
        regularizer = None

    kargs['param_attr'] = fluid.ParamAttr(
        name=name_scope + 'weights',
        regularizer=regularizer,
        initializer=fluid.initializer.TruncatedNormal(
            loc=0.0, scale=init_std))

    # A truthy caller-supplied bias_attr means "use a bias"; the attribute
    # itself is rebuilt with the scope-derived name and a zero initializer.
    if 'bias_attr' in kargs and kargs['bias_attr']:
        kargs['bias_attr'] = fluid.ParamAttr(
            name=name_scope + 'biases',
            regularizer=regularizer,
            initializer=fluid.initializer.ConstantInitializer(value=0.0))
    else:
        kargs['bias_attr'] = False

    kargs['name'] = name_scope + 'conv'
    return append_op_result(fluid.layers.conv2d(*args, **kargs), 'conv')
def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None):
helper = fluid.layer_helper.LayerHelper('group_norm', **locals())
N, C, H, W = input.shape
if C % G != 0:
print("group can not divide channle:", C, G)
# print "group can not divide channle:", C, G
for d in range(10):
for t in [d, -d]:
if G + t <= 0: continue
......@@ -73,29 +97,16 @@ def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None):
G = G + t
break
if C % G == 0:
print("use group size:", G)
# print "use group size:", G
break
assert C % G == 0
param_shape = (G, )
x = input
x = fluid.layers.reshape(x, [N, G, C // G * H * W])
mean = fluid.layers.reduce_mean(x, dim=2, keep_dim=True)
x = x - mean
var = fluid.layers.reduce_mean(fluid.layers.square(x), dim=2, keep_dim=True)
x = x / fluid.layers.sqrt(var + eps)
scale = helper.create_parameter(
attr=helper.param_attr,
shape=param_shape,
dtype='float32',
default_initializer=fluid.initializer.Constant(1.0))
bias = helper.create_parameter(
attr=helper.bias_attr, shape=param_shape, dtype='float32', is_bias=True)
x = fluid.layers.elementwise_add(
fluid.layers.elementwise_mul(
x, scale, axis=1), bias, axis=1)
return fluid.layers.reshape(x, input.shape)
x = fluid.layers.group_norm(
input,
groups=G,
param_attr=param_attr,
bias_attr=bias_attr,
name=name_scope + 'group_norm')
return x
def bn(*args, **kargs):
......@@ -106,8 +117,10 @@ def bn(*args, **kargs):
*args,
epsilon=default_epsilon,
momentum=bn_momentum,
param_attr=name_scope + 'gamma',
bias_attr=name_scope + 'beta',
param_attr=fluid.ParamAttr(
name=name_scope + 'gamma', regularizer=bn_regularizer),
bias_attr=fluid.ParamAttr(
name=name_scope + 'beta', regularizer=bn_regularizer),
moving_mean_name=name_scope + 'moving_mean',
moving_variance_name=name_scope + 'moving_variance',
**kargs),
......@@ -119,8 +132,10 @@ def bn(*args, **kargs):
args[0],
default_group_number,
eps=default_epsilon,
param_attr=name_scope + 'gamma',
bias_attr=name_scope + 'beta'),
param_attr=fluid.ParamAttr(
name=name_scope + 'gamma', regularizer=bn_regularizer),
bias_attr=fluid.ParamAttr(
name=name_scope + 'beta', regularizer=bn_regularizer)),
'gn')
else:
raise "Unsupport norm type:" + default_norm_type
......@@ -143,7 +158,8 @@ def seq_conv(input, channel, stride, filter, dilation=1, act=None):
stride,
groups=input.shape[1],
padding=(filter // 2) * dilation,
dilation=dilation)
dilation=dilation,
use_cudnn=depthwise_use_cudnn)
input = bn(input)
if act: input = act(input)
with scope('pointwise'):
......
......@@ -13,6 +13,7 @@ import reader
import models
import time
def add_argument(name, type, default, help):
parser.add_argument('--' + name, default=default, type=type, help=help)
......@@ -32,15 +33,28 @@ def add_arguments():
add_argument('dataset_path', str, None, "Cityscape dataset path.")
add_argument('parallel', bool, False, "using ParallelExecutor.")
add_argument('use_gpu', bool, True, "Whether use GPU or CPU.")
add_argument('num_classes', int, 19, "Number of classes.")
def load_model():
    """Load initial weights from ``args.init_weights_path`` into program ``tp``.

    A path ending in ``'/'`` is treated as a directory of parameter files;
    any other path is treated as a single combined file and is passed as
    ``filename`` with an empty ``dirname``.

    When ``args.num_classes`` is 19 every parameter is loaded with
    ``fluid.io.load_params``. Otherwise only parameters whose name does not
    contain ``'logit'`` are loaded with ``fluid.io.load_vars``.
    NOTE(review): presumably the 'logit' parameters are skipped because their
    shape depends on the class count -- confirm against the model definition.

    Exactly one load call is issued per invocation. Loading everything first
    and then branching would defeat the 'logit' exclusion, and the single-file
    case must pass ``filename`` rather than using the file path as ``dirname``.
    """
    # Parameters of the training program, excluding the 'logit' ones.
    myvars = [
        x for x in tp.list_vars()
        if isinstance(x, fluid.framework.Parameter) and
        x.name.find('logit') == -1
    ]
    if args.init_weights_path.endswith('/'):
        # Directory layout: one file per variable.
        if args.num_classes == 19:
            fluid.io.load_params(
                exe, dirname=args.init_weights_path, main_program=tp)
        else:
            fluid.io.load_vars(
                exe, dirname=args.init_weights_path, vars=myvars)
    else:
        # Single combined file: dirname must be empty and the path goes in
        # filename for both loaders.
        if args.num_classes == 19:
            fluid.io.load_params(
                exe,
                dirname="",
                filename=args.init_weights_path,
                main_program=tp)
        else:
            fluid.io.load_vars(
                exe, dirname="", filename=args.init_weights_path, vars=myvars)
def save_model():
......@@ -80,6 +94,7 @@ args = parser.parse_args()
models.clean()
models.bn_momentum = 0.9997
models.dropout_keep_prop = 0.9
models.label_number = args.num_classes
deeplabv3p = models.deeplabv3p
sp = fluid.Program()
......@@ -89,7 +104,7 @@ batch_size = args.batch_size
image_shape = [crop_size, crop_size]
reader.default_config['crop_size'] = crop_size
reader.default_config['shuffle'] = True
num_classes = 19
num_classes = args.num_classes
weight_decay = 0.00004
base_lr = args.base_lr
......@@ -120,7 +135,7 @@ with fluid.program_guard(tp, sp):
retv = opt.minimize(loss_mean, startup_program=sp, no_grad_set=no_grad_set)
fluid.memory_optimize(
tp, print_log=False, skip_opt_set=[pred.name, loss_mean.name], level=1)
tp, print_log=False, skip_opt_set=set([pred.name, loss_mean.name]), level=1)
place = fluid.CPUPlace()
if args.use_gpu:
......@@ -155,8 +170,8 @@ for i, imgs, labels, names in batches:
if i % 100 == 0:
print("Model is saved to", args.save_weights_path)
save_model()
print("step {:d}, loss: {:.6f}, step_time_cost: {:.3f}" .format(i,
np.mean(retv[1]), end_time - prev_start_time))
print("step {:d}, loss: {:.6f}, step_time_cost: {:.3f}".format(
i, np.mean(retv[1]), end_time - prev_start_time))
print("Training done. Model is saved to", args.save_weights_path)
save_model()
#!/bin/bash
# Continuous-evaluation (CE) driver: runs the same training command twice,
# each time with a different CUDA_VISIBLE_DEVICES value, and pipes the
# training stdout into _ce.py.
# Pin MKL and OpenMP to one thread each for both runs.
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
# First run: ${name:=default} expands to $text_matching_on_quora when it is
# set and non-empty, otherwise assigns the default to it and expands to that.
cudaid=${text_matching_on_quora:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid
# FLAGS_benchmark=true is set in the environment of this command only.
FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce | python _ce.py
# Second run: same command, device list taken from $text_matching_on_quora_m.
cudaid=${text_matching_on_quora_m:=0,1,2,3} # use 0,1,2,3 card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce | python _ce.py
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
# KPI trackers for the CE runs. The "card1" / "card4" suffix in each name has
# to match the "kpis\t<name>_card<N>\t<value>" lines the training script
# prints, because log_to_ce looks trackers up by exactly this name.
# NOTE(review): the two numeric arguments are defined by the external `kpi`
# module (not visible here) -- presumably a tolerance and a skip count; confirm.
each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.05, 0, actived=True)
train_avg_cost_card1_kpi = CostKpi('train_avg_cost_card1', 0.2, 0)
# NOTE(review): accuracy is tracked with CostKpi; if the `kpi` module treats a
# rising value as a regression for CostKpi, this is the wrong direction for an
# accuracy metric -- confirm against the kpi module.
train_avg_acc_card1_kpi = CostKpi('train_avg_acc_card1', 0.02, 0)
# Same three KPIs for the four-card run.
each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.05, 0, actived=True)
train_avg_cost_card4_kpi = CostKpi('train_avg_cost_card4', 0.2, 0)
train_avg_acc_card4_kpi = CostKpi('train_avg_acc_card4', 0.02, 0)
# Every tracker that log_to_ce is allowed to receive records for.
tracking_kpis = [
each_pass_duration_card1_kpi,
train_avg_cost_card1_kpi,
train_avg_acc_card1_kpi,
each_pass_duration_card4_kpi,
train_avg_cost_card4_kpi,
train_avg_acc_card4_kpi,
]
def parse_log(log):
    """Yield ``(kpi_name, kpi_value)`` pairs found in a training log.

    A line is used only if, after stripping surrounding whitespace, it splits
    on tabs into exactly three fields and the first field is ``kpis``::

        kpis<TAB>train_avg_cost_card1<TAB>0.52

    Every other line is ignored. Each line's split fields are also printed
    to stdout, whether or not the line is used.

    Args:
        log: The complete log text as a single string.

    Yields:
        Tuples of the KPI name (``str``) and its value (``float``).

    Raises:
        ValueError: If the third field of a ``kpis`` line is not a valid
            float literal.
    """
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        # Echo every parsed line so the piped log stays visible downstream.
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            kpi_name = fs[1]
            kpi_value = float(fs[2])
            yield kpi_name, kpi_value
def log_to_ce(log):
    """Feed every KPI record found in ``log`` to its tracker and persist it.

    Trackers are looked up by name among ``tracking_kpis``; a KPI name in the
    log that has no tracker raises ``KeyError``. Each record is printed,
    added to its tracker and persisted immediately.

    Args:
        log: The complete log text, as accepted by ``parse_log``.
    """
    trackers_by_name = {tracked.name: tracked for tracked in tracking_kpis}
    for name, value in parse_log(log):
        print(name, value)
        tracker = trackers_by_name[name]
        tracker.add_record(value)
        tracker.persist()
if __name__ == '__main__':
    # The training log arrives on stdin (the script is the tail of a pipe).
    log_to_ce(sys.stdin.read())
......@@ -33,6 +33,7 @@ parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--model_name', type=str, default='cdssmNet', help="Which model to train")
parser.add_argument('--config', type=str, default='cdssm_base', help="The global config setting")
parser.add_argument('--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.')
DATA_DIR = os.path.join(os.path.expanduser('~'), '.cache/paddle/dataset')
......@@ -139,6 +140,13 @@ def train_and_evaluate(train_reader,
else:
feeder = fluid.DataFeeder(feed_list=[q1, q2, mask1, mask2, label], place=place)
# only for ce
args = parser.parse_args()
if args.enable_ce:
SEED = 102
fluid.default_startup_program().random_seed = SEED
fluid.default_main_program().random_seed = SEED
# logging param info
for param in fluid.default_main_program().global_block().all_parameters():
print("param name: %s; param shape: %s" % (param.name, param.shape))
......@@ -167,8 +175,10 @@ def train_and_evaluate(train_reader,
metric_type=global_config.metric_type)
# start training
total_time = 0.0
print("[%s] Start Training" % time.asctime(time.localtime(time.time())))
for epoch_id in range(global_config.epoch_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
batch_id = 0
epoch_begin_time = time.time()
......@@ -177,8 +187,8 @@ def train_and_evaluate(train_reader,
feed=feeder.feed(data),
fetch_list=[cost, acc])
data_size = len(data)
total_acc += data_size * avg_acc_np
total_cost += data_size * avg_cost_np
total_acc += data_size * avg_acc_np[0]
total_cost += data_size * avg_cost_np[0]
data_count += data_size
if batch_id % 100 == 0:
print("[%s] epoch_id: %d, batch_id: %d, cost: %f, acc: %f" % (
......@@ -188,16 +198,30 @@ def train_and_evaluate(train_reader,
avg_cost_np,
avg_acc_np))
batch_id += 1
avg_cost = total_cost / data_count
avg_acc = total_acc / data_count
epoch_end_time = time.time()
total_time += epoch_end_time - epoch_begin_time
print("")
print("[%s] epoch_id: %d, train_avg_cost: %f, train_avg_acc: %f, epoch_time_cost: %f" % (
time.asctime( time.localtime(time.time())),
epoch_id, avg_cost, avg_acc,
time.time() - epoch_begin_time))
# only for ce
if epoch_id == global_config.epoch_num - 1 and args.enable_ce:
#Note: The following logs are special for CE monitoring.
#Other situations do not need to care about these logs.
gpu_num = get_cards(args)
print("kpis\teach_pass_duration_card%s\t%s" % \
(gpu_num, total_time / (global_config.epoch_num)))
print("kpis\ttrain_avg_cost_card%s\t%s" %
(gpu_num, avg_cost))
print("kpis\ttrain_avg_acc_card%s\t%s" %
(gpu_num, avg_acc))
epoch_model = global_config.save_dirname + "/" + "epoch" + str(epoch_id)
fluid.io.save_inference_model(epoch_model, ["question1", "question2", "label"], acc, exe)
......@@ -267,5 +291,15 @@ def main():
use_cuda=global_config.use_cuda,
parallel=False)
def get_cards(args):
    """Return the card count used to suffix the CE KPI names.

    Args:
        args: Parsed command-line arguments. ``args.enable_ce`` selects the
            source of the count; ``args.num_devices`` is read only when
            ``enable_ce`` is false.

    Returns:
        With ``enable_ce`` set, the number of non-empty comma-separated
        entries in the ``CUDA_VISIBLE_DEVICES`` environment variable, or 1
        when the variable is unset or lists no device. Otherwise
        ``args.num_devices``.
    """
    if not args.enable_ce:
        return args.num_devices
    # Default to '' so an unset variable does not crash on None.split().
    visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    # Ignore empty entries (e.g. a trailing comma) so they are not counted
    # as cards.
    device_ids = [entry for entry in visible.split(',') if entry.strip()]
    # Fall back to 1 so the KPI name always carries a usable card suffix.
    return len(device_ids) or 1
if __name__ == "__main__":
main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册