diff --git a/fluid/PaddleCV/deeplabv3+/eval.py b/fluid/PaddleCV/deeplabv3+/eval.py index 624159a54d3ff55e29d9f5ac71c673e5e396d9e7..5699f2fac3ff52e39932eba71e8d25a189bf8fc6 100644 --- a/fluid/PaddleCV/deeplabv3+/eval.py +++ b/fluid/PaddleCV/deeplabv3+/eval.py @@ -26,6 +26,7 @@ def add_arguments(): add_argument('dataset_path', str, None, "Cityscape dataset path.") add_argument('verbose', bool, False, "Print mIoU for each step if verbose.") add_argument('use_gpu', bool, True, "Whether use GPU or CPU.") + add_argument('num_classes', int, 19, "Number of classes.") def mean_iou(pred, label): @@ -69,7 +70,7 @@ tp = fluid.Program() batch_size = 1 reader.default_config['crop_size'] = -1 reader.default_config['shuffle'] = False -num_classes = 19 +num_classes = args.num_classes with fluid.program_guard(tp, sp): img = fluid.layers.data(name='img', shape=[3, 0, 0], dtype='float32') @@ -84,7 +85,7 @@ tp = tp.clone(True) fluid.memory_optimize( tp, print_log=False, - skip_opt_set=[pred.name, miou, out_wrong, out_correct], + skip_opt_set=set([pred.name, miou, out_wrong, out_correct]), level=1) place = fluid.CPUPlace() diff --git a/fluid/PaddleCV/deeplabv3+/models.py b/fluid/PaddleCV/deeplabv3+/models.py index feca2142293ee2169fbe0d2bdc82f1d950af00de..c1ea12296af3e9b6e0bb783cfa10efe5adfa15aa 100644 --- a/fluid/PaddleCV/deeplabv3+/models.py +++ b/fluid/PaddleCV/deeplabv3+/models.py @@ -20,6 +20,11 @@ op_results = {} default_epsilon = 1e-3 default_norm_type = 'bn' default_group_number = 32 +depthwise_use_cudnn = False + +bn_regularizer = fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0) +depthwise_regularizer = fluid.regularizer.L2DecayRegularizer( + regularization_coeff=0.0) @contextlib.contextmanager @@ -52,20 +57,39 @@ def append_op_result(result, name): def conv(*args, **kargs): - kargs['param_attr'] = name_scope + 'weights' + if "xception" in name_scope: + init_std = 0.09 + elif "logit" in name_scope: + init_std = 0.01 + elif name_scope.endswith('depthwise/'): + 
init_std = 0.33 + else: + init_std = 0.06 + if name_scope.endswith('depthwise/'): + regularizer = depthwise_regularizer + else: + regularizer = None + + kargs['param_attr'] = fluid.ParamAttr( + name=name_scope + 'weights', + regularizer=regularizer, + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=init_std)) if 'bias_attr' in kargs and kargs['bias_attr']: - kargs['bias_attr'] = name_scope + 'biases' + kargs['bias_attr'] = fluid.ParamAttr( + name=name_scope + 'biases', + regularizer=regularizer, + initializer=fluid.initializer.ConstantInitializer(value=0.0)) else: kargs['bias_attr'] = False + kargs['name'] = name_scope + 'conv' return append_op_result(fluid.layers.conv2d(*args, **kargs), 'conv') def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None): - helper = fluid.layer_helper.LayerHelper('group_norm', **locals()) - N, C, H, W = input.shape if C % G != 0: - print("group can not divide channle:", C, G) + # print "group can not divide channle:", C, G for d in range(10): for t in [d, -d]: if G + t <= 0: continue @@ -73,29 +97,16 @@ def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None): G = G + t break if C % G == 0: - print("use group size:", G) + # print "use group size:", G break assert C % G == 0 - param_shape = (G, ) - x = input - x = fluid.layers.reshape(x, [N, G, C // G * H * W]) - mean = fluid.layers.reduce_mean(x, dim=2, keep_dim=True) - x = x - mean - var = fluid.layers.reduce_mean(fluid.layers.square(x), dim=2, keep_dim=True) - x = x / fluid.layers.sqrt(var + eps) - - scale = helper.create_parameter( - attr=helper.param_attr, - shape=param_shape, - dtype='float32', - default_initializer=fluid.initializer.Constant(1.0)) - - bias = helper.create_parameter( - attr=helper.bias_attr, shape=param_shape, dtype='float32', is_bias=True) - x = fluid.layers.elementwise_add( - fluid.layers.elementwise_mul( - x, scale, axis=1), bias, axis=1) - return fluid.layers.reshape(x, input.shape) + x = fluid.layers.group_norm( + input, 
+ groups=G, + param_attr=param_attr, + bias_attr=bias_attr, + name=name_scope + 'group_norm') + return x def bn(*args, **kargs): @@ -106,8 +117,10 @@ def bn(*args, **kargs): *args, epsilon=default_epsilon, momentum=bn_momentum, - param_attr=name_scope + 'gamma', - bias_attr=name_scope + 'beta', + param_attr=fluid.ParamAttr( + name=name_scope + 'gamma', regularizer=bn_regularizer), + bias_attr=fluid.ParamAttr( + name=name_scope + 'beta', regularizer=bn_regularizer), moving_mean_name=name_scope + 'moving_mean', moving_variance_name=name_scope + 'moving_variance', **kargs), @@ -119,8 +132,10 @@ def bn(*args, **kargs): args[0], default_group_number, eps=default_epsilon, - param_attr=name_scope + 'gamma', - bias_attr=name_scope + 'beta'), + param_attr=fluid.ParamAttr( + name=name_scope + 'gamma', regularizer=bn_regularizer), + bias_attr=fluid.ParamAttr( + name=name_scope + 'beta', regularizer=bn_regularizer)), 'gn') else: raise "Unsupport norm type:" + default_norm_type @@ -143,7 +158,8 @@ def seq_conv(input, channel, stride, filter, dilation=1, act=None): stride, groups=input.shape[1], padding=(filter // 2) * dilation, - dilation=dilation) + dilation=dilation, + use_cudnn=depthwise_use_cudnn) input = bn(input) if act: input = act(input) with scope('pointwise'): diff --git a/fluid/PaddleCV/deeplabv3+/train.py b/fluid/PaddleCV/deeplabv3+/train.py index fcc038b137349877e06c98a6d533669353bb4b34..817d53d173467f9146918ec9bb6b44141eb0ac3f 100644 --- a/fluid/PaddleCV/deeplabv3+/train.py +++ b/fluid/PaddleCV/deeplabv3+/train.py @@ -13,6 +13,7 @@ import reader import models import time + def add_argument(name, type, default, help): parser.add_argument('--' + name, default=default, type=type, help=help) @@ -32,15 +33,28 @@ def add_arguments(): add_argument('dataset_path', str, None, "Cityscape dataset path.") add_argument('parallel', bool, False, "using ParallelExecutor.") add_argument('use_gpu', bool, True, "Whether use GPU or CPU.") + add_argument('num_classes', int, 19, 
"Number of classes.") def load_model(): + myvars = [ + x for x in tp.list_vars() + if isinstance(x, fluid.framework.Parameter) and x.name.find('logit') == + -1 + ] if args.init_weights_path.endswith('/'): - fluid.io.load_params( - exe, dirname=args.init_weights_path, main_program=tp) + if args.num_classes == 19: + fluid.io.load_params( + exe, dirname=args.init_weights_path, main_program=tp) + else: + fluid.io.load_vars(exe, dirname=args.init_weights_path, vars=myvars) else: - fluid.io.load_params( - exe, dirname="", filename=args.init_weights_path, main_program=tp) + if args.num_classes == 19: + fluid.io.load_params( + exe, dirname="", filename=args.init_weights_path, main_program=tp) + else: + fluid.io.load_vars( + exe, dirname="", filename=args.init_weights_path, vars=myvars) def save_model(): @@ -80,6 +94,7 @@ args = parser.parse_args() models.clean() models.bn_momentum = 0.9997 models.dropout_keep_prop = 0.9 +models.label_number = args.num_classes deeplabv3p = models.deeplabv3p sp = fluid.Program() @@ -89,7 +104,7 @@ batch_size = args.batch_size image_shape = [crop_size, crop_size] reader.default_config['crop_size'] = crop_size reader.default_config['shuffle'] = True -num_classes = 19 +num_classes = args.num_classes weight_decay = 0.00004 base_lr = args.base_lr @@ -120,7 +135,7 @@ with fluid.program_guard(tp, sp): retv = opt.minimize(loss_mean, startup_program=sp, no_grad_set=no_grad_set) fluid.memory_optimize( - tp, print_log=False, skip_opt_set=[pred.name, loss_mean.name], level=1) + tp, print_log=False, skip_opt_set=set([pred.name, loss_mean.name]), level=1) place = fluid.CPUPlace() if args.use_gpu: @@ -155,8 +170,8 @@ for i, imgs, labels, names in batches: if i % 100 == 0: print("Model is saved to", args.save_weights_path) save_model() - print("step {:d}, loss: {:.6f}, step_time_cost: {:.3f}" .format(i, - np.mean(retv[1]), end_time - prev_start_time)) + print("step {:d}, loss: {:.6f}, step_time_cost: {:.3f}".format( + i, np.mean(retv[1]), end_time - prev_start_time)) 
print("Training done. Model is saved to", args.save_weights_path) save_model() diff --git a/fluid/PaddleNLP/text_matching_on_quora/.run_ce.sh b/fluid/PaddleNLP/text_matching_on_quora/.run_ce.sh new file mode 100644 index 0000000000000000000000000000000000000000..eca247a40a3f680a6a59c4a183bfba006ced8d44 --- /dev/null +++ b/fluid/PaddleNLP/text_matching_on_quora/.run_ce.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +export MKL_NUM_THREADS=1 +export OMP_NUM_THREADS=1 + +cudaid=${text_matching_on_quora:=0} # use 0-th card as default +export CUDA_VISIBLE_DEVICES=$cudaid + +FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce | python _ce.py + +cudaid=${text_matching_on_quora_m:=0,1,2,3} # use 0,1,2,3 card as default +export CUDA_VISIBLE_DEVICES=$cudaid + +FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce | python _ce.py diff --git a/fluid/PaddleNLP/text_matching_on_quora/__init__.py b/fluid/PaddleNLP/text_matching_on_quora/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/fluid/PaddleNLP/text_matching_on_quora/_ce.py b/fluid/PaddleNLP/text_matching_on_quora/_ce.py new file mode 100644 index 0000000000000000000000000000000000000000..b38ad21a1e0eb7407f78d100a3cb3659f6c5d8d3 --- /dev/null +++ b/fluid/PaddleNLP/text_matching_on_quora/_ce.py @@ -0,0 +1,65 @@ +# this file is only used for continuous evaluation test! 
+ +import os +import sys +sys.path.append(os.environ['ceroot']) +from kpi import CostKpi +from kpi import DurationKpi + + +each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.05, 0, actived=True) +train_avg_cost_card1_kpi = CostKpi('train_avg_cost_card1', 0.2, 0) +train_avg_acc_card1_kpi = CostKpi('train_avg_acc_card1', 0.02, 0) +each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.05, 0, actived=True) +train_avg_cost_card4_kpi = CostKpi('train_avg_cost_card4', 0.2, 0) +train_avg_acc_card4_kpi = CostKpi('train_avg_acc_card4', 0.02, 0) + +tracking_kpis = [ + each_pass_duration_card1_kpi, + train_avg_cost_card1_kpi, + train_avg_acc_card1_kpi, + each_pass_duration_card4_kpi, + train_avg_cost_card4_kpi, + train_avg_acc_card4_kpi, + ] + + +def parse_log(log): + ''' + This method should be implemented by model developers. + + The suggestion: + + each line in the log should be key, value, for example: + + " + train_cost\t1.0 + test_cost\t1.0 + train_cost\t1.0 + train_cost\t1.0 + train_acc\t1.2 + " + ''' + for line in log.split('\n'): + fs = line.strip().split('\t') + print(fs) + if len(fs) == 3 and fs[0] == 'kpis': + kpi_name = fs[1] + kpi_value = float(fs[2]) + yield kpi_name, kpi_value + + +def log_to_ce(log): + kpi_tracker = {} + for kpi in tracking_kpis: + kpi_tracker[kpi.name] = kpi + + for (kpi_name, kpi_value) in parse_log(log): + print(kpi_name, kpi_value) + kpi_tracker[kpi_name].add_record(kpi_value) + kpi_tracker[kpi_name].persist() + + +if __name__ == '__main__': + log = sys.stdin.read() + log_to_ce(log) diff --git a/fluid/PaddleNLP/text_matching_on_quora/train_and_evaluate.py b/fluid/PaddleNLP/text_matching_on_quora/train_and_evaluate.py index 0cca171933fac9dfc47baaf45d551b65d69c2f7a..714fa6f970d9f213efdc6b6e1799b244696fb20d 100755 --- a/fluid/PaddleNLP/text_matching_on_quora/train_and_evaluate.py +++ b/fluid/PaddleNLP/text_matching_on_quora/train_and_evaluate.py @@ -33,6 +33,7 @@ parser = 
argparse.ArgumentParser(description=__doc__) parser.add_argument('--model_name', type=str, default='cdssmNet', help="Which model to train") parser.add_argument('--config', type=str, default='cdssm_base', help="The global config setting") +parser.add_argument('--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.') DATA_DIR = os.path.join(os.path.expanduser('~'), '.cache/paddle/dataset') @@ -139,6 +140,13 @@ def train_and_evaluate(train_reader, else: feeder = fluid.DataFeeder(feed_list=[q1, q2, mask1, mask2, label], place=place) + # only for ce + args = parser.parse_args() + if args.enable_ce: + SEED = 102 + fluid.default_startup_program().random_seed = SEED + fluid.default_main_program().random_seed = SEED + # logging param info for param in fluid.default_main_program().global_block().all_parameters(): print("param name: %s; param shape: %s" % (param.name, param.shape)) @@ -167,8 +175,10 @@ def train_and_evaluate(train_reader, metric_type=global_config.metric_type) # start training + total_time = 0.0 print("[%s] Start Training" % time.asctime(time.localtime(time.time()))) for epoch_id in range(global_config.epoch_num): + data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 batch_id = 0 epoch_begin_time = time.time() @@ -177,8 +187,8 @@ def train_and_evaluate(train_reader, feed=feeder.feed(data), fetch_list=[cost, acc]) data_size = len(data) - total_acc += data_size * avg_acc_np - total_cost += data_size * avg_cost_np + total_acc += data_size * avg_acc_np[0] + total_cost += data_size * avg_cost_np[0] data_count += data_size if batch_id % 100 == 0: print("[%s] epoch_id: %d, batch_id: %d, cost: %f, acc: %f" % ( @@ -188,16 +198,30 @@ def train_and_evaluate(train_reader, avg_cost_np, avg_acc_np)) batch_id += 1 - avg_cost = total_cost / data_count avg_acc = total_acc / data_count - + epoch_end_time = time.time() + total_time += epoch_end_time - epoch_begin_time + print("") print("[%s] epoch_id: %d, train_avg_cost: %f, 
train_avg_acc: %f, epoch_time_cost: %f" % ( time.asctime( time.localtime(time.time())), epoch_id, avg_cost, avg_acc, time.time() - epoch_begin_time)) + # only for ce + if epoch_id == global_config.epoch_num - 1 and args.enable_ce: + #Note: The following logs are special for CE monitoring. + #Other situations do not need to care about these logs. + gpu_num = get_cards(args) + print("kpis\teach_pass_duration_card%s\t%s" % \ + (gpu_num, total_time / (global_config.epoch_num))) + print("kpis\ttrain_avg_cost_card%s\t%s" % + (gpu_num, avg_cost)) + print("kpis\ttrain_avg_acc_card%s\t%s" % + (gpu_num, avg_acc)) + + epoch_model = global_config.save_dirname + "/" + "epoch" + str(epoch_id) fluid.io.save_inference_model(epoch_model, ["question1", "question2", "label"], acc, exe) @@ -267,5 +291,15 @@ def main(): use_cuda=global_config.use_cuda, parallel=False) + +def get_cards(args): + if args.enable_ce: + cards = os.environ.get('CUDA_VISIBLE_DEVICES') + num = len(cards.split(",")) + return num + else: + return args.num_devices + + if __name__ == "__main__": main()