diff --git a/fluid/chinese_ner/infer.py b/fluid/chinese_ner/infer.py
index 466c34b067e2a88eac50c9c9577004ca622d9adc..d1664eb9810f49a9bc9e614f3576e0f88216846f 100644
--- a/fluid/chinese_ner/infer.py
+++ b/fluid/chinese_ner/infer.py
@@ -1,60 +1,162 @@
 import numpy as np
+import argparse
+import time
+
 import paddle.fluid as fluid
+import paddle.fluid.profiler as profiler
 import paddle
 import reader
 
 
+def parse_args():
+    parser = argparse.ArgumentParser(description="Run inference.")
+    parser.add_argument(
+        '--batch_size',
+        type=int,
+        default=6,
+        help='The size of a batch. (default: %(default)d)')
+    parser.add_argument(
+        '--device',
+        type=str,
+        default='GPU',
+        choices=['CPU', 'GPU'],
+        help='The device type. (default: %(default)s)')
+    parser.add_argument(
+        '--model_path',
+        type=str,
+        default='model/params_pass_0',
+        help='A path to the model. (default: %(default)s)')
+    parser.add_argument(
+        '--test_data_dir',
+        type=str,
+        default='data/test_files',
+        help='A directory with test data files. (default: %(default)s)')
+    parser.add_argument(
+        '--test_label_file',
+        type=str,
+        default='data/label_dict',
+        help='A file with test labels. (default: %(default)s)')
+    parser.add_argument(
+        '--num_passes', type=int, default=1, help='The number of passes.')
+    parser.add_argument(
+        '--skip_pass_num',
+        type=int,
+        default=0,
+        help='The number of initial passes to skip in statistics calculations.')
+    parser.add_argument(
+        '--profile', action='store_true', help='If set, do profiling.')
+    args = parser.parse_args()
+    return args
+
+
+def print_arguments(args):
+    print('----------- Configuration Arguments -----------')
+    for arg, value in sorted(vars(args).iteritems()):
+        print('%s: %s' % (arg, value))
+    print('------------------------------------------------')
+
+
 def load_reverse_dict(dict_path):
     return dict((idx, line.strip().split("\t")[0])
                 for idx, line in enumerate(open(dict_path, "r").readlines()))
 
 
-def infer(model_path, batch_size, test_data_file, target_file):
+def infer(args):
     word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
     mention = fluid.layers.data(
         name='mention', shape=[1], dtype='int64', lod_level=1)
     target = fluid.layers.data(
         name='target', shape=[1], dtype='int64', lod_level=1)
 
-    label_reverse_dict = load_reverse_dict(target_file)
+    label_reverse_dict = load_reverse_dict(args.test_label_file)
 
     test_data = paddle.batch(
-        reader.file_reader(test_data_file), batch_size=batch_size)
-    place = fluid.CPUPlace()
+        reader.file_reader(args.test_data_dir), batch_size=args.batch_size)
+    place = fluid.CUDAPlace(0) if args.device == 'GPU' else fluid.CPUPlace()
     feeder = fluid.DataFeeder(feed_list=[word, mention, target], place=place)
     exe = fluid.Executor(place)
 
     inference_scope = fluid.core.Scope()
     with fluid.scope_guard(inference_scope):
         [inference_program, feed_target_names,
-         fetch_targets] = fluid.io.load_inference_model(model_path, exe)
-        for data in test_data():
-            crf_decode = exe.run(inference_program,
-                                 feed=feeder.feed(data),
-                                 fetch_list=fetch_targets,
-                                 return_numpy=False)
-            lod_info = (crf_decode[0].lod())[0]
-            np_data = np.array(crf_decode[0])
-            assert len(data) == len(lod_info) - 1
-            for sen_index in xrange(len(data)):
-                assert len(data[sen_index][0]) == lod_info[
-                    sen_index + 1] - lod_info[sen_index]
-                word_index = 0
-                for tag_index in xrange(lod_info[sen_index],
-                                        lod_info[sen_index + 1]):
-                    word = str(data[sen_index][0][word_index])
-                    gold_tag = label_reverse_dict[data[sen_index][2][
-                        word_index]]
-                    tag = label_reverse_dict[np_data[tag_index][0]]
-                    print word + "\t" + gold_tag + "\t" + tag
"\t" + gold_tag + "\t" + tag - word_index += 1 - print "" + fetch_targets] = fluid.io.load_inference_model(args.model_path, exe) + total_passes = args.num_passes + args.skip_pass_num + batch_times = [0] * total_passes + word_counts = [0] * total_passes + wpses = [0] * total_passes + all_iters = 0 + for pass_id in range(total_passes): + if pass_id < args.skip_pass_num: + print("Warm-up pass") + if pass_id == args.skip_pass_num: + profiler.reset_profiler() + iters = 0 + for data in test_data(): + start = time.time() + crf_decode = exe.run(inference_program, + feed=feeder.feed(data), + fetch_list=fetch_targets, + return_numpy=False) + batch_time = time.time() - start + lod_info = (crf_decode[0].lod())[0] + np_data = np.array(crf_decode[0]) + word_count = 0 + assert len(data) == len(lod_info) - 1 + for sen_index in xrange(len(data)): + assert len(data[sen_index][0]) == lod_info[ + sen_index + 1] - lod_info[sen_index] + word_index = 0 + for tag_index in xrange(lod_info[sen_index], + lod_info[sen_index + 1]): + word = str(data[sen_index][0][word_index]) + gold_tag = label_reverse_dict[data[sen_index][2][ + word_index]] + tag = label_reverse_dict[np_data[tag_index][0]] + word_index += 1 + word_count += word_index + batch_times[pass_id] += batch_time + word_counts[pass_id] += word_count + iters += 1 + all_iters += 1 + batch_times[pass_id] /= iters + word_counts[pass_id] /= iters + wps = word_counts[pass_id] / batch_times[pass_id] + wpses[pass_id] = wps + + print( + "Pass: %d, iterations (total): %d (%d), latency: %.5f s, words: %d, wps: %f" + % (pass_id, iters, all_iters, batch_times[pass_id], + word_counts[pass_id], wps)) + + # Postprocess benchmark data + latencies = batch_times[args.skip_pass_num:] + latency_avg = np.average(latencies) + latency_std = np.std(latencies) + latency_pc99 = np.percentile(latencies, 99) + wps_avg = np.average(wpses) + wps_std = np.std(wpses) + wps_pc01 = np.percentile(wpses, 1) + + # Benchmark output + print('\nTotal passes (incl. warm-up): %d' % (total_passes)) + print('Total iterations (incl. warm-up): %d' % (all_iters)) + print('Total examples (incl. warm-up): %d' % (all_iters * args.batch_size)) + print('avg latency: %.5f, std latency: %.5f, 99pc latency: %.5f' % + (latency_avg, latency_std, latency_pc99)) + print('avg wps: %.5f, std wps: %.5f, wps for 99pc latency: %.5f' % + (wps_avg, wps_std, wps_pc01)) if __name__ == "__main__": - infer( - model_path="output/params_pass_0", - batch_size=6, - test_data_file="data/test_files", - target_file="data/label_dict") + args = parse_args() + print_arguments(args) + if args.profile: + if args.device == 'GPU': + with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof: + infer(args) + else: + with profiler.profiler('CPU', sorted_key='total') as cpuprof: + infer(args) + else: + infer(args) diff --git a/fluid/chinese_ner/scripts/README.md b/fluid/chinese_ner/scripts/README.md new file mode 100644 index 0000000000000000000000000000000000000000..44e91b5c772e46466dadc6f5d291f0d09e3268c6 --- /dev/null +++ b/fluid/chinese_ner/scripts/README.md @@ -0,0 +1,36 @@ +## Purpose of this directory +The purpose of this directory is to provide exemplary execution commands. They are inside bash scripts described below. 
+
+## Training
+### CPU with MKL-DNN
+Run:
+`./train.sh MKLDNN`
+### CPU without MKL-DNN
+Run:
+`./train.sh CPU`
+### GPU
+Run:
+`./train.sh GPU`
+
+## Inference
+### CPU with MKL-DNN
+Run:
+`./infer.sh MKLDNN`
+### CPU without MKL-DNN
+Run:
+`./infer.sh CPU`
+### GPU
+Run:
+`./infer.sh GPU`
diff --git a/fluid/chinese_ner/scripts/infer.sh b/fluid/chinese_ner/scripts/infer.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7fa3367b71996e9049da05a48a3fb245538c9f1b
--- /dev/null
+++ b/fluid/chinese_ner/scripts/infer.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+export MKL_NUM_THREADS=1
+export OMP_NUM_THREADS=1
+
+mode=$1  # GPU, CPU, or MKLDNN
+if [ "$mode" = "CPU" ]; then
+    device="CPU"
+    model_path="cpu_model"
+elif [ "$mode" = "GPU" ]; then
+    device="GPU"
+    model_path="gpu_model"
+elif [ "$mode" = "MKLDNN" ]; then
+    device="CPU"
+    model_path="mkldnn_model"
+    export FLAGS_use_mkldnn=1
+else
+    echo "Invalid mode provided. Please use one of {GPU, CPU, MKLDNN}"
+    exit 1
+fi
+
+ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs`
+if [ $ht -eq 1 ]; then  # HT is OFF
+    if [ -z "$KMP_AFFINITY" ]; then
+        export KMP_AFFINITY="granularity=fine,compact,0,0"
+    fi
+    if [ -z "$OMP_DYNAMIC" ]; then
+        export OMP_DYNAMIC="FALSE"
+    fi
+else  # HT is ON
+    if [ -z "$KMP_AFFINITY" ]; then
+        export KMP_AFFINITY="granularity=fine,compact,1,0"
+    fi
+fi
+
+python ../infer.py \
+    --device $device \
+    --num_passes 1 \
+    --skip_pass_num 2 \
+    --profile \
+    --test_data_dir ../data/test_files \
+    --test_label_file ../data/label_dict \
+    --model_path $model_path/params_pass_0
diff --git a/fluid/chinese_ner/scripts/train.sh b/fluid/chinese_ner/scripts/train.sh
new file mode 100644
index 0000000000000000000000000000000000000000..999c1f0cd9e8723984f8141916d0a93df60a3380
--- /dev/null
+++ b/fluid/chinese_ner/scripts/train.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+export MKL_NUM_THREADS=1
+export OMP_NUM_THREADS=1
+
+mode=$1  # GPU, CPU, or MKLDNN
+if [ "$mode" = "CPU" ]; then
+    device="CPU"
+    parallel="--parallel True"
+    save_model_dir="cpu_model"
+elif [ "$mode" = "GPU" ]; then
+    device="GPU"
+    parallel="--parallel True"
+    save_model_dir="gpu_model"
+elif [ "$mode" = "MKLDNN" ]; then
+    device="CPU"
+    parallel=""
+    save_model_dir="mkldnn_model"
+    export FLAGS_use_mkldnn=1
+else
+    echo "Invalid mode provided. Please use one of {GPU, CPU, MKLDNN}"
+    exit 1
+fi
+
+ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs`
+if [ $ht -eq 1 ]; then  # HT is OFF
+    if [ -z "$KMP_AFFINITY" ]; then
+        export KMP_AFFINITY="granularity=fine,compact,0,0"
+    fi
+    if [ -z "$OMP_DYNAMIC" ]; then
+        export OMP_DYNAMIC="FALSE"
+    fi
+else  # HT is ON
+    if [ -z "$KMP_AFFINITY" ]; then
+        export KMP_AFFINITY="granularity=fine,compact,1,0"
+    fi
+fi
+
+python ../train.py \
+    --device $device \
+    $parallel \
+    --model_save_dir $save_model_dir \
+    --test_data_dir ../data/test_files \
+    --train_data_dir ../data/train_files \
+    --num_passes 1
diff --git a/fluid/chinese_ner/train.py b/fluid/chinese_ner/train.py
index 9e93fbd95130deb104090c5a0c79d018ef4f1500..ca220d9928ef4f1639335214b1845c0ec0120f0d 100644
--- a/fluid/chinese_ner/train.py
+++ b/fluid/chinese_ner/train.py
@@ -1,6 +1,7 @@
 import os
 import math
 import time
+import argparse
 
 import numpy as np
 import paddle
@@ -10,6 +11,65 @@ from paddle.fluid.initializer import NormalInitializer
 import reader
 
 
+def parse_args():
+    parser = argparse.ArgumentParser(description="Run training.")
+    parser.add_argument(
+        '--batch_size',
+        type=int,
+        default=256,
+        help='The size of a batch. (default: %(default)d)')
+    parser.add_argument(
+        '--word_dict_len',
+        type=int,
+        default=1942563,
+        help='The length of the word dictionary. (default: %(default)d)')
+    parser.add_argument(
+        '--label_dict_len',
+        type=int,
+        default=49,
+        help='The length of the label dictionary. (default: %(default)d)')
+    parser.add_argument(
+        '--device',
+        type=str,
+        default='GPU',
+        choices=['CPU', 'GPU'],
+        help='The device type. (default: %(default)s)')
+    parser.add_argument(
+        '--train_data_dir',
+        type=str,
+        default='data/train_files',
+        help='A directory with train data files. (default: %(default)s)')
+    parser.add_argument(
+        '--parallel',
+        type=bool,
+        default=False,
+        help="Whether to use parallel training. (default: %(default)s)")
+    parser.add_argument(
+        '--test_data_dir',
+        type=str,
+        default='data/test_files',
+        help='A directory with test data files. (default: %(default)s)')
+    parser.add_argument(
+        '--model_save_dir',
+        type=str,
+        default='./output',
+        help='A directory for saving models. (default: %(default)s)')
+    parser.add_argument(
+        '--num_passes',
+        type=int,
+        default=1000,
+        help='The number of epochs. (default: %(default)d)')
+    args = parser.parse_args()
+    return args
+
+
+def print_arguments(args):
+    print('----------- Configuration Arguments -----------')
+    for arg, value in sorted(vars(args).iteritems()):
+        print('%s: %s' % (arg, value))
+    print('------------------------------------------------')
+
+
 def load_reverse_dict(dict_path):
     return dict((idx, line.strip().split("\t")[0])
                 for idx, line in enumerate(open(dict_path, "r").readlines()))
@@ -197,32 +257,27 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place,
     return chunk_evaluator.eval()
 
 
-def main(train_data_file, test_data_file, model_save_dir, num_passes):
-    if not os.path.exists(model_save_dir):
-        os.mkdir(model_save_dir)
-
-    BATCH_SIZE = 256
-    word_dict_len = 1942563
-    label_dict_len = 49
+def main(args):
+    if not os.path.exists(args.model_save_dir):
+        os.makedirs(args.model_save_dir)
 
     main = fluid.Program()
     startup = fluid.Program()
     with fluid.program_guard(main, startup):
-        avg_cost, feature_out, word, mention, target = ner_net(word_dict_len,
-                                                               label_dict_len)
+        avg_cost, feature_out, word, mention, target = ner_net(
+            args.word_dict_len, args.label_dict_len)
+
+        sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
+        sgd_optimizer.minimize(avg_cost)
 
         crf_decode = fluid.layers.crf_decoding(
             input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
 
-        sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
-        sgd_optimizer.minimize(avg_cost)
-
         (precision, recall, f1_score, num_infer_chunks, num_label_chunks,
         num_correct_chunks) = fluid.layers.chunk_eval(
              input=crf_decode,
             label=target,
             chunk_scheme="IOB",
-             num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))
+             num_chunk_types=int(math.ceil((args.label_dict_len - 1) / 2.0)))
 
         chunk_evaluator = fluid.metrics.ChunkEvaluator()
@@ -233,28 +288,33 @@ def main(train_data_file, test_data_file, model_save_dir, num_passes):
 
     train_reader = paddle.batch(
         paddle.reader.shuffle(
-            reader.file_reader(train_data_file), buf_size=2000000),
-        batch_size=BATCH_SIZE)
+            reader.file_reader(args.train_data_dir), buf_size=2000000),
+        batch_size=args.batch_size)
     test_reader = paddle.batch(
         paddle.reader.shuffle(
-            reader.file_reader(test_data_file), buf_size=2000000),
-        batch_size=BATCH_SIZE)
+            reader.file_reader(args.test_data_dir), buf_size=2000000),
+        batch_size=args.batch_size)
 
-    place = fluid.CUDAPlace(0)
+    place = fluid.CUDAPlace(0) if args.device == 'GPU' else fluid.CPUPlace()
     feeder = fluid.DataFeeder(
         feed_list=[word, mention, target], place=place)
     exe = fluid.Executor(place)
 
     exe.run(startup)
-    train_exe = fluid.ParallelExecutor(
-        loss_name=avg_cost.name, use_cuda=True)
-    test_exe = fluid.ParallelExecutor(
-        use_cuda=True,
-        main_program=inference_program,
-        share_vars_from=train_exe)
+    if args.parallel:
+        train_exe = fluid.ParallelExecutor(
+            loss_name=avg_cost.name, use_cuda=(args.device == 'GPU'))
+        test_exe = fluid.ParallelExecutor(
+            use_cuda=(args.device == 'GPU'),
+            main_program=inference_program,
+            share_vars_from=train_exe)
+    else:
+        train_exe = exe
+        test_exe = exe
+
     batch_id = 0
-    for pass_id in xrange(num_passes):
+    for pass_id in xrange(args.num_passes):
         chunk_evaluator.reset()
         train_reader_iter = train_reader()
         start_time = time.time()
@@ -286,15 +346,13 @@ def main(train_data_file, test_data_file, model_save_dir, num_passes):
                 [num_infer_chunks, num_label_chunks, num_correct_chunks])
             print("[Test] precision:" + str(p) + ", recall:" + str(r) +
                   ", f1:" + str(f1))
-            save_dirname = os.path.join(model_save_dir,
+            save_dirname = os.path.join(args.model_save_dir,
                                         "params_pass_%d" % pass_id)
-            fluid.io.save_inference_model(save_dirname, ['word', 'mention'],
-                                          [crf_decode], exe)
+            fluid.io.save_inference_model(
+                save_dirname, ['word', 'mention', 'target'], [crf_decode], exe)
 
 
 if __name__ == "__main__":
-    main(
-        train_data_file="./data/train_files",
-        test_data_file="./data/test_files",
-        model_save_dir="./output",
-        num_passes=1000)
+    args = parse_args()
+    print_arguments(args)
+    main(args)
diff --git a/fluid/ocr_recognition/scripts/train.sh b/fluid/ocr_recognition/scripts/train.sh
index ceb7c06c0548e1d5ff90651d2fd8624288cc8804..f506530390388e365c22fb7fe1cb32f553e621d6 100644
--- a/fluid/ocr_recognition/scripts/train.sh
+++ b/fluid/ocr_recognition/scripts/train.sh
@@ -18,10 +18,6 @@ elif [ "$mode" = "GPU" ]; then
     save_model_dir="gpu_model"
     parallel="True"
 elif [ "$mode" = "MKLDNN" ]; then
-    if [ $core_num -gt $batch_size ]; then
-        echo "Batch size should be greater or equal to the number of
-              available cores, when parallel mode is set to True."
-    fi
     use_gpu="False"
     save_model_dir="mkldnn_model"
     parallel="False"