diff --git a/fluid/image_classification/caffe2fluid/kaffe/graph.py b/fluid/image_classification/caffe2fluid/kaffe/graph.py index b342ce4dcb3dda6e38597457839ba376c9a27354..baea3cc1dc9431d07d0d3ca7191a429d1ef0f398 100644 --- a/fluid/image_classification/caffe2fluid/kaffe/graph.py +++ b/fluid/image_classification/caffe2fluid/kaffe/graph.py @@ -122,8 +122,8 @@ class Graph(object): def compute_output_shapes(self): sorted_nodes = self.topologically_sorted() for node in sorted_nodes: - node.output_shape = make_tensor(*NodeKind.compute_output_shape( - node)) + node.output_shape = make_tensor( + *NodeKind.compute_output_shape(node)) def replaced(self, new_nodes): return Graph(nodes=new_nodes, name=self.name, trace=self.output_trace) diff --git a/fluid/image_classification/reader.py b/fluid/image_classification/reader.py index b503b67ce09fba80bc49a07665ba0290e75f1ed1..3ea26ccbfead97dc97e8858ee05a6582f2b3bc9e 100644 --- a/fluid/image_classification/reader.py +++ b/fluid/image_classification/reader.py @@ -160,5 +160,5 @@ def val(file_list=TEST_LIST): return _reader_creator(file_list, 'val', shuffle=False) -def test(file_list): +def test(file_list=TEST_LIST): return _reader_creator(file_list, 'test', shuffle=False) diff --git a/fluid/image_classification/train.py b/fluid/image_classification/train.py index 74588e21c93e40ee7f5bcde7d6cbbc7c873278ba..51bf9901246cb554baaef22a8e526d0ecd81bd0a 100644 --- a/fluid/image_classification/train.py +++ b/fluid/image_classification/train.py @@ -157,7 +157,8 @@ def train(args): test_reader = paddle.batch(reader.val(), batch_size=test_batch_size) feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) - train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name) + train_exe = fluid.ParallelExecutor( + use_cuda=True if args.use_gpu else False, loss_name=avg_cost.name) fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name] diff --git a/fluid/mnist/_ce.py b/fluid/mnist/_ce.py index 8767e5692ea02d6e181cde714d4c53feba05e3ab..9c2dba53526d2e976252fce05c7ff7f0f44b39b2 100644 --- a/fluid/mnist/_ce.py +++ b/fluid/mnist/_ce.py @@ -19,6 +19,7 @@ tracking_kpis = [ train_duration_kpi, ] + def parse_log(log): ''' This method should be implemented by model developers. @@ -37,7 +38,7 @@ def parse_log(log): ''' for line in log.split('\n'): fs = line.strip().split('\t') - print (fs) + print(fs) if len(fs) == 3 and fs[0] == 'kpis': kpi_name = fs[1] kpi_value = float(fs[2]) @@ -50,12 +51,11 @@ def log_to_ce(log): kpi_tracker[kpi.name] = kpi for (kpi_name, kpi_value) in parse_log(log): - print (kpi_name, kpi_value) + print(kpi_name, kpi_value) kpi_tracker[kpi_name].add_record(kpi_value) kpi_tracker[kpi_name].persist() if __name__ == '__main__': log = sys.stdin.read() - log_to_ce(log) - + log_to_ce(log) diff --git a/fluid/mnist/model.py b/fluid/mnist/model.py index aa470e86d2f555a7cf778793de4cf47fc4841868..e719cca404092fdf96484093d39f2c7c54cd9988 100644 --- a/fluid/mnist/model.py +++ b/fluid/mnist/model.py @@ -183,10 +183,10 @@ def run_benchmark(model, args): (pass_end - pass_start))) #Note: The following logs are special for CE monitoring. #Other situations do not need to care about these logs. - print ("kpis train_acc %f" % train_avg_acc) - print ("kpis train_cost %f" % train_avg_loss) - print ("kpis test_acc %f" % test_avg_acc) - print ("kpis train_duration %f" % (pass_end - pass_start)) + print("kpis train_acc %f" % train_avg_acc) + print("kpis train_cost %f" % train_avg_loss) + print("kpis test_acc %f" % test_avg_acc) + print("kpis train_duration %f" % (pass_end - pass_start)) if __name__ == '__main__': diff --git a/fluid/ocr_recognition/README.md b/fluid/ocr_recognition/README.md index 4475695fdd06596b44967bd2d5d44530cccbf2e5..50b72440818384a0d8e80ab214faaabddbd93f90 100644 --- a/fluid/ocr_recognition/README.md +++ b/fluid/ocr_recognition/README.md @@ -113,6 +113,10 @@ data/test_images/00003.jpg ``` env CUDA_VISIABLE_DEVICES=0 python ctc_train.py ``` +使用默认数据在CPU上训练: +``` +env OMP_NUM_THREADS= python ctc_train.py --use_gpu False --parallel=False +``` 使用默认数据在GPU多卡上训练: diff --git a/fluid/ocr_recognition/crnn_ctc_model.py b/fluid/ocr_recognition/crnn_ctc_model.py index 79cf7b23954ce3331f46c50ee165dac720deae43..a5d4c70f868a6c973ff3e8b372a2eb387d1f191f 100644 --- a/fluid/ocr_recognition/crnn_ctc_model.py +++ b/fluid/ocr_recognition/crnn_ctc_model.py @@ -12,7 +12,8 @@ def conv_bn_pool(input, bias=None, param_0=None, is_test=False, - pooling=True): + pooling=True, + use_cudnn=False): tmp = input for i in xrange(group): tmp = fluid.layers.conv2d( @@ -22,7 +23,7 @@ def conv_bn_pool(input, padding=1, param_attr=param if param_0 is None else param_0, act=None, # LinearActivation - use_cudnn=True) + use_cudnn=use_cudnn) tmp = fluid.layers.batch_norm( input=tmp, act=act, @@ -35,13 +36,17 @@ def conv_bn_pool(input, pool_size=2, pool_type='max', pool_stride=2, - use_cudnn=True, + use_cudnn=use_cudnn, ceil_mode=True) return tmp -def ocr_convs(input, regularizer=None, gradient_clip=None, is_test=False): +def ocr_convs(input, + regularizer=None, + gradient_clip=None, + is_test=False, + use_cudnn=False): b = fluid.ParamAttr( regularizer=regularizer, gradient_clip=gradient_clip, @@ -56,12 +61,36 @@ def ocr_convs(input, regularizer=None, gradient_clip=None, is_test=False): initializer=fluid.initializer.Normal(0.0, 0.01)) tmp = input tmp = conv_bn_pool( - tmp, 2, [16, 16], param=w1, bias=b, param_0=w0, is_test=is_test) + tmp, + 2, [16, 16], + param=w1, + bias=b, + param_0=w0, + is_test=is_test, + use_cudnn=use_cudnn) - tmp = conv_bn_pool(tmp, 2, [32, 32], param=w1, bias=b, is_test=is_test) - tmp = conv_bn_pool(tmp, 2, [64, 64], param=w1, bias=b, is_test=is_test) tmp = conv_bn_pool( - tmp, 2, [128, 128], param=w1, bias=b, is_test=is_test, pooling=False) + tmp, + 2, [32, 32], + param=w1, + bias=b, + is_test=is_test, + use_cudnn=use_cudnn) + tmp = conv_bn_pool( + tmp, + 2, [64, 64], + param=w1, + bias=b, + is_test=is_test, + use_cudnn=use_cudnn) + tmp = conv_bn_pool( + tmp, + 2, [128, 128], + param=w1, + bias=b, + is_test=is_test, + pooling=False, + use_cudnn=use_cudnn) return tmp @@ -70,12 +99,14 @@ def encoder_net(images, rnn_hidden_size=200, regularizer=None, gradient_clip=None, - is_test=False): + is_test=False, + use_cudnn=False): conv_features = ocr_convs( images, regularizer=regularizer, gradient_clip=gradient_clip, - is_test=is_test) + is_test=is_test, + use_cudnn=use_cudnn) sliced_feature = fluid.layers.im2sequence( input=conv_features, stride=[1, 1], @@ -142,7 +173,11 @@ def ctc_train_net(images, label, args, num_classes): learning_rate_decay = None regularizer = fluid.regularizer.L2Decay(L2_RATE) - fc_out = encoder_net(images, num_classes, regularizer=regularizer) + fc_out = encoder_net( + images, + num_classes, + regularizer=regularizer, + use_cudnn=True if args.use_gpu else False) cost = fluid.layers.warpctc( input=fc_out, label=label, blank=num_classes, norm_by_times=True) sum_cost = fluid.layers.reduce_sum(cost) @@ -166,19 +201,18 @@ def ctc_train_net(images, label, args, num_classes): if args.average_window > 0: model_average = fluid.optimizer.ModelAverage( args.average_window, - params_grads, min_average_window=args.min_average_window, max_average_window=args.max_average_window) return sum_cost, error_evaluator, inference_program, model_average -def ctc_infer(images, num_classes): - fc_out = encoder_net(images, num_classes, is_test=True) +def ctc_infer(images, num_classes, use_cudnn): + fc_out = encoder_net(images, num_classes, is_test=True, use_cudnn=use_cudnn) return fluid.layers.ctc_greedy_decoder(input=fc_out, blank=num_classes) -def ctc_eval(images, label, num_classes): - fc_out = encoder_net(images, num_classes, is_test=True) +def ctc_eval(images, label, num_classes, use_cudnn): + fc_out = encoder_net(images, num_classes, is_test=True, use_cudnn=use_cudnn) decoded_out = fluid.layers.ctc_greedy_decoder( input=fc_out, blank=num_classes) diff --git a/fluid/ocr_recognition/ctc_reader.py b/fluid/ocr_recognition/ctc_reader.py index db05dbeae73b67b12aebacdc84a04d5b180d2132..9cbe310c9656d0501915281a5bc0f5236a42f6b1 100644 --- a/fluid/ocr_recognition/ctc_reader.py +++ b/fluid/ocr_recognition/ctc_reader.py @@ -25,7 +25,7 @@ class DataGenerator(object): def __init__(self): pass - def train_reader(self, img_root_dir, img_label_list, batchsize): + def train_reader(self, img_root_dir, img_label_list, batchsize, cycle): ''' Reader interface for training. @@ -35,6 +35,10 @@ class DataGenerator(object): :param img_label_list: The path of the file for training. :type img_label_list: str + :param cycle: If number of iterations is greater than dataset_size / batch_size + it reiterates dataset over as many times as necessary. + :type cycle: bool + ''' img_label_lines = [] @@ -65,24 +69,29 @@ class DataGenerator(object): def reader(): sizes = len(img_label_lines) / batchsize - for i in range(sizes): - result = [] - sz = [0, 0] - for j in range(batchsize): - line = img_label_lines[i * batchsize + j] - # h, w, img_name, labels - items = line.split(' ') - - label = [int(c) for c in items[-1].split(',')] - img = Image.open(os.path.join(img_root_dir, items[ - 2])).convert('L') #zhuanhuidu - if j == 0: - sz = img.size - img = img.resize((sz[0], sz[1])) - img = np.array(img) - 127.5 - img = img[np.newaxis, ...] - result.append([img, label]) - yield result + if sizes == 0: + raise ValueError('Batch size is bigger than the dataset size.') + while True: + for i in range(sizes): + result = [] + sz = [0, 0] + for j in range(batchsize): + line = img_label_lines[i * batchsize + j] + # h, w, img_name, labels + items = line.split(' ') + + label = [int(c) for c in items[-1].split(',')] + img = Image.open(os.path.join(img_root_dir, items[ + 2])).convert('L') #zhuanhuidu + if j == 0: + sz = img.size + img = img.resize((sz[0], sz[1])) + img = np.array(img) - 127.5 + img = img[np.newaxis, ...] + result.append([img, label]) + yield result + if not cycle: + break return reader @@ -111,7 +120,7 @@ class DataGenerator(object): return reader - def infer_reader(self, img_root_dir=None, img_label_list=None): + def infer_reader(self, img_root_dir=None, img_label_list=None, cycle=False): '''A reader interface for inference. :param img_root_dir: The root path of the images for training. @@ -122,11 +131,15 @@ class DataGenerator(object): was None. If img_label_list was set to None, it will read image path from stdin. :type img_root_dir: str + + :param cycle: If number of iterations is greater than dataset_size / + batch_size it reiterates dataset over as many times as necessary. + :type cycle: bool ''' def reader(): - if img_label_list is not None: - for line in open(img_label_list): + def yield_img_and_label(lines): + for line in lines: if img_root_dir is not None: # h, w, img_name, labels img_name = line.split(' ')[2] @@ -138,6 +151,16 @@ class DataGenerator(object): img = img[np.newaxis, ...] label = [int(c) for c in line.split(' ')[3].split(',')] yield img, label + + if img_label_list is not None: + lines = [] + with open(img_label_list) as f: + lines = f.readlines() + for img, label in yield_img_and_label(lines): + yield img, label + while cycle: + for img, label in yield_img_and_label(lines): + yield img, label else: while True: img_path = raw_input("Please input the path of image: ") @@ -161,14 +184,15 @@ def data_shape(): return DATA_SHAPE -def train(batch_size, train_images_dir=None, train_list_file=None): +def train(batch_size, train_images_dir=None, train_list_file=None, cycle=False): generator = DataGenerator() if train_images_dir is None: data_dir = download_data() train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME) if train_list_file is None: train_list_file = path.join(data_dir, TRAIN_LIST_FILE_NAME) - return generator.train_reader(train_images_dir, train_list_file, batch_size) + return generator.train_reader(train_images_dir, train_list_file, batch_size, + cycle) def test(batch_size=1, test_images_dir=None, test_list_file=None): @@ -182,10 +206,14 @@ def test(batch_size=1, test_images_dir=None, test_list_file=None): generator.test_reader(test_images_dir, test_list_file), batch_size) -def inference(infer_images_dir=None, infer_list_file=None): +def inference(batch_size=1, + infer_images_dir=None, + infer_list_file=None, + cycle=False): generator = DataGenerator() return paddle.batch( - generator.infer_reader(infer_images_dir, infer_list_file), 1) + generator.infer_reader(infer_images_dir, infer_list_file, cycle), + batch_size) def download_data(): diff --git a/fluid/ocr_recognition/ctc_train.py b/fluid/ocr_recognition/ctc_train.py index dde07e51887ab6d7724f9b8893ae49479ee7b9a7..d2d79d78f717959e0a5c763e7d6b5d8fcf9f9bdc 100644 --- a/fluid/ocr_recognition/ctc_train.py +++ b/fluid/ocr_recognition/ctc_train.py @@ -1,5 +1,6 @@ """Trainer for OCR CTC model.""" import paddle.fluid as fluid +import paddle.fluid.profiler as profiler from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data from crnn_ctc_model import ctc_train_net import ctc_reader @@ -14,7 +15,7 @@ parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable add_arg('batch_size', int, 32, "Minibatch size.") -add_arg('total_step', int, 720000, "Number of training iterations.") +add_arg('total_step', int, 720000, "The number of iterations. Zero or less means whole training set. More than 0 means the training set might be looped until # of iterations is reached.") add_arg('log_period', int, 1000, "Log period.") add_arg('save_model_period', int, 15000, "Save model period. '-1' means never saving the model.") add_arg('eval_period', int, 15000, "Evaluate period. '-1' means never evaluating the model.") @@ -25,6 +26,9 @@ add_arg('min_average_window',int, 10000, "Min average window.") add_arg('max_average_window',int, 12500, "Max average window. It is proposed to be set as the number of minibatch in a pass.") add_arg('average_window', float, 0.15, "Average window.") add_arg('parallel', bool, False, "Whether use parallel training.") +add_arg('profile', bool, False, "Whether to use profiling.") +add_arg('skip_batch_num', int, 0, "The number of first minibatches to skip as warm-up for better performance test.") +add_arg('skip_test', bool, False, "Whether to skip test phase.") # yapf: enable @@ -49,7 +53,8 @@ def train(args, data_reader=ctc_reader): train_reader = data_reader.train( args.batch_size, train_images_dir=train_images, - train_list_file=train_list) + train_list_file=train_list, + cycle=args.total_step > 0) test_reader = data_reader.test( test_images_dir=test_images, test_list_file=test_list) @@ -74,7 +79,7 @@ def train(args, data_reader=ctc_reader): error_evaluator.reset(exe) if args.parallel: train_exe = fluid.ParallelExecutor( - use_cuda=True, loss_name=sum_cost.name) + use_cuda=True if args.use_gpu else False, loss_name=sum_cost.name) fetch_vars = [sum_cost] + error_evaluator.metrics @@ -85,8 +90,8 @@ def train(args, data_reader=ctc_reader): feed=get_feeder_data(data, place)) results = [np.array(result).sum() for result in results] else: - results = exe.run(feed=get_feeder_data(data, place), - fetch_list=fetch_vars) + results = train_exe.run(feed=get_feeder_data(data, place), + fetch_list=fetch_vars) results = [result[0] for result in results] return results @@ -105,17 +110,29 @@ def train(args, data_reader=ctc_reader): print "Saved model to: %s/%s." % (args.save_model_dir, filename) iter_num = 0 - while True: + stop = False + while not stop: total_loss = 0.0 total_seq_error = 0.0 + batch_times = [] # train a pass for data in train_reader(): - iter_num += 1 - if iter_num > args.total_step: - return + if args.total_step > 0 and iter_num == args.total_step + args.skip_batch_num: + stop = True + break + if iter_num < args.skip_batch_num: + print("Warm-up iteration") + if iter_num == args.skip_batch_num: + profiler.reset_profiler() + start = time.time() results = train_one_batch(data) + batch_time = time.time() - start + fps = args.batch_size / batch_time + batch_times.append(batch_time) total_loss += results[0] total_seq_error += results[2] + + iter_num += 1 # training log if iter_num % args.log_period == 0: print "\nTime: %s; Iter[%d]; Avg Warp-CTC loss: %.3f; Avg seq err: %.3f" % ( @@ -127,7 +144,7 @@ def train(args, data_reader=ctc_reader): total_seq_error = 0.0 # evaluate - if iter_num % args.eval_period == 0: + if not args.skip_test and iter_num % args.eval_period == 0: if model_average: with model_average.apply(exe): test(iter_num) @@ -141,12 +158,35 @@ def train(args, data_reader=ctc_reader): save_model(args, exe, iter_num) else: save_model(args, exe, iter_num) + # Postprocess benchmark data + latencies = batch_times[args.skip_batch_num:] + latency_avg = np.average(latencies) + latency_pc99 = np.percentile(latencies, 99) + fpses = np.divide(args.batch_size, latencies) + fps_avg = np.average(fpses) + fps_pc99 = np.percentile(fpses, 1) + + # Benchmark output + print('\nTotal examples (incl. warm-up): %d' % + (iter_num * args.batch_size)) + print('average latency: %.5f s, 99pc latency: %.5f s' % (latency_avg, + latency_pc99)) + print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg, + fps_pc99)) def main(): args = parser.parse_args() print_arguments(args) - train(args, data_reader=ctc_reader) + if args.profile: + if args.use_gpu: + with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof: + train(args, data_reader=ctc_reader) + else: + with profiler.profiler("CPU", sorted_key='total') as cpuprof: + train(args, data_reader=ctc_reader) + else: + train(args, data_reader=ctc_reader) if __name__ == "__main__": diff --git a/fluid/ocr_recognition/infer.py b/fluid/ocr_recognition/infer.py index 080e3f5f84efbb73e3c2381e809222fd2a90c416..154242c9e3ca8fea26f34b5cda0c2bac5a3d0ef1 100644 --- a/fluid/ocr_recognition/infer.py +++ b/fluid/ocr_recognition/infer.py @@ -1,5 +1,6 @@ import paddle.v2 as paddle import paddle.fluid as fluid +import paddle.fluid.profiler as profiler from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data from crnn_ctc_model import ctc_infer import numpy as np @@ -7,6 +8,7 @@ import ctc_reader import argparse import functools import os +import time parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) @@ -16,6 +18,10 @@ add_arg('input_images_dir', str, None, "The directory of images.") add_arg('input_images_list', str, None, "The list file of images.") add_arg('dict', str, None, "The dictionary. The result of inference will be index sequence if the dictionary was None.") add_arg('use_gpu', bool, True, "Whether use GPU to infer.") +add_arg('iterations', int, 0, "The number of iterations. Zero or less means whole test set. More than 0 means the test set might be looped until # of iterations is reached.") +add_arg('profile', bool, False, "Whether to use profiling.") +add_arg('skip_batch_num', int, 0, "The number of first minibatches to skip as warm-up for better performance test.") +add_arg('batch_size', int, 1, "The minibatch size.") # yapf: enable @@ -25,11 +31,14 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): data_shape = data_reader.data_shape() # define network images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') - sequence = infer(images, num_classes) + sequence = infer( + images, num_classes, use_cudnn=True if args.use_gpu else False) # data reader infer_reader = data_reader.inference( + batch_size=args.batch_size, infer_images_dir=args.input_images_dir, - infer_list_file=args.input_images_list) + infer_list_file=args.input_images_list, + cycle=True if args.iterations > 0 else False) # prepare environment place = fluid.CPUPlace() if args.use_gpu: @@ -56,23 +65,67 @@ def inference(args, infer=ctc_infer, data_reader=ctc_reader): fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name) print "Init model from: %s." % args.model_path + batch_times = [] + iters = 0 for data in infer_reader(): + if args.iterations > 0 and iters == args.iterations + args.skip_batch_num: + break + if iters < args.skip_batch_num: + print("Warm-up itaration") + if iters == args.skip_batch_num: + profiler.reset_profiler() + + start = time.time() result = exe.run(fluid.default_main_program(), feed=get_feeder_data( data, place, need_label=False), fetch_list=[sequence], return_numpy=False) + batch_time = time.time() - start + fps = args.batch_size / batch_time + batch_times.append(batch_time) indexes = np.array(result[0]).flatten() if dict_map is not None: - print "result: %s" % ([dict_map[index] for index in indexes], ) + print "Iteration %d, latency: %.5f s, fps: %f, result: %s" % ( + iters, + batch_time, + fps, + [dict_map[index] for index in indexes], ) else: - print "result: %s" % (indexes, ) + print "Iteration %d, latency: %.5f s, fps: %f, result: %s" % ( + iters, + batch_time, + fps, + indexes, ) + + iters += 1 + + latencies = batch_times[args.skip_batch_num:] + latency_avg = np.average(latencies) + latency_pc99 = np.percentile(latencies, 99) + fpses = np.divide(args.batch_size, latencies) + fps_avg = np.average(fpses) + fps_pc99 = np.percentile(fpses, 1) + + # Benchmark output + print('\nTotal examples (incl. warm-up): %d' % (iters * args.batch_size)) + print('average latency: %.5f s, 99pc latency: %.5f s' % (latency_avg, + latency_pc99)) + print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg, fps_pc99)) def main(): args = parser.parse_args() print_arguments(args) - inference(args, data_reader=ctc_reader) + if args.profile: + if args.use_gpu: + with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof: + inference(args, data_reader=ctc_reader) + else: + with profiler.profiler("CPU", sorted_key='total') as cpuprof: + inference(args, data_reader=ctc_reader) + else: + inference(args, data_reader=ctc_reader) if __name__ == "__main__": diff --git a/fluid/ocr_recognition/scripts/README.md b/fluid/ocr_recognition/scripts/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6320834e6c29fa24496527b265e2aa0d52475ad6 --- /dev/null +++ b/fluid/ocr_recognition/scripts/README.md @@ -0,0 +1,46 @@ +## Introduction +Scripts enclosed in the folder serve as examples of commands that start training +and inference of a model, and are subject to further customisation. + +# Running with MKL-DNN +In order to run training or inference using MKL-DNN library, please use +`FLAGS_use_mkldnn=1` environmental variable. + +## Prerequisites +In order to run the training and inference, no special requirements are posed. + +## Training +To run training on *CPU*, please execute: + +```sh +source train.sh CPU +``` + +To run training on *CPU* with MKL-DNN, please execute: + +```sh +source train.sh MKLDNN +``` + +To run training on *GPU*, please execute: + +```sh +source train.sh GPU +``` + +## Inference +To perform inference on the trained model using *CPU*, please run: +```sh +source infer.sh CPU +``` + +To perform inference on the trained model using *CPU* with MKL-DNN, please run: +```sh +source infer.sh MKLDNN +``` + +To perform inference on the trained model using *GPU*, please run: + +```sh +source infer.sh GPU +``` diff --git a/fluid/ocr_recognition/scripts/infer.sh b/fluid/ocr_recognition/scripts/infer.sh new file mode 100644 index 0000000000000000000000000000000000000000..6273ad153157138b3b7ecafece461ee01eda2955 --- /dev/null +++ b/fluid/ocr_recognition/scripts/infer.sh @@ -0,0 +1,42 @@ +#!/bin/bash +export MKL_NUM_THREADS=1 +export OMP_NUM_THREADS=1 + +mode=$1 # gpu, cpu, mkldnn +if [ "$mode" = "CPU" ]; then + use_gpu="False" + model_path="cpu_model" +elif [ "$mode" = "GPU" ]; then + use_gpu="True" + model_path="gpu_model" +elif [ "$mode" = "MKLDNN" ]; then + use_gpu="False" + model_path="mkldnn_model" + export FLAGS_use_mkldnn=1 +else + echo "Invalid mode provided. Please use one of {GPU, CPU, MKLDNN}" + exit 1 +fi + +ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs` +if [ $ht -eq 1 ]; then # HT is OFF + if [ -z "$KMP_AFFINITY" ]; then + export KMP_AFFINITY="granularity=fine,compact,0,0" + fi + if [ -z "$OMP_DYNAMIC" ]; then + export OMP_DYNAMIC="FALSE" + fi +else # HT is ON + if [ -z "$KMP_AFFINITY" ]; then + export KMP_AFFINITY="granularity=fine,compact,1,0" + fi +fi + +python ../infer.py \ + --model_path $model_path/model_00001 \ + --input_images_list ~/.cache/paddle/dataset/ctc_data/data/test.list \ + --input_images_dir ~/.cache/paddle/dataset/ctc_data/data/test_images \ + --use_gpu $use_gpu \ + --batch_size 32 \ + --iterations 5 \ + --skip_batch_num 2 diff --git a/fluid/ocr_recognition/scripts/train.sh b/fluid/ocr_recognition/scripts/train.sh new file mode 100644 index 0000000000000000000000000000000000000000..ceb7c06c0548e1d5ff90651d2fd8624288cc8804 --- /dev/null +++ b/fluid/ocr_recognition/scripts/train.sh @@ -0,0 +1,55 @@ +#!/bin/bash +export MKL_NUM_THREADS=1 +export OMP_NUM_THREADS=1 + +batch_size=32 +core_num=`lscpu |grep -m1 "CPU(s)"|awk -F':' '{print $2}'|xargs` +mode=$1 # gpu, cpu, mkldnn +if [ "$mode" = "CPU" ]; then + if [ $core_num -gt $batch_size ]; then + echo "Batch size should be greater or equal to the number of + available cores, when parallel mode is set to True." + fi + use_gpu="False" + save_model_dir="cpu_model" + parallel="True" +elif [ "$mode" = "GPU" ]; then + use_gpu="True" + save_model_dir="gpu_model" + parallel="True" +elif [ "$mode" = "MKLDNN" ]; then + if [ $core_num -gt $batch_size ]; then + echo "Batch size should be greater or equal to the number of + available cores, when parallel mode is set to True." + fi + use_gpu="False" + save_model_dir="mkldnn_model" + parallel="False" + export FLAGS_use_mkldnn=1 +else + echo "Invalid mode provided. Please use one of {GPU, CPU, MKLDNN}" + exit 1 +fi + +ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs` +if [ $ht -eq 1 ]; then # HT is OFF + if [ -z "$KMP_AFFINITY" ]; then + export KMP_AFFINITY="granularity=fine,compact,0,0" + fi + if [ -z "$OMP_DYNAMIC" ]; then + export OMP_DYNAMIC="FALSE" + fi +else # HT is ON + if [ -z "$KMP_AFFINITY" ]; then + export KMP_AFFINITY="granularity=fine,compact,1,0" + fi +fi + +python ../ctc_train.py \ + --use_gpu $use_gpu \ + --parallel $parallel \ + --batch_size $batch_size \ + --save_model_period 1 \ + --total_step 1 \ + --save_model_dir $save_model_dir +