"""Inference and benchmarking for the CRNN-CTC and attention OCR models."""
from __future__ import print_function

import argparse
import functools
import os
import time

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler

import data_reader
from attention_model import attention_infer
from crnn_ctc_model import ctc_infer
from utility import (add_arguments, print_arguments, to_lodtensor,
                     get_ctc_feeder_data, get_attention_feeder_for_infer)

parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('model',             str,  "crnn_ctc", "Which type of network to be used: 'crnn_ctc' or 'attention'.")
add_arg('model_path',        str,  None,       "The model path to be used for inference.")
add_arg('input_images_dir',  str,  None,       "The directory of images.")
add_arg('input_images_list', str,  None,       "The list file of images.")
add_arg('dict',              str,  None,       "The dictionary. The result of inference will be an index sequence if the dictionary is None.")
add_arg('use_gpu',           bool, True,       "Whether to use GPU for inference.")
add_arg('iterations',        int,  0,          "The number of iterations. Zero or less means the whole test set; more than zero means the test set may be cycled until that number of iterations is reached.")
add_arg('profile',           bool, False,      "Whether to use profiling.")
add_arg('skip_batch_num',    int,  0,          "The number of leading minibatches to skip as warm-up for a more reliable performance test.")
add_arg('batch_size',        int,  1,          "The minibatch size.")
# yapf: enable


def inference(args):
    """OCR inference."""
    if args.model == "crnn_ctc":
        infer = ctc_infer
        get_feeder_data = get_ctc_feeder_data
    else:
        infer = attention_infer
        get_feeder_data = get_attention_feeder_for_infer
    eos = 1
    sos = 0
    num_classes = data_reader.num_classes()
    data_shape = data_reader.data_shape()

    # define network
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    ids = infer(images, num_classes, use_cudnn=args.use_gpu)

    # data reader; cycle the test set only when a fixed iteration count is requested
    infer_reader = data_reader.inference(
        batch_size=args.batch_size,
        infer_images_dir=args.input_images_dir,
        infer_list_file=args.input_images_list,
        cycle=args.iterations > 0,
        model=args.model)

    # prepare environment
    place = fluid.CPUPlace()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load dictionary mapping class indices to characters
    dict_map = None
    if args.dict is not None and os.path.isfile(args.dict):
        dict_map = {}
        with open(args.dict) as dict_file:
            for i, word in enumerate(dict_file):
                dict_map[i] = word.strip()
        print("Loaded dict from %s" % args.dict)

    # load init model; model_path may be a directory of parameter files
    # or a single combined parameter file
    model_dir = args.model_path
    model_file_name = None
    if not os.path.isdir(args.model_path):
        model_dir = os.path.dirname(args.model_path)
        model_file_name = os.path.basename(args.model_path)
    fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
    print("Init model from: %s." % args.model_path)

    batch_times = []
    iters = 0
    for data in infer_reader():
        feed_dict = get_feeder_data(data, place)
        if args.iterations > 0 and iters == args.iterations + args.skip_batch_num:
            break
        if iters < args.skip_batch_num:
            print("Warm-up iteration")
        if iters == args.skip_batch_num:
            profiler.reset_profiler()
        start = time.time()
        result = exe.run(fluid.default_main_program(),
                         feed=feed_dict,
                         fetch_list=[ids],
                         return_numpy=False)
        indexes = prune(np.array(result[0]).flatten(), sos, eos)
        batch_time = time.time() - start
        fps = args.batch_size / batch_time
        batch_times.append(batch_time)
        if dict_map is not None:
            print("Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
                iters,
                batch_time,
                fps,
                [dict_map[index] for index in indexes], ))
        else:
            print("Iteration %d, latency: %.5f s, fps: %f, result: %s" % (
                iters,
                batch_time,
                fps,
                indexes, ))
        iters += 1

    # drop warm-up batches before computing statistics
    latencies = batch_times[args.skip_batch_num:]
    latency_avg = np.average(latencies)
    latency_pc99 = np.percentile(latencies, 99)
    fpses = np.divide(args.batch_size, latencies)
    fps_avg = np.average(fpses)
    # the 1st percentile of fps corresponds to the 99th percentile of latency
    fps_pc99 = np.percentile(fpses, 1)

    # Benchmark output
    print('\nTotal examples (incl. warm-up): %d' % (iters * args.batch_size))
    print('average latency: %.5f s, 99pc latency: %.5f s' % (latency_avg,
                                                             latency_pc99))
    print('average fps: %.5f, fps for 99pc latency: %.5f' % (fps_avg,
                                                             fps_pc99))


def prune(words, sos, eos):
    """Cut a prediction down to the tokens between the first SOS and the first EOS."""
    start_index = 0
    end_index = len(words)
    if sos in words:
        start_index = np.where(words == sos)[0][0] + 1
    if eos in words:
        end_index = np.where(words == eos)[0][0]
    return words[start_index:end_index]


def main():
    args = parser.parse_args()
    print_arguments(args)
    if args.profile:
        if args.use_gpu:
            with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
                inference(args)
        else:
            with profiler.profiler("CPU", sorted_key='total') as cpuprof:
                inference(args)
    else:
        inference(args)


if __name__ == "__main__":
    main()
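
# A minimal example invocation, kept as a sketch: every path and file name
# below is a hypothetical placeholder, not something shipped with this script.
# It assumes a CRNN-CTC checkpoint previously saved with fluid.io.save_params,
# a directory of test images with a matching list file, and a character
# dictionary with one entry per line (line number = class index).
#
#   python infer.py \
#       --model crnn_ctc \
#       --model_path ./models/ocr_ctc_params \
#       --input_images_dir ./data/test_images \
#       --input_images_list ./data/test.list \
#       --dict ./data/char_dict.txt \
#       --use_gpu True \
#       --batch_size 32 \
#       --iterations 100 \
#       --skip_batch_num 5
#
# With --iterations 100 and --skip_batch_num 5, the reader cycles the test set,
# the first 5 minibatches are treated as warm-up, and latency/fps statistics
# are computed over the remaining 95 timed batches.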