import numpy as np
import argparse
import time

import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle

import reader


def parse_args():
    """Build and parse the command-line arguments for inference.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    arg_parser = argparse.ArgumentParser("Run inference.")
    add_arg = arg_parser.add_argument
    add_arg('--batch_size', type=int, default=6,
            help='The size of a batch. (default: %(default)d)')
    add_arg('--device', type=str, default='GPU', choices=['CPU', 'GPU'],
            help='The device type. (default: %(default)s)')
    add_arg('--model_path', type=str, default='output/params_pass_0',
            help='A path to the model. (default: %(default)s)')
    add_arg('--test_data_dir', type=str, default='data/test_files',
            help='A directory with test data files. (default: %(default)s)')
    add_arg('--test_label_file', type=str, default='data/label_dict',
            help='A file with test labels. (default: %(default)s)')
    add_arg('--num_passes', type=int, default=1,
            help='The number of passes.')
    add_arg('--skip_pass_num', type=int, default=0,
            help='The first num of passes to skip in statistics calculations.')
    add_arg('--profile', action='store_true',
            help='If set, do profiling.')
    return arg_parser.parse_args()


def print_arguments(args):
    """Pretty-print parsed arguments, one 'name: value' pair per line,
    sorted by argument name."""
    print('-----------  Configuration Arguments -----------')
    arg_items = sorted(vars(args).items())
    for name, val in arg_items:
        print('%s: %s' % (name, val))
    print('------------------------------------------------')


def load_reverse_dict(dict_path):
    """Load an index -> label mapping from a tab-separated dictionary file.

    Each line's first tab-separated field is a label; its 0-based line
    number becomes the key.

    Args:
        dict_path (str): path to the label dictionary file.

    Returns:
        dict: {line_index: label} reverse lookup table.
    """
    # Use a context manager so the file handle is closed deterministically
    # (the original left it open until garbage collection), and iterate the
    # file directly instead of materializing all lines with readlines().
    with open(dict_path, "r") as f:
        return dict((idx, line.strip().split("\t")[0])
                    for idx, line in enumerate(f))

def to_lodtensor(data, place):
    """Pack a batch of variable-length id sequences into one fluid LoDTensor.

    Args:
        data: iterable of sequences (each a list/array of integer ids).
        place: the fluid CPU/CUDA place where the tensor is stored.

    Returns:
        fluid.LoDTensor of shape [total_len, 1] with a single LoD level
        recording the cumulative sequence offsets.
    """
    # Cumulative offsets: [0, len(s0), len(s0)+len(s1), ...]
    offsets = [0]
    for seq in data:
        offsets.append(offsets[-1] + len(seq))
    flat = np.concatenate(data, axis=0).astype("int64")
    flat = flat.reshape([len(flat), 1])
    tensor = fluid.LoDTensor()
    tensor.set(flat, place)
    tensor.set_lod([offsets])
    return tensor


def infer(args):
    """Run NER inference over the test set and report latency/throughput.

    Loads a persisted inference model from ``args.model_path``, feeds it
    batches built from ``args.test_data_dir``, runs ``num_passes`` timed
    passes (plus ``skip_pass_num`` warm-up passes excluded from profiling),
    and prints per-pass and aggregate benchmark statistics.

    Args:
        args: parsed command-line arguments (see ``parse_args``).
    """
    # Declare the data layers. NOTE(review): only 'word' and 'mention' are
    # fed below; 'target' is declared and listed in the feeder but never
    # used in the actual feed dict.
    word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
    mention = fluid.layers.data(
        name='mention', shape=[1], dtype='int64', lod_level=1)
    target = fluid.layers.data(
        name='target', shape=[1], dtype='int64', lod_level=1)

    # idx -> label name, used to decode predicted/gold tag ids.
    label_reverse_dict = load_reverse_dict(args.test_label_file)

    test_data = paddle.batch(
        reader.file_reader(args.test_data_dir), batch_size=args.batch_size)
    place = fluid.CUDAPlace(0) if args.device == 'GPU' else fluid.CPUPlace()
    feeder = fluid.DataFeeder(feed_list=[word, mention, target], place=place)
    exe = fluid.Executor(place)

    # Run inference inside an isolated scope so loaded parameters do not
    # leak into (or collide with) the global scope.
    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        [inference_program, feed_target_names,
         fetch_targets] = fluid.io.load_inference_model(args.model_path, exe)
        total_passes = args.num_passes + args.skip_pass_num
        batch_times = [0] * total_passes
        word_counts = [0] * total_passes
        wpses = [0] * total_passes
        all_iters = 0
        for pass_id in range(total_passes):
            if pass_id < args.skip_pass_num:
                print("Warm-up pass")
            if pass_id == args.skip_pass_num:
                # Drop profiler data collected during warm-up passes.
                profiler.reset_profiler()
            iters = 0
            for data in test_data():
                # NOTE(review): rebinding 'word'/'mention' here shadows the
                # layer variables declared above. Each batch is packed into
                # a LoDTensor of ids (x[0]=words, x[1]=mentions).
                word = to_lodtensor(list(map(lambda x: x[0], data)), place)
                mention = to_lodtensor(list(map(lambda x: x[1], data)), place)

                # Time only the executor run, not batch preparation.
                start = time.time()
                crf_decode = exe.run(inference_program,
                                     feed={"word": word,
                                           "mention": mention},
                                     fetch_list=fetch_targets,
                                     return_numpy=False)
                batch_time = time.time() - start
                # LoD offsets delimit each sentence's tags within the flat
                # decoded output tensor.
                lod_info = (crf_decode[0].lod())[0]
                np_data = np.array(crf_decode[0])
                word_count = 0
                assert len(data) == len(lod_info) - 1
                for sen_index in range(len(data)):
                    # Decoded length must equal the input sentence length.
                    assert len(data[sen_index][0]) == lod_info[
                        sen_index + 1] - lod_info[sen_index]
                    word_index = 0
                    for tag_index in range(lod_info[sen_index],
                                           lod_info[sen_index + 1]):
                        # NOTE(review): word / gold_tag / tag are computed
                        # here but never printed or accumulated into any
                        # accuracy metric — presumably leftover debug code.
                        word = str(data[sen_index][0][word_index])
                        gold_tag = label_reverse_dict[data[sen_index][2][
                            word_index]]
                        tag = label_reverse_dict[np_data[tag_index][0]]
                        word_index += 1
                    word_count += word_index
                batch_times[pass_id] += batch_time
                word_counts[pass_id] += word_count
                iters += 1
                all_iters += 1
            # Convert pass totals into per-iteration averages.
            batch_times[pass_id] /= iters
            word_counts[pass_id] /= iters
            wps = word_counts[pass_id] / batch_times[pass_id]
            wpses[pass_id] = wps

            print(
                "Pass: %d, iterations (total): %d (%d), latency: %.5f s, words: %d, wps: %f"
                % (pass_id, iters, all_iters, batch_times[pass_id],
                   word_counts[pass_id], wps))

    # Postprocess benchmark data: latency stats exclude warm-up passes.
    # NOTE(review): wps stats below include warm-up entries, unlike
    # latencies — confirm whether that asymmetry is intended.
    latencies = batch_times[args.skip_pass_num:]
    latency_avg = np.average(latencies)
    latency_std = np.std(latencies)
    latency_pc99 = np.percentile(latencies, 99)
    wps_avg = np.average(wpses)
    wps_std = np.std(wpses)
    wps_pc01 = np.percentile(wpses, 1)

    # Benchmark output
    print('\nTotal passes (incl. warm-up): %d' % (total_passes))
    print('Total iterations (incl. warm-up): %d' % (all_iters))
    print('Total examples (incl. warm-up): %d' % (all_iters * args.batch_size))
    print('avg latency: %.5f, std latency: %.5f, 99pc latency: %.5f' %
          (latency_avg, latency_std, latency_pc99))
    print('avg wps: %.5f, std wps: %.5f, wps for 99pc latency: %.5f' %
          (wps_avg, wps_std, wps_pc01))


if __name__ == "__main__":
    # Parse and echo the configuration, then run inference — optionally
    # under the device-appropriate profiler.
    args = parse_args()
    print_arguments(args)
    if not args.profile:
        infer(args)
    elif args.device == 'GPU':
        with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
            infer(args)
    else:
        with profiler.profiler('CPU', sorted_key='total') as cpuprof:
            infer(args)