# -*- coding: utf-8 -*-
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""

import os
import google.protobuf.text_format
import numpy as np
import argparse
from .proto import general_model_config_pb2 as m_config
import paddle.inference as paddle_infer
import logging
import glob

logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("LocalPredictor")
logger.setLevel(logging.INFO)

precision_map = {
    'int8': paddle_infer.PrecisionType.Int8,
    'fp32': paddle_infer.PrecisionType.Float32,
    'fp16': paddle_infer.PrecisionType.Half,
    'bf16': 'bf16',
}
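# Note: 'bf16' is not a paddle_infer.PrecisionType value; it is a sentinel
# handled separately below via config.enable_mkldnn_bfloat16().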


class LocalPredictor(object):
    """
    Run prediction in-process in the local environment. Compared with
    RPC/HTTP serving, LocalPredictor delivers better performance because it
    avoids network transfer and request packing/unpacking overhead.
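
    Example (a minimal sketch; the model directory and feed/fetch names below
    are placeholders for your own exported serving model):

        predictor = LocalPredictor()
        predictor.load_model_config("./serving_server_model", use_gpu=False)
        fetch_map = predictor.predict(
            feed={"x": np.ones((1, 13), dtype="float32")},
            fetch=["price"],
            batch=True)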
    """

    def __init__(self):
        self.feed_names_ = []
        self.fetch_names_ = []
        self.feed_types_ = {}
        self.fetch_types_ = {}
        self.feed_shapes_ = {}
        self.feed_names_to_idx_ = {}
        self.fetch_names_to_idx_ = {}
        self.fetch_names_to_type_ = {}

    def search_suffix_files(self, model_path, target_suffix):
        """
        Find all files with the given suffix in the specified directory.

        Args:
            model_path: model directory, not None.
            target_suffix: glob pattern for the target suffix, not None, e.g. *.pdmodel

        Returns:
            None if either argument is None; otherwise a list of matching paths, possibly empty.
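
        Example (illustrative paths):
            search_suffix_files("./serving_model", "*.pdmodel")
            # -> ["./serving_model/model.pdmodel"] if such a file exists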
        """
        if model_path is None or target_suffix is None:
            return None

        file_list = glob.glob(os.path.join(model_path, target_suffix))
        return file_list

    def load_model_config(self,
                          model_path,
                          use_gpu=False,
                          gpu_id=0,
                          use_profile=False,
                          thread_num=1,
                          mem_optim=True,
                          ir_optim=False,
                          use_trt=False,
                          use_lite=False,
                          use_xpu=False,
                          precision="fp32",
                          use_mkldnn=False,
                          mkldnn_cache_capacity=0,
                          mkldnn_op_list=None,
                          mkldnn_bf16_op_list=None,
                          use_feed_fetch_ops=False,
                          use_ascend_cl=False,
                          min_subgraph_size=3,
                          dynamic_shape_info={},
                          use_calib=False):
        """
        Load model configs and create the paddle predictor by Paddle Inference API.
   
        Args:
            model_path: model config path.
            use_gpu: run inference on GPU, False default.
            gpu_id: gpu id, 0 default.
            use_profile: enable predictor profiling, False default.
            thread_num: number of threads for the CPU math library, 1 default.
            mem_optim: memory optimization, True default.
            ir_optim: enable computation graph (IR) optimization, False default.
            use_trt: use NVIDIA TensorRT optimization, False default.
            use_lite: use Paddle-Lite engine, False default.
            use_xpu: run inference on Baidu Kunlun XPU, False default.
            precision: precision mode, "fp32" default
            use_mkldnn: use MKLDNN, False default.
            mkldnn_cache_capacity: cache capacity for input shapes, 0 default.
            mkldnn_op_list: op list accelerated using MKLDNN, None default.
            mkldnn_bf16_op_list: op list accelerated using MKLDNN bf16, None default.
            use_feed_fetch_ops: use feed/fetch ops, False default.
            use_ascend_cl: run predict on Huawei Ascend, False default
            min_subgraph_size: the minimum subgraph size for TensorRT to optimize, 3 default.
            dynamic_shape_info: dict with min_input_shape, max_input_shape and
                                opt_input_shape, {} default. See the example below.
            use_calib: use TensorRT calibration, False default
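
        Example (illustrative; the model path and tensor names are placeholders):
            predictor.load_model_config(
                "./serving_server_model",
                use_gpu=True,
                gpu_id=0,
                use_trt=True,
                dynamic_shape_info={
                    "min_input_shape": {"x": [1, 3, 224, 224]},
                    "max_input_shape": {"x": [8, 3, 224, 224]},
                    "opt_input_shape": {"x": [4, 3, 224, 224]},
                })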
        """
        gpu_id = int(gpu_id)
        client_config = "{}/serving_server_conf.prototxt".format(model_path)
        model_conf = m_config.GeneralModelConfig()
        with open(client_config, 'r') as f:
            model_conf = google.protobuf.text_format.Merge(
                str(f.read()), model_conf)

        # Init paddle_infer config
        # Paddle's model files and parameter files have multiple naming rules:
        #   1) __model__, __params__
        #   2) *.pdmodel, *.pdiparams
        #   3) __model__, conv2d_1.w_0, conv2d_2.w_0, fc_1.w_0, conv2d_1.b_0, ... 
        pdmodel_file_list = self.search_suffix_files(model_path, "*.pdmodel")
        pdiparams_file_list = self.search_suffix_files(model_path,
                                                       "*.pdiparams")
        if os.path.exists(os.path.join(model_path, "__params__")):
            # case 1) initializing
            config = paddle_infer.Config(
                os.path.join(model_path, "__model__"),
                os.path.join(model_path, "__params__"))
        elif pdmodel_file_list and pdiparams_file_list:
            # case 2) initializing
            logger.info("pdmodel_file_list:{}, pdiparams_file_list:{}".format(
                pdmodel_file_list, pdiparams_file_list))
            config = paddle_infer.Config(pdmodel_file_list[0],
                                         pdiparams_file_list[0])
        else:
            # case 3) initializing.
            config = paddle_infer.Config(model_path)

        logger.info(
            "LocalPredictor load_model_config params: model_path:{}, use_gpu:{}, "
            "gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{}, "
            "use_trt:{}, use_lite:{}, use_xpu:{}, precision:{}, use_calib:{}, "
            "use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
            "mkldnn_bf16_op_list:{}, use_feed_fetch_ops:{}, "
            "use_ascend_cl:{}, min_subgraph_size:{}, dynamic_shape_info:{}".format(
                model_path, use_gpu, gpu_id, use_profile, thread_num, mem_optim,
                ir_optim, use_trt, use_lite, use_xpu, precision, use_calib,
                use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list,
                mkldnn_bf16_op_list, use_feed_fetch_ops, use_ascend_cl,
                min_subgraph_size, dynamic_shape_info))

        self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
        self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
        self.feed_names_to_idx_ = {}
        self.fetch_names_to_idx_ = {}

        for i, var in enumerate(model_conf.feed_var):
            self.feed_names_to_idx_[var.alias_name] = i
            self.feed_types_[var.alias_name] = var.feed_type
            self.feed_shapes_[var.alias_name] = var.shape

        for i, var in enumerate(model_conf.fetch_var):
            self.fetch_names_to_idx_[var.alias_name] = i
            self.fetch_types_[var.alias_name] = var.fetch_type
            self.fetch_names_to_type_[var.alias_name] = var.shape

        # set precision of inference.
        precision_type = paddle_infer.PrecisionType.Float32
        if precision is not None and precision.lower() in precision_map:
            precision_type = precision_map[precision.lower()]
        else:
            logger.warning("precision error!!! Please check precision:{}".
                           format(precision))
        # set profile
        if use_profile:
            config.enable_profile()
        # set memory optimization
        if mem_optim:
            config.enable_memory_optim()
        # set ir optimization, threads of cpu math library
        config.switch_ir_optim(ir_optim)
        # use feed & fetch ops
        config.switch_use_feed_fetch_ops(use_feed_fetch_ops)
        # pass optim
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")

        # set cpu & mkldnn
        config.set_cpu_math_library_num_threads(thread_num)
        if use_mkldnn:
            config.enable_mkldnn()
            if precision_type == "bf16":
                config.enable_mkldnn_bfloat16()
            if mkldnn_cache_capacity > 0:
                config.set_mkldnn_cache_capacity(mkldnn_cache_capacity)
            if mkldnn_op_list is not None:
                config.set_mkldnn_op(mkldnn_op_list)
        # set gpu
        if not use_gpu:
            config.disable_gpu()
        else:
            config.enable_use_gpu(100, gpu_id)
            if use_trt:
                config.enable_tensorrt_engine(
                    precision_mode=precision_type,
                    workspace_size=1 << 20,
                    max_batch_size=32,
                    min_subgraph_size=min_subgraph_size,
                    use_static=False,
                    use_calib_mode=use_calib)

                if dynamic_shape_info:
                    config.set_trt_dynamic_shape_info(
                        dynamic_shape_info['min_input_shape'],
                        dynamic_shape_info['max_input_shape'],
                        dynamic_shape_info['opt_input_shape'])
        # set lite
        if use_lite:
            config.enable_lite_engine(
                precision_mode=precision_type,
                zero_copy=True,
                passes_filter=[],
                ops_filter=[])
            config.switch_ir_optim(True)
        # set xpu
        if use_xpu:
            # 8MB l3 cache
            config.enable_xpu(8 * 1024 * 1024)
            config.set_xpu_device_id(gpu_id)
        # set ascend cl
        if use_ascend_cl:
            if use_lite:
                # for ascend 310
                nnadapter_device_names = "huawei_ascend_npu"
                nnadapter_context_properties = \
                    "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(gpu_id)
                nnadapter_model_cache_dir = ""
                config.nnadapter() \
                .enable() \
                .set_device_names([nnadapter_device_names]) \
                .set_context_properties(nnadapter_context_properties) \
                .set_model_cache_dir(nnadapter_model_cache_dir)
            else:
                # for ascend 910
                config.enable_npu(gpu_id)
        # set cpu low precision
        if not use_gpu and not use_lite:
            if precision_type == paddle_infer.PrecisionType.Int8:
                logger.warning(
                    "PRECISION INT8 is not supported in CPU right now! Please use fp16 or bf16."
                )
                #config.enable_quantizer()
            if precision is not None and precision.lower() == "bf16":
                config.enable_mkldnn_bfloat16()
                if mkldnn_bf16_op_list is not None:
                    config.set_bfloat16_op(mkldnn_bf16_op_list)

        self.predictor = paddle_infer.create_predictor(config)

    def predict(self, feed=None, fetch=None, batch=False, log_id=0):
        """
        Run model inference by Paddle Inference API.

        Args:
            feed: feed var list, None is not allowed.
            fetch: fetch var list, None allowed. When it is None, all fetch
                   vars are returned; otherwise only the specified fetch vars.
            batch: whether the feed data already has a batch dimension, False
                   default. If batch is False, a new leading dimension is
                   added to every input (np.newaxis).
            log_id: for logging

        Returns:
            fetch_map: dict 
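
        Example (illustrative; feed/fetch names depend on your model config):
            fetch_map = predictor.predict(
                feed={"x": np.ones((1, 13), dtype="float32")},
                fetch=["price"],
                batch=True)
            # fetch_map maps each fetch name to a numpy array; a "<name>.lod"
            # entry is added when an output tensor carries LoD information.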
        """
        if feed is None:
            raise ValueError("You should specify feed vars for prediction.\
                log_id:{}".format(log_id))

        feed_batch = []
        if isinstance(feed, dict):
            feed_batch.append(feed)
        elif isinstance(feed, list):
            feed_batch = feed
        else:
            raise ValueError("Feed only accepts dict and list of dict.\
                log_id:{}".format(log_id))

        fetch_list = []
        if fetch is not None:
            if isinstance(fetch, str):
                fetch_list = [fetch]
            elif isinstance(fetch, list):
                fetch_list = fetch

        # Filter invalid fetch names
        fetch_names = []
        for key in fetch_list:
            if key in self.fetch_names_:
                fetch_names.append(key)

        # Assemble the input data of paddle predictor, and filter invalid inputs. 
        input_names = self.predictor.get_input_names()
        for name in input_names:
            if isinstance(feed[name], list):
                feed[name] = np.array(feed[name]).reshape(self.feed_shapes_[
                    name])
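            # Cast the input to the dtype declared by feed_type in
            # serving_server_conf.prototxt (0=int64, 1=float32, 2=int32, ...).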
            if self.feed_types_[name] == 0:
                feed[name] = feed[name].astype("int64")
            elif self.feed_types_[name] == 1:
                feed[name] = feed[name].astype("float32")
            elif self.feed_types_[name] == 2:
                feed[name] = feed[name].astype("int32")
            elif self.feed_types_[name] == 3:
                feed[name] = feed[name].astype("float64")
            elif self.feed_types_[name] == 4:
                feed[name] = feed[name].astype("int16")
            elif self.feed_types_[name] == 5:
                feed[name] = feed[name].astype("float16")
            elif self.feed_types_[name] == 6:
                feed[name] = feed[name].astype("uint16")
            elif self.feed_types_[name] == 7:
                feed[name] = feed[name].astype("uint8")
            elif self.feed_types_[name] == 8:
                feed[name] = feed[name].astype("int8")
            elif self.feed_types_[name] == 9:
                feed[name] = feed[name].astype("bool")
            elif self.feed_types_[name] == 10:
                feed[name] = feed[name].astype("complex64")
            elif self.feed_types_[name] == 11:
                feed[name] = feed[name].astype("complex128")
            else:
                raise ValueError("local predictor receives wrong data type")

            input_tensor_handle = self.predictor.get_input_handle(name)
            if "{}.lod".format(name) in feed:
                input_tensor_handle.set_lod([feed["{}.lod".format(name)]])
            if not batch:
                input_tensor_handle.copy_from_cpu(feed[name][np.newaxis, :])
            else:
                input_tensor_handle.copy_from_cpu(feed[name])

        # set output tensor handlers
        output_tensor_handles = []
        output_name_to_index_dict = {}
        output_names = self.predictor.get_output_names()
        for i, output_name in enumerate(output_names):
            output_tensor_handle = self.predictor.get_output_handle(output_name)
            output_tensor_handles.append(output_tensor_handle)
            output_name_to_index_dict[output_name] = i

        # Run inference 
        self.predictor.run()

        # Assemble output data of predict results
        outputs = []
        for output_tensor_handle in output_tensor_handles:
            output = output_tensor_handle.copy_to_cpu()
            outputs.append(output)
        outputs_len = len(outputs)

        # Copy fetch vars. If fetch is None, it will copy all results from output_tensor_handles. 
        # Otherwise, it will copy the fields specified from output_tensor_handles.
        fetch_map = {}
        if fetch is None:
            for i, name in enumerate(output_names):
                fetch_map[name] = outputs[i]
                if len(output_tensor_handles[i].lod()) > 0:
                    fetch_map[name + ".lod"] = np.array(output_tensor_handles[
                        i].lod()[0]).astype('int32')
        else:
            # The save_inference_model interface may insert extra ops (e.g. a
            # scale op) into the network, so fetch_var names can differ from
            # those in the prototxt. To stay compatible with v0.6.x and earlier
            # model-saving formats, unmatched fetch names are handled below.
            fetch_match_num = 0
            for i, name in enumerate(fetch):
                output_index = output_name_to_index_dict.get(name)
                if output_index is None:
                    continue

                fetch_map[name] = outputs[output_index]
                fetch_match_num += 1
                if len(output_tensor_handles[output_index].lod()) > 0:
                    fetch_map[name + ".lod"] = np.array(output_tensor_handles[
                        output_index].lod()[0]).astype('int32')

            # Compatible with v0.6.x and lower versions model saving formats.
            if fetch_match_num == 0:
                logger.debug("fetch match num is 0. Retrain the model please!")
                for i, name in enumerate(fetch):
                    if i >= outputs_len:
                        break
                    fetch_map[name] = outputs[i]
                    if len(output_tensor_handles[i].lod()) > 0:
                        fetch_map[name + ".lod"] = np.array(
                            output_tensor_handles[i].lod()[0]).astype('int32')

        return fetch_map