# -*- coding: utf-8 -*-
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""

import os
import google.protobuf.text_format
import numpy as np
import argparse
from .proto import general_model_config_pb2 as m_config
import paddle.inference as paddle_infer
import logging
import glob
from paddle_serving_server.pipeline.error_catch import ErrorCatch, CustomException, CustomExceptionCode, ParamChecker, ParamVerify
check_dynamic_shape_info = ParamVerify.check_dynamic_shape_info

logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("LocalPredictor")
logger.setLevel(logging.INFO)
from paddle_serving_server.util import kill_stop_process_by_pid

precision_map = {
    'int8': paddle_infer.PrecisionType.Int8,
    'fp32': paddle_infer.PrecisionType.Float32,
    'fp16': paddle_infer.PrecisionType.Half,
    'bf16': 'bf16',
}


class LocalPredictor(object):
    """
    Run prediction in the current process of the local environment (in-process
    call). Compared with RPC/HTTP serving, LocalPredictor has better performance
    because there is no network transfer or packing overhead.
    """

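    # A minimal usage sketch (the model path, feed name "x", and fetch name
    # below are illustrative assumptions, not values fixed by this module):
    #
    #   predictor = LocalPredictor()
    #   predictor.load_model_config("serving_server", use_gpu=False, thread_num=4)
    #   fetch_map = predictor.predict(
    #       feed={"x": np.array([[1.0, 2.0, 3.0]], dtype="float32")},
    #       fetch=["save_infer_model/scale_0.tmp_0"],
    #       batch=True)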
    def __init__(self):
        self.feed_names_ = []
        self.fetch_names_ = []
        self.feed_types_ = {}
        self.fetch_types_ = {}
        self.feed_shapes_ = {}
        self.feed_names_to_idx_ = {}
        self.fetch_names_to_idx_ = {}
        self.fetch_names_to_type_ = {}

    def search_suffix_files(self, model_path, target_suffix):
        """
        Find all files with the suffix xxx in the specified directory.

        Args:
            model_path: model directory, not None.
            target_suffix: filenames with target suffix, not None. e.g: *.pdmodel

        Returns:
            file_list, None, [] or [path, ] . 
        """
        if model_path is None or target_suffix is None:
            return None

        file_list = glob.glob(os.path.join(model_path, target_suffix))
        return file_list

    def load_model_config(self,
                          model_path,
                          use_gpu=False,
                          gpu_id=0,
                          use_profile=False,
                          thread_num=1,
                          mem_optim=True,
                          ir_optim=False,
                          use_trt=False,
                          use_lite=False,
                          use_xpu=False,
                          precision="fp32",
                          use_mkldnn=False,
                          mkldnn_cache_capacity=0,
                          mkldnn_op_list=None,
                          mkldnn_bf16_op_list=None,
                          use_feed_fetch_ops=False,
                          use_ascend_cl=False,
                          min_subgraph_size=3,
                          dynamic_shape_info={},
                          use_calib=False,
                          collect_shape_range_info="",
                          tuned_dynamic_shape_info=""):
        """
        Load model configs and create the paddle predictor by Paddle Inference API.
   
        Args:
            model_path: model config path.
            use_gpu: calculating with gpu, False default.
            gpu_id: gpu id, 0 default.
            use_profile: use predictor profiles, False default.
            thread_num: number of threads for the CPU math library, 1 default.
            mem_optim: memory optimization, True default.
            ir_optim: enable computation graph optimization, False default.
            use_trt: use NVIDIA TensorRT optimization, False default
            use_lite: use Paddle-Lite engine, False default
            use_xpu: run predict on Baidu Kunlun, False default
            precision: precision mode, "fp32" default
            use_mkldnn: use MKLDNN, False default.
            mkldnn_cache_capacity: cache capacity for input shapes, 0 default.
            mkldnn_op_list: op list accelerated using MKLDNN, None default.
            mkldnn_bf16_op_list: op list accelerated using MKLDNN bf16, None default.
            use_feed_fetch_ops: use feed/fetch ops, False default.
            use_ascend_cl: run predict on Huawei Ascend, False default
            min_subgraph_size: the minimum subgraph size for TensorRT to optimize, 3 default
            dynamic_shape_info: dict including min_input_shape, max_input_shape, opt_input_shape, {} default
            use_calib: use TensorRT calibration, False default
            collect_shape_range_info: file path for saving collected TensorRT shape range info, "" default
            tuned_dynamic_shape_info: shape range info file used to enable tuned TensorRT dynamic shape, "" default
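
        Example of dynamic_shape_info (a hedged sketch; the input name "x" and
        the shapes are illustrative assumptions, only the three keys are fixed)::

            dynamic_shape_info = {
                "min_input_shape": {"x": [1, 3, 224, 224]},
                "max_input_shape": {"x": [8, 3, 224, 224]},
                "opt_input_shape": {"x": [1, 3, 224, 224]},
            }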
        """
        gpu_id = int(gpu_id)
        client_config = "{}/serving_server_conf.prototxt".format(model_path)
        model_conf = m_config.GeneralModelConfig()
        with open(client_config, 'r') as f:
            model_conf = google.protobuf.text_format.Merge(f.read(), model_conf)

        # Init paddle_infer config
        # Paddle's model files and parameter files have multiple naming rules:
        #   1) __model__, __params__
        #   2) *.pdmodel, *.pdiparams
        #   3) __model__, conv2d_1.w_0, conv2d_2.w_0, fc_1.w_0, conv2d_1.b_0, ... 
        pdmodel_file_list = self.search_suffix_files(model_path, "*.pdmodel")
        pdiparams_file_list = self.search_suffix_files(model_path,
                                                       "*.pdiparams")
        if os.path.exists(os.path.join(model_path, "__params__")):
            # case 1) initializing
            config = paddle_infer.Config(
                os.path.join(model_path, "__model__"),
                os.path.join(model_path, "__params__"))
        elif pdmodel_file_list and pdiparams_file_list:
            # case 2) initializing
            logger.info("pdmodel_file_list:{}, pdiparams_file_list:{}".format(
                pdmodel_file_list, pdiparams_file_list))
            config = paddle_infer.Config(pdmodel_file_list[0],
                                         pdiparams_file_list[0])
        else:
            # case 3) initializing.
            config = paddle_infer.Config(model_path)

        logger.info(
            "LocalPredictor load_model_config params: model_path:{}, use_gpu:{}, "
            "gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{}, "
            "use_trt:{}, use_lite:{}, use_xpu:{}, precision:{}, use_calib:{}, "
            "use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
            "mkldnn_bf16_op_list:{}, use_feed_fetch_ops:{}, "
            "use_ascend_cl:{}, min_subgraph_size:{}, dynamic_shape_info:{}, "
            "collect_shape_range_info:{}, tuned_dynamic_shape_info:{}".
            format(model_path, use_gpu, gpu_id, use_profile, thread_num,
                   mem_optim, ir_optim, use_trt, use_lite, use_xpu, precision,
                   use_calib, use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list,
                   mkldnn_bf16_op_list, use_feed_fetch_ops, use_ascend_cl,
                   min_subgraph_size, dynamic_shape_info,
                   collect_shape_range_info, tuned_dynamic_shape_info))

        self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
        self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
        self.feed_names_to_idx_ = {}
        self.fetch_names_to_idx_ = {}

        for i, var in enumerate(model_conf.feed_var):
            self.feed_names_to_idx_[var.alias_name] = i
            self.feed_types_[var.alias_name] = var.feed_type
            self.feed_shapes_[var.alias_name] = var.shape

        for i, var in enumerate(model_conf.fetch_var):
            self.fetch_names_to_idx_[var.alias_name] = i
            self.fetch_types_[var.alias_name] = var.fetch_type
            self.fetch_names_to_type_[var.alias_name] = var.shape

        # set precision of inference.
        precision_type = paddle_infer.PrecisionType.Float32
        if precision is not None and precision.lower() in precision_map:
            precision_type = precision_map[precision.lower()]
        else:
            logger.warning("Unsupported precision: {}, falling back to fp32.".
                           format(precision))
        # set profile
        if use_profile:
            config.enable_profile()
        # set memory optimization
        if mem_optim:
            config.enable_memory_optim()
        # set ir optimization, threads of cpu math library
        config.switch_ir_optim(ir_optim)
        # use feed & fetch ops
        config.switch_use_feed_fetch_ops(use_feed_fetch_ops)
        # pass optim
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")

        # set cpu & mkldnn
        config.set_cpu_math_library_num_threads(thread_num)
        if use_mkldnn:
            config.enable_mkldnn()
            if precision_type == "bf16":
                config.enable_mkldnn_bfloat16()
            if mkldnn_cache_capacity > 0:
                config.set_mkldnn_cache_capacity(mkldnn_cache_capacity)
            if mkldnn_op_list is not None:
                config.set_mkldnn_op(mkldnn_op_list)
        # set gpu
        if collect_shape_range_info != "":
            config.collect_shape_range_info(collect_shape_range_info)
        if not use_gpu:
            config.disable_gpu()
        else:
            config.enable_use_gpu(100, gpu_id)
            if use_trt:
                config.enable_tensorrt_engine(
                    precision_mode=precision_type,
                    workspace_size=1 << 20,
                    max_batch_size=32,
                    min_subgraph_size=min_subgraph_size,
                    use_static=False,
                    use_calib_mode=use_calib)

                if tuned_dynamic_shape_info != "":
                    config.enable_tuned_tensorrt_dynamic_shape(tuned_dynamic_shape_info, True)

                @ErrorCatch
                @ParamChecker
                def dynamic_shape_info_helper(dynamic_shape_info:lambda dynamic_shape_info: check_dynamic_shape_info(dynamic_shape_info)):
                    pass
                _, resp = dynamic_shape_info_helper(dynamic_shape_info)
                if resp.err_no != CustomExceptionCode.OK.value:
                    print("dynamic_shape_info configure error, it should contain "
                          "'min_input_shape', 'max_input_shape', 'opt_input_shape'. {}".
                          format(resp.err_msg))
                    kill_stop_process_by_pid("kill", os.getpgid(os.getpid()))

                if len(dynamic_shape_info) and tuned_dynamic_shape_info == "":
                    config.set_trt_dynamic_shape_info(
                        dynamic_shape_info['min_input_shape'],
                        dynamic_shape_info['max_input_shape'],
                        dynamic_shape_info['opt_input_shape'])
        # set lite
        if use_lite:
            config.enable_lite_engine(
                precision_mode=precision_type,
                zero_copy=True,
                passes_filter=[],
                ops_filter=[])
            config.switch_ir_optim(True)
        # set xpu
        if use_xpu:
            # 8MB L3 cache
            config.enable_xpu(8 * 1024 * 1024)
            config.set_xpu_device_id(gpu_id)
        # set ascend cl
        if use_ascend_cl:
            if use_lite:
                # for ascend 310
                nnadapter_device_names = "huawei_ascend_npu"
                nnadapter_context_properties = \
                    "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(gpu_id)
                nnadapter_model_cache_dir = ""
                config.nnadapter() \
                .enable() \
                .set_device_names([nnadapter_device_names]) \
                .set_context_properties(nnadapter_context_properties) \
                .set_model_cache_dir(nnadapter_model_cache_dir)
            else:
                # for ascend 910
                config.enable_npu(gpu_id)
        # set cpu low precision
        if not use_gpu and not use_lite:
            if precision_type == paddle_infer.PrecisionType.Int8:
                logger.warning(
                    "PRECISION INT8 is not supported in CPU right now! Please use fp16 or bf16."
                )
                #config.enable_quantizer()
            if precision is not None and precision.lower() == "bf16":
                config.enable_mkldnn_bfloat16()
                if mkldnn_bf16_op_list is not None:
                    config.set_bfloat16_op(mkldnn_bf16_op_list)

        @ErrorCatch
        def create_predictor_check(config):
            predictor = paddle_infer.create_predictor(config)
            return predictor
        predictor, resp = create_predictor_check(config)
        if resp.err_no != CustomExceptionCode.OK.value:
            logger.critical(
                "failed to create predictor: {}".format(resp.err_msg),
                exc_info=False)
            print("failed to create predictor: {}".format(resp.err_msg))
            kill_stop_process_by_pid("kill", os.getpgid(os.getpid()))
        self.predictor = predictor

    def predict(self, feed=None, fetch=None, batch=False, log_id=0):
        """
        Run model inference by Paddle Inference API.

        Args:
            feed: feed var list, None is not allowed.
            fetch: fetch var list, None allowed. When it is None, all fetch
                   vars are returned; otherwise, only the specified ones are returned.
            batch: whether the feed data is already batched, False default. If
                   batch is False, a new batch dimension is added to the head of
                   each feed array (np.newaxis).
            log_id: for logging

        Returns:
            fetch_map: dict 
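
        Example (a hedged sketch; the feed name "words", the values and the
        optional "words.lod" entry are illustrative assumptions)::

            feed = {"words": np.array([[8], [233], [52]], dtype="int64"),
                    "words.lod": [0, 3]}
            fetch_map = predictor.predict(feed=feed, fetch=None, batch=True)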
        """
        if feed is None:
            raise ValueError("You should specify feed vars for prediction.\
                log_id:{}".format(log_id))

        feed_batch = []
        if isinstance(feed, dict):
            feed_batch.append(feed)
        elif isinstance(feed, list):
            feed_batch = feed
        else:
            raise ValueError("Feed only accepts dict and list of dict.\
                log_id:{}".format(log_id))

        fetch_list = []
        if fetch is not None:
            if isinstance(fetch, str):
                fetch_list = [fetch]
            elif isinstance(fetch, list):
                fetch_list = fetch

        # Filter invalid fetch names
        fetch_names = []
        for key in fetch_list:
            if key in self.fetch_names_:
                fetch_names.append(key)

        # Assemble the input data of paddle predictor, and filter invalid inputs. 
        input_names = self.predictor.get_input_names()
        for name in input_names:
            if isinstance(feed[name], list) and not isinstance(feed[name][0],
                                                               str):
                feed[name] = np.array(feed[name]).reshape(self.feed_shapes_[
                    name])
            if self.feed_types_[name] == 0:
                feed[name] = feed[name].astype("int64")
            elif self.feed_types_[name] == 1:
                feed[name] = feed[name].astype("float32")
            elif self.feed_types_[name] == 2:
                feed[name] = feed[name].astype("int32")
            elif self.feed_types_[name] == 3:
                feed[name] = feed[name].astype("float64")
            elif self.feed_types_[name] == 4:
                feed[name] = feed[name].astype("int16")
            elif self.feed_types_[name] == 5:
                feed[name] = feed[name].astype("float16")
            elif self.feed_types_[name] == 6:
                feed[name] = feed[name].astype("uint16")
            elif self.feed_types_[name] == 7:
                feed[name] = feed[name].astype("uint8")
            elif self.feed_types_[name] == 8:
                feed[name] = feed[name].astype("int8")
            elif self.feed_types_[name] == 9:
                feed[name] = feed[name].astype("bool")
            elif self.feed_types_[name] == 10:
                feed[name] = feed[name].astype("complex64")
            elif self.feed_types_[name] == 11:
                feed[name] = feed[name].astype("complex128")
            elif isinstance(feed[name], list) and isinstance(feed[name][0],
                                                             str):
                pass
            else:
                raise ValueError("local predictor receives wrong data type")

            input_tensor_handle = self.predictor.get_input_handle(name)
            if "{}.lod".format(name) in feed:
                input_tensor_handle.set_lod([feed["{}.lod".format(name)]])
            if not batch:
                input_tensor_handle.copy_from_cpu(feed[name][np.newaxis, :])
            else:
                input_tensor_handle.copy_from_cpu(feed[name])

        # set output tensor handlers
        output_tensor_handles = []
        output_name_to_index_dict = {}
        output_names = self.predictor.get_output_names()
        for i, output_name in enumerate(output_names):
            output_tensor_handle = self.predictor.get_output_handle(output_name)
            output_tensor_handles.append(output_tensor_handle)
            output_name_to_index_dict[output_name] = i

        # Run inference 
        self.predictor.run()

        # Assemble output data of predict results
        outputs = []
        for output_tensor_handle in output_tensor_handles:
            output = output_tensor_handle.copy_to_cpu()
            outputs.append(output)
        outputs_len = len(outputs)

        # Copy fetch vars. If fetch is None, it will copy all results from output_tensor_handles. 
        # Otherwise, it will copy the fields specified from output_tensor_handles.
        fetch_map = {}
        if fetch is None:
            for i, name in enumerate(output_names):
                fetch_map[name] = outputs[i]
                if len(output_tensor_handles[i].lod()) > 0:
                    fetch_map[name + ".lod"] = np.array(output_tensor_handles[
                        i].lod()[0]).astype('int32')
        else:
            # The save_inference_model interface may insert a scale op into the
            # network, so fetch_var names can differ from those in the prototxt.
            # To stay compatible with models saved by v0.6.x and earlier formats,
            # fetch names that do not match any output are handled below.
            fetch_match_num = 0
            for i, name in enumerate(fetch):
                output_index = output_name_to_index_dict.get(name)
                if output_index is None:
                    continue

                fetch_map[name] = outputs[output_index]
                fetch_match_num += 1
                if len(output_tensor_handles[output_index].lod()) > 0:
                    fetch_map[name + ".lod"] = np.array(output_tensor_handles[
                        output_index].lod()[0]).astype('int32')

            # Compatible with v0.6.x and lower versions model saving formats.
            if fetch_match_num == 0:
                logger.debug("No fetch name matched the model outputs, "
                             "falling back to positional matching; "
                             "please re-save or retrain the model.")
                for i, name in enumerate(fetch):
                    if i >= outputs_len:
                        break
                    fetch_map[name] = outputs[i]
                    if len(output_tensor_handles[i].lod()) > 0:
                        fetch_map[name + ".lod"] = np.array(
                            output_tensor_handles[i].lod()[0]).astype('int32')

        return fetch_map