# -*- coding: utf-8 -*-
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""

import os
import google.protobuf.text_format
import numpy as np
import argparse
from .proto import general_model_config_pb2 as m_config
import paddle.inference as paddle_infer
import logging
import glob
from paddle_serving_server.pipeline.error_catch import ErrorCatch, CustomException, CustomExceptionCode, ParamChecker, ParamVerify
check_dynamic_shape_info = ParamVerify.check_dynamic_shape_info

logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("LocalPredictor")
logger.setLevel(logging.INFO)
from paddle_serving_server.util import kill_stop_process_by_pid
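
# Map user-facing precision strings to Paddle Inference precision types. Note
# that 'bf16' is kept as a plain string because it is enabled through the
# MKLDNN bfloat16 switches below rather than a PrecisionType enum value.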

precision_map = {
    'int8': paddle_infer.PrecisionType.Int8,
    'fp32': paddle_infer.PrecisionType.Float32,
    'fp16': paddle_infer.PrecisionType.Half,
    'bf16': 'bf16',
}


class LocalPredictor(object):
    """
    Prediction in the current process of the local environment, in process
    call, Compared with RPC/HTTP, LocalPredictor has better performance, 
    because of no network and packaging load.
    """

    def __init__(self):
        self.feed_names_ = []
        self.fetch_names_ = []
        self.feed_types_ = {}
        self.fetch_types_ = {}
        self.feed_shapes_ = {}
        self.feed_names_to_idx_ = {}
        self.fetch_names_to_idx_ = {}
        self.fetch_names_to_type_ = {}

    def search_suffix_files(self, model_path, target_suffix):
        """
        Find all files with the given suffix in the specified directory.

        Args:
            model_path: model directory, not None.
            target_suffix: glob pattern of the target suffix, not None. e.g. "*.pdmodel"

        Returns:
            file_list: None, [] or [path, ...].
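
        Example:
            A minimal sketch with a hypothetical model directory:
                search_suffix_files("/tmp/my_model", "*.pdmodel")
            would return ["/tmp/my_model/model.pdmodel"] if that file exists.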
        """
        if model_path is None or target_suffix is None:
            return None

        file_list = glob.glob(os.path.join(model_path, target_suffix))
        return file_list

    def load_model_config(self,
                          model_path,
                          use_gpu=False,
                          gpu_id=0,
                          use_profile=False,
                          thread_num=1,
                          mem_optim=True,
                          ir_optim=False,
                          use_trt=False,
                          use_lite=False,
                          use_xpu=False,
                          precision="fp32",
                          use_mkldnn=False,
                          mkldnn_cache_capacity=0,
                          mkldnn_op_list=None,
                          mkldnn_bf16_op_list=None,
                          use_feed_fetch_ops=False,
                          use_ascend_cl=False,
                          min_subgraph_size=3,
                          dynamic_shape_info={},
                          use_calib=False):
        """
        Load model configs and create the paddle predictor by Paddle Inference API.
   
        Args:
            model_path: model config path.
            use_gpu: run inference on GPU, False default.
            gpu_id: gpu id, 0 default.
            use_profile: use predictor profiles, False default.
            thread_num: number of CPU math library threads, 1 default.
            mem_optim: memory optimization, True default.
            ir_optim: enable computation graph (IR) optimization, False default.
            use_trt: use NVIDIA TensorRT optimization, False default.
            use_lite: use Paddle-Lite engine, False default.
            use_xpu: run predict on Baidu Kunlun, False default
            precision: precision mode, "fp32" default
            use_mkldnn: use MKLDNN, False default.
            mkldnn_cache_capacity: cache capacity for input shapes, 0 default.
            mkldnn_op_list: op list accelerated using MKLDNN, None default.
            mkldnn_bf16_op_list: op list accelerated using MKLDNN bf16, None default.
            use_feed_fetch_ops: use feed/fetch ops, False default.
            use_ascend_cl: run predict on Huawei Ascend, False default
            min_subgraph_size: the minimum subgraph size for TensorRT to optimize, 3 default.
            dynamic_shape_info: dict including min_input_shape, max_input_shape and opt_input_shape, {} default.
            use_calib: use TensorRT calibration, False default
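
        Example:
            A minimal usage sketch; the model directory below is hypothetical and
            is assumed to contain serving_server_conf.prototxt together with the
            model and parameter files:

                predictor = LocalPredictor()
                predictor.load_model_config("./uci_housing_model", use_gpu=False)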
        """
        gpu_id = int(gpu_id)
        client_config = "{}/serving_server_conf.prototxt".format(model_path)
        model_conf = m_config.GeneralModelConfig()
        with open(client_config, 'r') as f:
            model_conf = google.protobuf.text_format.Merge(
                str(f.read()), model_conf)

        # Init paddle_infer config
        # Paddle's model files and parameter files have multiple naming rules:
        #   1) __model__, __params__
        #   2) *.pdmodel, *.pdiparams
        #   3) __model__, conv2d_1.w_0, conv2d_2.w_0, fc_1.w_0, conv2d_1.b_0, ... 
        pdmodel_file_list = self.search_suffix_files(model_path, "*.pdmodel")
        pdiparams_file_list = self.search_suffix_files(model_path,
                                                       "*.pdiparams")
        if os.path.exists(os.path.join(model_path, "__params__")):
            # case 1) initializing
            config = paddle_infer.Config(
                os.path.join(model_path, "__model__"),
                os.path.join(model_path, "__params__"))
        elif pdmodel_file_list and pdiparams_file_list:
            # case 2) initializing
            logger.info("pdmodel_file_list:{}, pdiparams_file_list:{}".format(
                pdmodel_file_list, pdiparams_file_list))
            config = paddle_infer.Config(pdmodel_file_list[0],
                                         pdiparams_file_list[0])
        else:
            # case 3) initializing.
            config = paddle_infer.Config(model_path)

        logger.info(
            "LocalPredictor load_model_config params: model_path:{}, use_gpu:{}, "
            "gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{}, "
            "use_trt:{}, use_lite:{}, use_xpu:{}, precision:{}, use_calib:{}, "
            "use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
            "mkldnn_bf16_op_list:{}, use_feed_fetch_ops:{}, "
            "use_ascend_cl:{}, min_subgraph_size:{}, dynamic_shape_info:{}".
            format(model_path, use_gpu, gpu_id, use_profile, thread_num,
                   mem_optim, ir_optim, use_trt, use_lite, use_xpu, precision,
                   use_calib, use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list,
                   mkldnn_bf16_op_list, use_feed_fetch_ops, use_ascend_cl,
                   min_subgraph_size, dynamic_shape_info))

        self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
        self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
        self.feed_names_to_idx_ = {}
        self.fetch_names_to_idx_ = {}

        for i, var in enumerate(model_conf.feed_var):
            self.feed_names_to_idx_[var.alias_name] = i
            self.feed_types_[var.alias_name] = var.feed_type
            self.feed_shapes_[var.alias_name] = var.shape

        for i, var in enumerate(model_conf.fetch_var):
            self.fetch_names_to_idx_[var.alias_name] = i
            self.fetch_types_[var.alias_name] = var.fetch_type
            self.fetch_names_to_type_[var.alias_name] = var.fetch_type

        # set precision of inference.
        precision_type = paddle_infer.PrecisionType.Float32
        if precision is not None and precision.lower() in precision_map:
            precision_type = precision_map[precision.lower()]
        else:
            logger.warning("Unsupported precision: {}, falling back to fp32.".
                           format(precision))
        # set profile
        if use_profile:
            config.enable_profile()
        # set memory optimization
        if mem_optim:
            config.enable_memory_optim()
        # set ir optimization, threads of cpu math library
        config.switch_ir_optim(ir_optim)
        # use feed & fetch ops
        config.switch_use_feed_fetch_ops(use_feed_fetch_ops)
        # pass optim
        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")

        # set cpu & mkldnn
        config.set_cpu_math_library_num_threads(thread_num)
        if use_mkldnn:
            config.enable_mkldnn()
            if precision_type == "bf16":
                config.enable_mkldnn_bfloat16()
            if mkldnn_cache_capacity > 0:
                config.set_mkldnn_cache_capacity(mkldnn_cache_capacity)
            if mkldnn_op_list is not None:
                config.set_mkldnn_op(mkldnn_op_list)
        # set gpu
        if not use_gpu:
            config.disable_gpu()
        else:
            config.enable_use_gpu(100, gpu_id)
            if use_trt:
                config.enable_tensorrt_engine(
                    precision_mode=precision_type,
                    workspace_size=1 << 20,
                    max_batch_size=32,
                    min_subgraph_size=min_subgraph_size,
                    use_static=False,
                    use_calib_mode=use_calib)
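
                # The expected dynamic_shape_info layout is a dict with the keys
                # 'min_input_shape', 'max_input_shape' and 'opt_input_shape',
                # each mapping tensor names to shapes. A minimal sketch (the
                # tensor name "x" and the shapes are hypothetical):
                #   {"min_input_shape": {"x": [1, 3, 224, 224]},
                #    "max_input_shape": {"x": [8, 3, 448, 448]},
                #    "opt_input_shape": {"x": [4, 3, 224, 224]}}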

                @ErrorCatch
                @ParamChecker
                def dynamic_shape_info_helper(dynamic_shape_info:lambda dynamic_shape_info: check_dynamic_shape_info(dynamic_shape_info)):
                    pass
                _, resp = dynamic_shape_info_helper(dynamic_shape_info)
                if resp.err_no != CustomExceptionCode.OK.value:
                    print("dynamic_shape_info configure error, it should contain [min_input_shape', 'max_input_shape', 'opt_input_shape' {}".format(resp.err_msg))
                    kill_stop_process_by_pid("kill", os.getpgid(os.getpid()))

                if len(dynamic_shape_info):
                    config.set_trt_dynamic_shape_info(
                        dynamic_shape_info['min_input_shape'],
                        dynamic_shape_info['max_input_shape'],
                        dynamic_shape_info['opt_input_shape'])
        # set lite
        if use_lite:
            config.enable_lite_engine(
                precision_mode=precision_type,
                zero_copy=True,
                passes_filter=[],
                ops_filter=[])
            config.switch_ir_optim(True)
        # set xpu
        if use_xpu:
            # 8MB L3 cache
            config.enable_xpu(8 * 1024 * 1024)
            config.set_xpu_device_id(gpu_id)
        # set ascend cl
        if use_ascend_cl:
            if use_lite:
                # for ascend 310
                nnadapter_device_names = "huawei_ascend_npu"
                nnadapter_context_properties = \
                    "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(gpu_id)
                nnadapter_model_cache_dir = ""
                config.nnadapter() \
                .enable() \
                .set_device_names([nnadapter_device_names]) \
                .set_context_properties(nnadapter_context_properties) \
                .set_model_cache_dir(nnadapter_model_cache_dir)
            else:
                # for ascend 910
                config.enable_npu(gpu_id)
        # set cpu low precision
        if not use_gpu and not use_lite:
            if precision_type == paddle_infer.PrecisionType.Int8:
                logger.warning(
                    "PRECISION INT8 is not supported in CPU right now! Please use fp16 or bf16."
                )
                #config.enable_quantizer()
            if precision is not None and precision.lower() == "bf16":
                config.enable_mkldnn_bfloat16()
                if mkldnn_bf16_op_list is not None:
                    config.set_bfloat16_op(mkldnn_bf16_op_list)

        @ErrorCatch
        def create_predictor_check(config):
            predictor = paddle_infer.create_predictor(config)
            return predictor
        predictor, resp = create_predictor_check(config)
        if resp.err_no != CustomExceptionCode.OK.value:
            logger.critical(
                "failed to create predictor: {}".format(resp.err_msg),
                exc_info=False)
            print("failed to create predictor: {}".format(resp.err_msg))
            kill_stop_process_by_pid("kill", os.getpgid(os.getpid()))
        self.predictor = predictor

    def predict(self, feed=None, fetch=None, batch=False, log_id=0):
        """
        Run model inference by Paddle Inference API.

        Args:
            feed: feed var list, None is not allowed.
            fetch: fetch var list, None allowed. When it is None, all fetch
                   vars are returned. Otherwise, only the specified fetch vars
                   are returned.
            batch: whether the feed data is already batched, False default. If
                   batch is False, a new dimension is added at the head of the
                   shape [np.newaxis].
            log_id: id for logging.

        Returns:
            fetch_map: dict 
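
        Example:
            A minimal sketch; the feed/fetch names "x" and "price" below are
            hypothetical and depend on the saved model config:

                fetch_map = local_predictor.predict(
                    feed={"x": np.array([[0.1] * 13], dtype="float32")},
                    fetch=["price"],
                    batch=True)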
        """
        if feed is None:
            raise ValueError(
                "You should specify feed vars for prediction. log_id:{}".format(
                    log_id))

        feed_batch = []
        if isinstance(feed, dict):
            feed_batch.append(feed)
        elif isinstance(feed, list):
            feed_batch = feed
        else:
            raise ValueError("Feed only accepts dict and list of dict.\
                log_id:{}".format(log_id))

        fetch_list = []
        if fetch is not None:
            if isinstance(fetch, str):
                fetch_list = [fetch]
            elif isinstance(fetch, list):
                fetch_list = fetch

        # Filter invalid fetch names
        fetch_names = []
        for key in fetch_list:
            if key in self.fetch_names_:
                fetch_names.append(key)

        # Assemble the input data of paddle predictor, and filter invalid inputs. 
        input_names = self.predictor.get_input_names()
        for name in input_names:
            if isinstance(feed[name], list) and not isinstance(feed[name][0],
                                                               str):
                feed[name] = np.array(feed[name]).reshape(self.feed_shapes_[
                    name])
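            # Cast the feed data to the dtype declared by feed_type in the model
            # config (0=int64, 1=float32, 2=int32, 3=float64, 4=int16, 5=float16,
            # 6=uint16, 7=uint8, 8=int8, 9=bool, 10=complex64, 11=complex128).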
            if self.feed_types_[name] == 0:
                feed[name] = feed[name].astype("int64")
            elif self.feed_types_[name] == 1:
                feed[name] = feed[name].astype("float32")
            elif self.feed_types_[name] == 2:
                feed[name] = feed[name].astype("int32")
            elif self.feed_types_[name] == 3:
                feed[name] = feed[name].astype("float64")
            elif self.feed_types_[name] == 4:
                feed[name] = feed[name].astype("int16")
            elif self.feed_types_[name] == 5:
                feed[name] = feed[name].astype("float16")
            elif self.feed_types_[name] == 6:
                feed[name] = feed[name].astype("uint16")
            elif self.feed_types_[name] == 7:
                feed[name] = feed[name].astype("uint8")
            elif self.feed_types_[name] == 8:
                feed[name] = feed[name].astype("int8")
            elif self.feed_types_[name] == 9:
                feed[name] = feed[name].astype("bool")
            elif self.feed_types_[name] == 10:
                feed[name] = feed[name].astype("complex64")
            elif self.feed_types_[name] == 11:
                feed[name] = feed[name].astype("complex128")
            elif isinstance(feed[name], list) and isinstance(feed[name][0],
                                                             str):
                pass
            else:
                raise ValueError("local predictor receives wrong data type")

            input_tensor_handle = self.predictor.get_input_handle(name)
            if "{}.lod".format(name) in feed:
                input_tensor_handle.set_lod([feed["{}.lod".format(name)]])
            if batch == False:
                input_tensor_handle.copy_from_cpu(feed[name][np.newaxis, :])
            else:
                input_tensor_handle.copy_from_cpu(feed[name])

        # set output tensor handlers
        output_tensor_handles = []
        output_name_to_index_dict = {}
        output_names = self.predictor.get_output_names()
        for i, output_name in enumerate(output_names):
            output_tensor_handle = self.predictor.get_output_handle(output_name)
            output_tensor_handles.append(output_tensor_handle)
            output_name_to_index_dict[output_name] = i

        # Run inference 
        self.predictor.run()

        # Assemble output data of predict results
        outputs = []
        for output_tensor_handle in output_tensor_handles:
            output = output_tensor_handle.copy_to_cpu()
            outputs.append(output)
        outputs_len = len(outputs)

        # Copy fetch vars. If fetch is None, it will copy all results from output_tensor_handles. 
        # Otherwise, it will copy the fields specified from output_tensor_handles.
        fetch_map = {}
        if fetch is None:
            for i, name in enumerate(output_names):
                fetch_map[name] = outputs[i]
                if len(output_tensor_handles[i].lod()) > 0:
                    fetch_map[name + ".lod"] = np.array(output_tensor_handles[
                        i].lod()[0]).astype('int32')
        else:
            # Because the save_inference_model interface may insert extra ops
            # (e.g. scale) into the network, the fetch_var names can differ from
            # those in the prototxt. For compatibility with v0.6.x and earlier
            # model saving formats, mismatched fetch names are handled here.
            fetch_match_num = 0
            for i, name in enumerate(fetch):
                output_index = output_name_to_index_dict.get(name)
                if output_index is None:
                    continue

                fetch_map[name] = outputs[output_index]
                fetch_match_num += 1
                if len(output_tensor_handles[output_index].lod()) > 0:
                    fetch_map[name + ".lod"] = np.array(output_tensor_handles[
                        output_index].lod()[0]).astype('int32')

            # Compatible with v0.6.x and earlier model saving formats.
            if fetch_match_num == 0:
                logger.debug("fetch match num is 0. Retrain the model please!")
                for i, name in enumerate(fetch):
                    if i >= outputs_len:
                        break
                    fetch_map[name] = outputs[i]
                    if len(output_tensor_handles[i].lod()) > 0:
                        fetch_map[name + ".lod"] = np.array(
                            output_tensor_handles[i].lod()[0]).astype('int32')

        return fetch_map