diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py
index 50eed1585bb50cad0c3b816f18288ecea8c50f7d..105c5c9afedd157eea3b51ec54ffd2d30890c2e7 100644
--- a/python/paddle_serving_app/local_predict.py
+++ b/python/paddle_serving_app/local_predict.py
@@ -82,7 +82,6 @@ class LocalPredictor(object):
                           use_lite=False,
                           use_xpu=False,
                           precision="fp32",
-                          use_calib=False,
                           use_mkldnn=False,
                           mkldnn_cache_capacity=0,
                           mkldnn_op_list=None,
@@ -90,7 +89,8 @@ class LocalPredictor(object):
                           use_feed_fetch_ops=False,
                           use_ascend_cl=False,
                           min_subgraph_size=3,
-                          dynamic_shape_info={}):
+                          dynamic_shape_info={},
+                          use_calib=False):
         """
         Load model configs and create the paddle predictor by Paddle Inference API.
 
@@ -109,7 +109,6 @@ class LocalPredictor(object):
             use_lite: use Paddle-Lite Engint, False default
             use_xpu: run predict on Baidu Kunlun, False default
             precision: precision mode, "fp32" default
-            use_calib: use TensorRT calibration, False default
             use_mkldnn: use MKLDNN, False default.
             mkldnn_cache_capacity: cache capacity for input shapes, 0 default.
             mkldnn_op_list: op list accelerated using MKLDNN, None default.
@@ -118,6 +117,7 @@ class LocalPredictor(object):
             use_ascend_cl: run predict on Huawei Ascend, False default
             min_subgraph_size: the minimal subgraph size for opening tensorrt to optimize, 3 default
             dynamic_shape_info: dict including min_input_shape,max_input_shape, opt_input_shape, {} default
+            use_calib: use TensorRT calibration, False default
         """
         gpu_id = int(gpu_id)
         client_config = "{}/serving_server_conf.prototxt".format(model_path)
@@ -221,7 +221,7 @@ class LocalPredictor(object):
                 max_batch_size=32,
                 min_subgraph_size=min_subgraph_size,
                 use_static=False,
-                use_calib_mode=False)
+                use_calib_mode=use_calib)
 
             if len(dynamic_shape_info):
                 config.set_trt_dynamic_shape_info(
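The hunk above routes `use_calib` into the `use_calib_mode` argument of the predictor's TensorRT configuration instead of hard-coding it to False. A minimal sketch of how a caller could pass it through `load_model_config` (the model directory is a placeholder, and `use_gpu`/`use_trt` are assumed to be the existing flags of that method):

```python
from paddle_serving_app.local_predict import LocalPredictor

predictor = LocalPredictor()
predictor.load_model_config(
    "./serving_server",    # placeholder: directory of an exported serving model
    use_gpu=True,          # assumed existing flag: run on GPU
    gpu_id=0,
    use_trt=True,          # assumed existing flag: enable the TensorRT subgraph engine
    precision="int8",      # calibration is typically paired with int8 precision
    min_subgraph_size=3,
    use_calib=True)        # new keyword: forwarded to use_calib_mode
```

Since `use_calib_mode` belongs to the TensorRT engine setup, the flag only has an effect when the TensorRT path is enabled, which is why it is paired with `use_trt=True` in this sketch.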
use_gpu:{}, " "use_trt:{}, use_lite:{}, use_xpu:{}, device_type:{}, devices:{}, " "mem_optim:{}, ir_optim:{}, use_profile:{}, thread_num:{}, " - "client_type:{}, fetch_names:{}, precision:{}, use_mkldnn:{}, " - "mkldnn_cache_capacity:{}, mkldnn_op_list:{}, " + "client_type:{}, fetch_names:{}, precision:{}, use_calib:{}, " + "use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, " "mkldnn_bf16_op_list:{}, use_ascend_cl:{}, min_subgraph_size:{}," "is_set_dynamic_shape_info:{}".format( model_config, self._device_name, self._use_gpu, self._use_trt, self._use_lite, self._use_xpu, device_type, self._devices, self._mem_optim, self._ir_optim, self._use_profile, self._thread_num, self._client_type, self._fetch_names, - self._precision, self._use_mkldnn, self._mkldnn_cache_capacity, - self._mkldnn_op_list, self._mkldnn_bf16_op_list, - self._use_ascend_cl, self.min_subgraph_size, - bool(len(self.dynamic_shape_info)))) + self._precision, self._use_calib, self._use_mkldnn, + self._mkldnn_cache_capacity, self._mkldnn_op_list, + self._mkldnn_bf16_op_list, self._use_ascend_cl, + self.min_subgraph_size, bool(len(self.dynamic_shape_info)))) def get_fetch_list(self): return self._fetch_names @@ -250,7 +254,8 @@ class LocalServiceHandler(object): mkldnn_bf16_op_list=self._mkldnn_bf16_op_list, use_ascend_cl=self._use_ascend_cl, min_subgraph_size=self.min_subgraph_size, - dynamic_shape_info=self.dynamic_shape_info) + dynamic_shape_info=self.dynamic_shape_info, + use_calib=self._use_calib) return self._local_predictor_client def get_client_config(self): diff --git a/python/pipeline/operator.py b/python/pipeline/operator.py index e261dec95a10f003f2efebbd1f0ac23f59ca9288..cd3b54d90290cbca4a3c151ef7521b09f15e739f 100644 --- a/python/pipeline/operator.py +++ b/python/pipeline/operator.py @@ -193,6 +193,7 @@ class Op(object): self.mkldnn_op_list = None self.mkldnn_bf16_op_list = None self.min_subgraph_size = 3 + self.use_calib = False if self._server_endpoints is None: server_endpoints = conf.get("server_endpoints", []) @@ -216,6 +217,7 @@ class Op(object): self.ir_optim = local_service_conf.get("ir_optim") self._fetch_names = local_service_conf.get("fetch_list") self.precision = local_service_conf.get("precision") + self.use_calib = local_service_conf.get("use_calib") self.use_mkldnn = local_service_conf.get("use_mkldnn") self.mkldnn_cache_capacity = local_service_conf.get( "mkldnn_cache_capacity") @@ -248,7 +250,8 @@ class Op(object): mkldnn_op_list=self.mkldnn_bf16_op_list, mkldnn_bf16_op_list=self.mkldnn_bf16_op_list, min_subgraph_size=self.min_subgraph_size, - dynamic_shape_info=self.dynamic_shape_info) + dynamic_shape_info=self.dynamic_shape_info, + use_calib=self.use_calib) service_handler.prepare_server() # get fetch_list serivce_ports = service_handler.get_port_list() self._server_endpoints = [ @@ -278,7 +281,8 @@ class Op(object): mkldnn_op_list=self.mkldnn_op_list, mkldnn_bf16_op_list=self.mkldnn_bf16_op_list, min_subgraph_size=self.min_subgraph_size, - dynamic_shape_info=self.dynamic_shape_info) + dynamic_shape_info=self.dynamic_shape_info, + use_calib=self.use_calib) if self._client_config is None: self._client_config = service_handler.get_client_config( ) @@ -784,8 +788,8 @@ class Op(object): self.mkldnn_cache_capacity, self.mkldnn_op_list, self.mkldnn_bf16_op_list, self.is_jump_op(), self.get_output_channels_of_jump_ops(), - self.min_subgraph_size, - self.dynamic_shape_info)) + self.min_subgraph_size, self.dynamic_shape_info, + self.use_calib)) p.daemon = True p.start() process.append(p) @@ 
diff --git a/python/pipeline/operator.py b/python/pipeline/operator.py
index e261dec95a10f003f2efebbd1f0ac23f59ca9288..cd3b54d90290cbca4a3c151ef7521b09f15e739f 100644
--- a/python/pipeline/operator.py
+++ b/python/pipeline/operator.py
@@ -193,6 +193,7 @@ class Op(object):
         self.mkldnn_op_list = None
         self.mkldnn_bf16_op_list = None
         self.min_subgraph_size = 3
+        self.use_calib = False
 
         if self._server_endpoints is None:
             server_endpoints = conf.get("server_endpoints", [])
@@ -216,6 +217,7 @@ class Op(object):
                 self.ir_optim = local_service_conf.get("ir_optim")
                 self._fetch_names = local_service_conf.get("fetch_list")
                 self.precision = local_service_conf.get("precision")
+                self.use_calib = local_service_conf.get("use_calib")
                 self.use_mkldnn = local_service_conf.get("use_mkldnn")
                 self.mkldnn_cache_capacity = local_service_conf.get(
                     "mkldnn_cache_capacity")
@@ -248,7 +250,8 @@ class Op(object):
                         mkldnn_op_list=self.mkldnn_bf16_op_list,
                         mkldnn_bf16_op_list=self.mkldnn_bf16_op_list,
                         min_subgraph_size=self.min_subgraph_size,
-                        dynamic_shape_info=self.dynamic_shape_info)
+                        dynamic_shape_info=self.dynamic_shape_info,
+                        use_calib=self.use_calib)
                     service_handler.prepare_server()  # get fetch_list
                     serivce_ports = service_handler.get_port_list()
                     self._server_endpoints = [
@@ -278,7 +281,8 @@ class Op(object):
                         mkldnn_op_list=self.mkldnn_op_list,
                         mkldnn_bf16_op_list=self.mkldnn_bf16_op_list,
                         min_subgraph_size=self.min_subgraph_size,
-                        dynamic_shape_info=self.dynamic_shape_info)
+                        dynamic_shape_info=self.dynamic_shape_info,
+                        use_calib=self.use_calib)
                     if self._client_config is None:
                         self._client_config = service_handler.get_client_config(
                         )
@@ -784,8 +788,8 @@ class Op(object):
                         self.mkldnn_cache_capacity, self.mkldnn_op_list,
                         self.mkldnn_bf16_op_list, self.is_jump_op(),
                         self.get_output_channels_of_jump_ops(),
-                        self.min_subgraph_size,
-                        self.dynamic_shape_info))
+                        self.min_subgraph_size, self.dynamic_shape_info,
+                        self.use_calib))
             p.daemon = True
             p.start()
             process.append(p)
@@ -819,12 +823,12 @@ class Op(object):
                     self._get_output_channels(), True, trace_buffer,
                     self.model_config, self.workdir, self.thread_num,
                     self.device_type, self.devices, self.mem_optim,
-                    self.ir_optim, self.precision, self.use_mkldnn,
-                    self.mkldnn_cache_capacity, self.mkldnn_op_list,
-                    self.mkldnn_bf16_op_list, self.is_jump_op(),
+                    self.ir_optim, self.precision, self.use_mkldnn,
+                    self.mkldnn_cache_capacity, self.mkldnn_op_list,
+                    self.mkldnn_bf16_op_list, self.is_jump_op(),
                     self.get_output_channels_of_jump_ops(),
-                    self.min_subgraph_size,
-                    self.dynamic_shape_info))
+                    self.min_subgraph_size, self.dynamic_shape_info,
+                    self.use_calib))
             # When a process exits, it attempts to terminate
             # all of its daemonic child processes.
             t.daemon = True
@@ -1283,9 +1287,10 @@ class Op(object):
 
     def _run(self, concurrency_idx, input_channel, output_channels,
              is_thread_op, trace_buffer, model_config, workdir, thread_num,
-             device_type, devices, mem_optim, ir_optim, precision, use_mkldnn,
-             mkldnn_cache_capacity, mkldnn_op_list, mkldnn_bf16_op_list,
-             is_jump_op, output_channels_of_jump_ops, min_subgraph_size, dynamic_shape_info):
+             device_type, devices, mem_optim, ir_optim, precision,
+             use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list,
+             mkldnn_bf16_op_list, is_jump_op, output_channels_of_jump_ops,
+             min_subgraph_size, dynamic_shape_info, use_calib):
         """
         _run() is the entry function of OP process / thread model.When client
         type is local_predictor in process mode, the CUDA environment needs to
@@ -1314,6 +1319,7 @@ class Op(object):
             mkldnn_bf16_op_list: OP list optimized by mkldnn bf16, None default.
             is_jump_op: OP has jump op list or not, False default.
             output_channels_of_jump_ops: all output channels of jump ops.
+            use_calib: use calib mode of paddle inference, False default.
 
         Returns:
             None
@@ -1339,7 +1345,8 @@ class Op(object):
                     mkldnn_op_list=mkldnn_op_list,
                     mkldnn_bf16_op_list=mkldnn_bf16_op_list,
                     min_subgraph_size=min_subgraph_size,
-                    dynamic_shape_info=dynamic_shape_info)
+                    dynamic_shape_info=dynamic_shape_info,
+                    use_calib=use_calib)
                 _LOGGER.info("Init cuda env in process {}".format(
                     concurrency_idx))
diff --git a/python/pipeline/pipeline_server.py b/python/pipeline/pipeline_server.py
index 5cc7e4de55dc28b9d5d9bb04f89a1464bd6bd644..3ff765c4725a31c31a0de4effc01303507f1ebbc 100644
--- a/python/pipeline/pipeline_server.py
+++ b/python/pipeline/pipeline_server.py
@@ -421,6 +421,7 @@ class ServerYamlConfChecker(object):
             "use_calib": False,
             "use_mkldnn": False,
             "mkldnn_cache_capacity": 0,
+            "min_subgraph_size": 3,
         }
         conf_type = {
             "model_config": str,
@@ -436,6 +437,7 @@ class ServerYamlConfChecker(object):
             "mkldnn_cache_capacity": int,
             "mkldnn_op_list": list,
             "mkldnn_bf16_op_list": list,
+            "min_subgraph_size": int,
         }
         conf_qualification = {"thread_num": (">=", 1), }
         ServerYamlConfChecker.check_conf(conf, default_conf, conf_type,
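On the pipeline side, each op picks `use_calib` up from its `local_service_conf`, and the YAML checker above now also accepts `min_subgraph_size`. A sketch of the relevant slice of one op's configuration, written as the dict the server would parse from config.yml (the model directory and the `device_type` value for GPU+TensorRT are assumptions):

```python
# Mirrors the local_service_conf section of a pipeline config.yml for one op;
# key names follow the checker above, values are illustrative only.
op_conf = {
    "concurrency": 1,
    "local_service_conf": {
        "model_config": "./serving_server",  # placeholder model directory
        "device_type": 2,                    # assumed: GPU with TensorRT
        "devices": "0",
        "precision": "int8",
        "use_calib": True,                   # read via local_service_conf.get("use_calib")
        "min_subgraph_size": 3,              # now recognized by ServerYamlConfChecker
    },
}
```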
diff --git a/tools/generate_runtime_docker.sh b/tools/generate_runtime_docker.sh
index 626f7b21d31e5f957dab902521b24ba21d270dd8..ee292430048a3cfadd7be551b4f9a98098c9c4f6 100644
--- a/tools/generate_runtime_docker.sh
+++ b/tools/generate_runtime_docker.sh
@@ -9,9 +9,9 @@ function usage
     echo " ";
     echo " --env : running env, cpu/cuda10.1/cuda10.2/cuda11.2";
     echo " --python : python version, 3.6/3.7/3.8 ";
-    #echo " --serving : serving version(0.6.0/0.6.2)";
-    #echo " --paddle : paddle version(2.1.0/2.2.0)"
-    echo " --image_name : image name(default serving_runtime:env-python)";
+    echo " --serving : serving version(0.7.0/0.6.2)";
+    echo " --paddle : paddle version(2.2.0/2.1.2)"
+    echo " --image_name : image name(default serving_runtime:env-python)"
     echo " -h | --help : helper";
 }
 
@@ -25,9 +25,9 @@ function parse_args
           case "$1" in
               --env )        env="$2";        shift;;
               --python )     python="$2";     shift;;
-              #--serving )   serving="$2";    shift;;
-              #--paddle )    paddle="$2";     shift;;
-              --image_name ) image_name="$2"; shift;;
+              --serving )    serving="$2";    shift;;
+              --paddle )     paddle="$2";     shift;;
+              --image_name ) image_name="$2"; shift;;
               -h | --help )  usage; exit;; # quit and show usage
               * )            args+=("$1")  # if no match, add it to the positional args
           esac
@@ -41,7 +41,7 @@ function parse_args
     positional_2="${args[1]}"
 
     # validate required args
-    if [[ -z "${env}" || -z "${python}" ]]; then
+    if [[ -z "${paddle}" || -z "${env}" || -z "${python}" || -z "${serving}" ]]; then
         echo "Invalid arguments. paddle or env or python or serving is missing."
         usage
         exit;
@@ -57,8 +57,6 @@ function parse_args
 
 function run
 {
-    python="2.2.0"
-    serving="0.7.0"
    parse_args "$@"
 
     echo "named arg: env: $env"
@@ -71,6 +69,8 @@ function run
     elif [ $env == "cuda11.2" ]; then
         base_image="nvidia\/cuda:11.2.0-cudnn8-runtime-ubuntu16.04"
     fi
+    #python="2.2.0"
+    #serving="0.7.0"
     echo "base image: $base_image"
     echo "named arg: python: $python"
     echo "named arg: serving: $serving"
@@ -78,8 +78,7 @@ function run
     echo "named arg: image_name: $image_name"
 
     sed -e "s/<>/$base_image/g" -e "s/<>/$python/g" -e "s/<>/$env/g" -e "s/<>/$serving/g" -e "s/<>/$paddle/g" tools/Dockerfile.runtime_template > Dockerfile.tmp
-    #docker build --network=host --build-arg ftp_proxy=http://172.19.57.45:3128 --build-arg https_proxy=http://172.19.57.45:3128 --build-arg http_proxy=http://172.19.57.45:3128 --build-arg HTTP_PROXY=http://172.19.57.45:3128 --build-arg HTTPS_PROXY=http://172.19.57.45:3128 -t $image_name -f Dockerfile.tmp .
-    docker build -t $image_name -f Dockerfile.tmp .
+    docker build --network=host --build-arg ftp_proxy=http://172.19.57.45:3128 --build-arg https_proxy=http://172.19.57.45:3128 --build-arg http_proxy=http://172.19.57.45:3128 --build-arg HTTP_PROXY=http://172.19.57.45:3128 --build-arg HTTPS_PROXY=http://172.19.57.45:3128 -t $image_name -f Dockerfile.tmp .
 }
 
 run "$@";
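With `--serving` and `--paddle` now required by the argument check, an invocation would look roughly like `bash tools/generate_runtime_docker.sh --env cuda10.2 --python 3.8 --serving 0.7.0 --paddle 2.2.0 --image_name serving_runtime:cuda10.2-py38`; the version values follow the usage text above and the image name is only an example.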