Unverified commit 87d4f7aa, authored by TeslaZhao and committed by GitHub

Merge pull request #1594 from TeslaZhao/develop

Update params & kubernetes gen shell
@@ -82,7 +82,6 @@ class LocalPredictor(object):
  use_lite=False,
  use_xpu=False,
  precision="fp32",
- use_calib=False,
  use_mkldnn=False,
  mkldnn_cache_capacity=0,
  mkldnn_op_list=None,
@@ -90,7 +89,8 @@ class LocalPredictor(object):
  use_feed_fetch_ops=False,
  use_ascend_cl=False,
  min_subgraph_size=3,
- dynamic_shape_info={}):
+ dynamic_shape_info={},
+ use_calib=False):
  """
  Load model configs and create the paddle predictor by Paddle Inference API.
@@ -109,7 +109,6 @@ class LocalPredictor(object):
  use_lite: use Paddle-Lite Engine, False default
  use_xpu: run predict on Baidu Kunlun, False default
  precision: precision mode, "fp32" default
- use_calib: use TensorRT calibration, False default
  use_mkldnn: use MKLDNN, False default.
  mkldnn_cache_capacity: cache capacity for input shapes, 0 default.
  mkldnn_op_list: op list accelerated using MKLDNN, None default.
@@ -118,6 +117,7 @@ class LocalPredictor(object):
  use_ascend_cl: run predict on Huawei Ascend, False default
  min_subgraph_size: the minimal subgraph size for opening tensorrt to optimize, 3 default
  dynamic_shape_info: dict including min_input_shape, max_input_shape, opt_input_shape, {} default
+ use_calib: use TensorRT calibration, False default
  """
  gpu_id = int(gpu_id)
  client_config = "{}/serving_server_conf.prototxt".format(model_path)
@@ -221,7 +221,7 @@ class LocalPredictor(object):
  max_batch_size=32,
  min_subgraph_size=min_subgraph_size,
  use_static=False,
- use_calib_mode=False)
+ use_calib_mode=use_calib)
  if len(dynamic_shape_info):
  config.set_trt_dynamic_shape_info(
......
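For context, the relocated use_calib argument is what finally reaches Paddle Inference as use_calib_mode when the TensorRT engine is enabled. The sketch below is a minimal, hypothetical illustration of that mapping rather than the LocalPredictor code itself; the function name, model file names, GPU memory pool size, and the int8 precision choice are assumptions.

    # Hedged sketch: how use_calib is expected to reach Paddle Inference.
    from paddle.inference import Config, PrecisionType, create_predictor

    def build_trt_predictor(model_dir, use_calib=False, min_subgraph_size=3):
        # Model file names are placeholders.
        config = Config(model_dir + "/inference.pdmodel",
                        model_dir + "/inference.pdiparams")
        config.enable_use_gpu(2000, 0)           # 2000 MB pool on GPU 0 (assumption)
        config.enable_tensorrt_engine(
            precision_mode=PrecisionType.Int8,   # calibration only matters for int8
            max_batch_size=32,
            min_subgraph_size=min_subgraph_size,
            use_static=False,
            use_calib_mode=use_calib)            # previously hard-coded to False
        return create_predictor(config)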
@@ -52,7 +52,8 @@ class LocalServiceHandler(object):
  mkldnn_op_list=None,
  mkldnn_bf16_op_list=None,
  min_subgraph_size=3,
- dynamic_shape_info={}):
+ dynamic_shape_info={},
+ use_calib=False):
  """
  Initialization of localservicehandler
@@ -75,6 +76,7 @@ class LocalServiceHandler(object):
  mkldnn_cache_capacity: cache capacity of mkldnn, 0 means no limit.
  mkldnn_op_list: OP list optimized by mkldnn, None default.
  mkldnn_bf16_op_list: OP list optimized by mkldnn bf16, None default.
+ use_calib: set inference use_calib_mode param, False default.
  Returns:
  None
@@ -96,6 +98,7 @@ class LocalServiceHandler(object):
  self._mkldnn_bf16_op_list = None
  self.min_subgraph_size = 3
  self.dynamic_shape_info = {}
+ self._use_calib = False
  if device_type == -1:
  # device_type is not set, determined by `devices`,
@@ -175,23 +178,24 @@ class LocalServiceHandler(object):
  self._mkldnn_cache_capacity = mkldnn_cache_capacity
  self._mkldnn_op_list = mkldnn_op_list
  self._mkldnn_bf16_op_list = mkldnn_bf16_op_list
+ self._use_calib = use_calib
  _LOGGER.info(
  "Models({}) will be launched by device {}. use_gpu:{}, "
  "use_trt:{}, use_lite:{}, use_xpu:{}, device_type:{}, devices:{}, "
  "mem_optim:{}, ir_optim:{}, use_profile:{}, thread_num:{}, "
- "client_type:{}, fetch_names:{}, precision:{}, use_mkldnn:{}, "
- "mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
+ "client_type:{}, fetch_names:{}, precision:{}, use_calib:{}, "
+ "use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
  "mkldnn_bf16_op_list:{}, use_ascend_cl:{}, min_subgraph_size:{},"
  "is_set_dynamic_shape_info:{}".format(
  model_config, self._device_name, self._use_gpu, self._use_trt,
  self._use_lite, self._use_xpu, device_type, self._devices,
  self._mem_optim, self._ir_optim, self._use_profile,
  self._thread_num, self._client_type, self._fetch_names,
- self._precision, self._use_mkldnn, self._mkldnn_cache_capacity,
- self._mkldnn_op_list, self._mkldnn_bf16_op_list,
- self._use_ascend_cl, self.min_subgraph_size,
- bool(len(self.dynamic_shape_info))))
+ self._precision, self._use_calib, self._use_mkldnn,
+ self._mkldnn_cache_capacity, self._mkldnn_op_list,
+ self._mkldnn_bf16_op_list, self._use_ascend_cl,
+ self.min_subgraph_size, bool(len(self.dynamic_shape_info))))
  def get_fetch_list(self):
  return self._fetch_names
@@ -250,7 +254,8 @@ class LocalServiceHandler(object):
  mkldnn_bf16_op_list=self._mkldnn_bf16_op_list,
  use_ascend_cl=self._use_ascend_cl,
  min_subgraph_size=self.min_subgraph_size,
- dynamic_shape_info=self.dynamic_shape_info)
+ dynamic_shape_info=self.dynamic_shape_info,
+ use_calib=self._use_calib)
  return self._local_predictor_client
  def get_client_config(self):
......
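At the handler level the change is a pass-through: the new keyword is stored as self._use_calib and forwarded to LocalPredictor.load_model_config when the local predictor client is built, as the hunk above shows. A hedged construction sketch follows; the import path, model directory, and device settings are illustrative assumptions, not part of this diff.

    # Hypothetical wiring, following the parameter names added in this commit.
    # Import path may differ across Serving releases (assumption).
    from paddle_serving_server.pipeline.local_service_handler import LocalServiceHandler

    handler = LocalServiceHandler(
        model_config="uci_housing_model",   # placeholder model directory
        client_type="local_predictor",
        devices="0",                        # GPU card 0 (assumption)
        use_calib=True)                     # new keyword introduced by this PR
    # Internally the handler passes use_calib=self._use_calib on to
    # LocalPredictor.load_model_config when creating the predictor client.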
@@ -193,6 +193,7 @@ class Op(object):
  self.mkldnn_op_list = None
  self.mkldnn_bf16_op_list = None
  self.min_subgraph_size = 3
+ self.use_calib = False
  if self._server_endpoints is None:
  server_endpoints = conf.get("server_endpoints", [])
@@ -216,6 +217,7 @@ class Op(object):
  self.ir_optim = local_service_conf.get("ir_optim")
  self._fetch_names = local_service_conf.get("fetch_list")
  self.precision = local_service_conf.get("precision")
+ self.use_calib = local_service_conf.get("use_calib")
  self.use_mkldnn = local_service_conf.get("use_mkldnn")
  self.mkldnn_cache_capacity = local_service_conf.get(
  "mkldnn_cache_capacity")
@@ -248,7 +250,8 @@ class Op(object):
  mkldnn_op_list=self.mkldnn_bf16_op_list,
  mkldnn_bf16_op_list=self.mkldnn_bf16_op_list,
  min_subgraph_size=self.min_subgraph_size,
- dynamic_shape_info=self.dynamic_shape_info)
+ dynamic_shape_info=self.dynamic_shape_info,
+ use_calib=self.use_calib)
  service_handler.prepare_server()  # get fetch_list
  serivce_ports = service_handler.get_port_list()
  self._server_endpoints = [
@@ -278,7 +281,8 @@ class Op(object):
  mkldnn_op_list=self.mkldnn_op_list,
  mkldnn_bf16_op_list=self.mkldnn_bf16_op_list,
  min_subgraph_size=self.min_subgraph_size,
- dynamic_shape_info=self.dynamic_shape_info)
+ dynamic_shape_info=self.dynamic_shape_info,
+ use_calib=self.use_calib)
  if self._client_config is None:
  self._client_config = service_handler.get_client_config(
  )
@@ -784,8 +788,8 @@ class Op(object):
  self.mkldnn_cache_capacity, self.mkldnn_op_list,
  self.mkldnn_bf16_op_list, self.is_jump_op(),
  self.get_output_channels_of_jump_ops(),
- self.min_subgraph_size,
- self.dynamic_shape_info))
+ self.min_subgraph_size, self.dynamic_shape_info,
+ self.use_calib))
  p.daemon = True
  p.start()
  process.append(p)
@@ -823,8 +827,8 @@ class Op(object):
  self.mkldnn_cache_capacity, self.mkldnn_op_list,
  self.mkldnn_bf16_op_list, self.is_jump_op(),
  self.get_output_channels_of_jump_ops(),
- self.min_subgraph_size,
- self.dynamic_shape_info))
+ self.min_subgraph_size, self.dynamic_shape_info,
+ self.use_calib))
  # When a process exits, it attempts to terminate
  # all of its daemonic child processes.
  t.daemon = True
@@ -1283,9 +1287,10 @@ class Op(object):
  def _run(self, concurrency_idx, input_channel, output_channels,
  is_thread_op, trace_buffer, model_config, workdir, thread_num,
- device_type, devices, mem_optim, ir_optim, precision, use_mkldnn,
- mkldnn_cache_capacity, mkldnn_op_list, mkldnn_bf16_op_list,
- is_jump_op, output_channels_of_jump_ops, min_subgraph_size, dynamic_shape_info):
+ device_type, devices, mem_optim, ir_optim, precision,
+ use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list,
+ mkldnn_bf16_op_list, is_jump_op, output_channels_of_jump_ops,
+ min_subgraph_size, dynamic_shape_info, use_calib):
  """
  _run() is the entry function of OP process / thread model. When client
  type is local_predictor in process mode, the CUDA environment needs to
@@ -1314,6 +1319,7 @@ class Op(object):
  mkldnn_bf16_op_list: OP list optimized by mkldnn bf16, None default.
  is_jump_op: OP has jump op list or not, False default.
  output_channels_of_jump_ops: all output channels of jump ops.
+ use_calib: use calib mode of paddle inference, False default.
  Returns:
  None
@@ -1339,7 +1345,8 @@ class Op(object):
  mkldnn_op_list=mkldnn_op_list,
  mkldnn_bf16_op_list=mkldnn_bf16_op_list,
  min_subgraph_size=min_subgraph_size,
- dynamic_shape_info=dynamic_shape_info)
+ dynamic_shape_info=dynamic_shape_info,
+ use_calib=use_calib)
  _LOGGER.info("Init cuda env in process {}".format(
  concurrency_idx))
......
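Op picks the flag up from local_service_conf, so a pipeline config.yml is expected to enable calibration roughly as in the fragment below. This is a hedged sketch: the op name, model path, fetch name, and device_type value are placeholders; only the use_calib and min_subgraph_size keys relate to this PR (min_subgraph_size is validated by the checker change that follows).

    op:
        rec:                                     # placeholder op name
            concurrency: 1
            local_service_conf:
                model_config: ./rec_model        # placeholder model directory
                client_type: local_predictor
                device_type: 2                   # GPU + TensorRT (assumption)
                devices: "0"
                fetch_list: ["softmax_0.tmp_0"]  # placeholder fetch name
                precision: int8
                use_calib: True                  # read via local_service_conf.get("use_calib")
                min_subgraph_size: 3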
@@ -421,6 +421,7 @@ class ServerYamlConfChecker(object):
  "use_calib": False,
  "use_mkldnn": False,
  "mkldnn_cache_capacity": 0,
+ "min_subgraph_size": 3,
  }
  conf_type = {
  "model_config": str,
@@ -436,6 +437,7 @@ class ServerYamlConfChecker(object):
  "mkldnn_cache_capacity": int,
  "mkldnn_op_list": list,
  "mkldnn_bf16_op_list": list,
+ "min_subgraph_size": int,
  }
  conf_qualification = {"thread_num": (">=", 1), }
  ServerYamlConfChecker.check_conf(conf, default_conf, conf_type,
......
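These two additions make min_subgraph_size a recognized key with a default of 3 and an int type. As a rough illustration of what this default/type checking amounts to (a sketch, not the actual check_conf implementation):

    # Hedged illustration only: fill missing keys from defaults, then
    # reject values whose type does not match the declared conf_type.
    def check_conf_sketch(conf, default_conf, conf_type):
        for key, default in default_conf.items():
            conf.setdefault(key, default)
        for key, expected in conf_type.items():
            if key in conf and not isinstance(conf[key], expected):
                raise SystemExit("{} must be of type {}".format(key, expected.__name__))
        return conf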
@@ -9,9 +9,9 @@ function usage
  echo " ";
  echo " --env : running env, cpu/cuda10.1/cuda10.2/cuda11.2";
  echo " --python : python version, 3.6/3.7/3.8 ";
- #echo " --serving : serving version(0.6.0/0.6.2)";
- #echo " --paddle : paddle version(2.1.0/2.2.0)"
- echo " --image_name : image name(default serving_runtime:env-python)";
+ echo " --serving : serving version(0.7.0/0.6.2)";
+ echo " --paddle : paddle version(2.2.0/2.1.2)"
+ echo " --image_name : image name(default serving_runtime:env-python)"
  echo " -h | --help : helper";
  }
@@ -25,8 +25,8 @@ function parse_args
  case "$1" in
  --env ) env="$2"; shift;;
  --python ) python="$2"; shift;;
- #--serving ) serving="$2"; shift;;
- #--paddle ) paddle="$2"; shift;;
+ --serving ) serving="$2"; shift;;
+ --paddle ) paddle="$2"; shift;;
  --image_name ) image_name="$2"; shift;;
  -h | --help ) usage; exit;; # quit and show usage
  * ) args+=("$1") # if no match, add it to the positional args
@@ -41,7 +41,7 @@ function parse_args
  positional_2="${args[1]}"
  # validate required args
- if [[ -z "${env}" || -z "${python}" ]]; then
+ if [[ -z "${paddle}" || -z "${env}" || -z "${python}" || -z "${serving}" ]]; then
  echo "Invalid arguments. paddle or env or python or serving is missing."
  usage
  exit;
@@ -57,8 +57,6 @@ function run
  function run
  {
- python="2.2.0"
- serving="0.7.0"
  parse_args "$@"
  echo "named arg: env: $env"
@@ -71,6 +69,8 @@ function run
  elif [ $env == "cuda11.2" ]; then
  base_image="nvidia\/cuda:11.2.0-cudnn8-runtime-ubuntu16.04"
  fi
+ #python="2.2.0"
+ #serving="0.7.0"
  echo "base image: $base_image"
  echo "named arg: python: $python"
  echo "named arg: serving: $serving"
@@ -78,8 +78,7 @@ function run
  echo "named arg: image_name: $image_name"
  sed -e "s/<<base_image>>/$base_image/g" -e "s/<<python_version>>/$python/g" -e "s/<<run_env>>/$env/g" -e "s/<<serving_version>>/$serving/g" -e "s/<<paddle_version>>/$paddle/g" tools/Dockerfile.runtime_template > Dockerfile.tmp
- #docker build --network=host --build-arg ftp_proxy=http://172.19.57.45:3128 --build-arg https_proxy=http://172.19.57.45:3128 --build-arg http_proxy=http://172.19.57.45:3128 --build-arg HTTP_PROXY=http://172.19.57.45:3128 --build-arg HTTPS_PROXY=http://172.19.57.45:3128 -t $image_name -f Dockerfile.tmp .
- docker build -t $image_name -f Dockerfile.tmp .
+ docker build --network=host --build-arg ftp_proxy=http://172.19.57.45:3128 --build-arg https_proxy=http://172.19.57.45:3128 --build-arg http_proxy=http://172.19.57.45:3128 --build-arg HTTP_PROXY=http://172.19.57.45:3128 --build-arg HTTPS_PROXY=http://172.19.57.45:3128 -t $image_name -f Dockerfile.tmp .
  }
  run "$@";