Unverified commit 87d4f7aa authored by TeslaZhao, committed by GitHub

Merge pull request #1594 from TeslaZhao/develop

Update params & kubernetes gen shell
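This PR moves the use_calib switch to the end of the local predictor signature, threads it through LocalServiceHandler and the pipeline Op, and finally passes it to Paddle Inference as use_calib_mode; it also restores the --serving/--paddle options in the runtime-image generation shell. A minimal caller-side sketch, assuming the signature changed below belongs to LocalPredictor.load_model_config; the model path is a placeholder and the use_gpu/use_trt keywords are assumptions taken from the handler's log fields:

    from paddle_serving_app.local_predict import LocalPredictor

    predictor = LocalPredictor()
    # "serving_server" is a placeholder directory holding
    # serving_server_conf.prototxt and the inference model files.
    predictor.load_model_config(
        "serving_server",
        use_gpu=True,          # assumed parameter name
        use_trt=True,          # assumed parameter name
        precision="int8",
        min_subgraph_size=3,
        use_calib=True)        # the trailing keyword added by this PR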
......@@ -82,7 +82,6 @@ class LocalPredictor(object):
use_lite=False,
use_xpu=False,
precision="fp32",
use_calib=False,
use_mkldnn=False,
mkldnn_cache_capacity=0,
mkldnn_op_list=None,
......@@ -90,7 +89,8 @@ class LocalPredictor(object):
use_feed_fetch_ops=False,
use_ascend_cl=False,
min_subgraph_size=3,
dynamic_shape_info={}):
dynamic_shape_info={},
use_calib=False):
"""
Load model configs and create the paddle predictor by Paddle Inference API.
......@@ -109,7 +109,6 @@ class LocalPredictor(object):
use_lite: use Paddle-Lite Engine, False default
use_xpu: run predict on Baidu Kunlun, False default
precision: precision mode, "fp32" default
use_calib: use TensorRT calibration, False default
use_mkldnn: use MKLDNN, False default.
mkldnn_cache_capacity: cache capacity for input shapes, 0 default.
mkldnn_op_list: op list accelerated using MKLDNN, None default.
......@@ -118,6 +117,7 @@ class LocalPredictor(object):
use_ascend_cl: run predict on Huawei Ascend, False default
min_subgraph_size: the minimal subgraph size for opening tensorrt to optimize, 3 default
dynamic_shape_info: dict including min_input_shape, max_input_shape, opt_input_shape, {} default
use_calib: use TensorRT calibration, False default
"""
gpu_id = int(gpu_id)
client_config = "{}/serving_server_conf.prototxt".format(model_path)
......@@ -221,7 +221,7 @@ class LocalPredictor(object):
max_batch_size=32,
min_subgraph_size=min_subgraph_size,
use_static=False,
use_calib_mode=False)
use_calib_mode=use_calib)
if len(dynamic_shape_info):
config.set_trt_dynamic_shape_info(
......
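For reference, the change above lands in the Paddle Inference API: with TensorRT enabled and int8 precision, use_calib_mode turns on offline calibration. A self-contained sketch, not the Serving wrapper itself; model file names are placeholders:

    from paddle.inference import Config, PrecisionType, create_predictor

    # Placeholder model files; any exported inference model works here.
    config = Config("inference.pdmodel", "inference.pdiparams")
    config.enable_use_gpu(100, 0)          # 100 MB initial pool on GPU 0
    config.enable_tensorrt_engine(
        workspace_size=1 << 30,
        max_batch_size=32,
        min_subgraph_size=3,
        precision_mode=PrecisionType.Int8,
        use_static=False,
        use_calib_mode=True)               # what use_calib now controls
    predictor = create_predictor(config)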
......@@ -52,7 +52,8 @@ class LocalServiceHandler(object):
mkldnn_op_list=None,
mkldnn_bf16_op_list=None,
min_subgraph_size=3,
dynamic_shape_info={}):
dynamic_shape_info={},
use_calib=False):
"""
Initialization of LocalServiceHandler
......@@ -75,6 +76,7 @@ class LocalServiceHandler(object):
mkldnn_cache_capacity: cache capacity of mkldnn, 0 means no limit.
mkldnn_op_list: OP list optimized by mkldnn, None default.
mkldnn_bf16_op_list: OP list optimized by mkldnn bf16, None default.
use_calib: set inference use_calib_mode param, False default.
Returns:
None
......@@ -96,6 +98,7 @@ class LocalServiceHandler(object):
self._mkldnn_bf16_op_list = None
self.min_subgraph_size = 3
self.dynamic_shape_info = {}
self._use_calib = False
if device_type == -1:
# device_type is not set, determined by `devices`,
......@@ -175,23 +178,24 @@ class LocalServiceHandler(object):
self._mkldnn_cache_capacity = mkldnn_cache_capacity
self._mkldnn_op_list = mkldnn_op_list
self._mkldnn_bf16_op_list = mkldnn_bf16_op_list
self._use_calib = use_calib
_LOGGER.info(
"Models({}) will be launched by device {}. use_gpu:{}, "
"use_trt:{}, use_lite:{}, use_xpu:{}, device_type:{}, devices:{}, "
"mem_optim:{}, ir_optim:{}, use_profile:{}, thread_num:{}, "
"client_type:{}, fetch_names:{}, precision:{}, use_mkldnn:{}, "
"mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
"client_type:{}, fetch_names:{}, precision:{}, use_calib:{}, "
"use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
"mkldnn_bf16_op_list:{}, use_ascend_cl:{}, min_subgraph_size:{},"
"is_set_dynamic_shape_info:{}".format(
model_config, self._device_name, self._use_gpu, self._use_trt,
self._use_lite, self._use_xpu, device_type, self._devices,
self._mem_optim, self._ir_optim, self._use_profile,
self._thread_num, self._client_type, self._fetch_names,
self._precision, self._use_mkldnn, self._mkldnn_cache_capacity,
self._mkldnn_op_list, self._mkldnn_bf16_op_list,
self._use_ascend_cl, self.min_subgraph_size,
bool(len(self.dynamic_shape_info))))
self._precision, self._use_calib, self._use_mkldnn,
self._mkldnn_cache_capacity, self._mkldnn_op_list,
self._mkldnn_bf16_op_list, self._use_ascend_cl,
self.min_subgraph_size, bool(len(self.dynamic_shape_info))))
def get_fetch_list(self):
return self._fetch_names
......@@ -250,7 +254,8 @@ class LocalServiceHandler(object):
mkldnn_bf16_op_list=self._mkldnn_bf16_op_list,
use_ascend_cl=self._use_ascend_cl,
min_subgraph_size=self.min_subgraph_size,
dynamic_shape_info=self.dynamic_shape_info)
dynamic_shape_info=self.dynamic_shape_info,
use_calib=self._use_calib)
return self._local_predictor_client
def get_client_config(self):
......
......@@ -193,6 +193,7 @@ class Op(object):
self.mkldnn_op_list = None
self.mkldnn_bf16_op_list = None
self.min_subgraph_size = 3
self.use_calib = False
if self._server_endpoints is None:
server_endpoints = conf.get("server_endpoints", [])
......@@ -216,6 +217,7 @@ class Op(object):
self.ir_optim = local_service_conf.get("ir_optim")
self._fetch_names = local_service_conf.get("fetch_list")
self.precision = local_service_conf.get("precision")
self.use_calib = local_service_conf.get("use_calib")
self.use_mkldnn = local_service_conf.get("use_mkldnn")
self.mkldnn_cache_capacity = local_service_conf.get(
"mkldnn_cache_capacity")
......@@ -248,7 +250,8 @@ class Op(object):
mkldnn_op_list=self.mkldnn_op_list,
mkldnn_bf16_op_list=self.mkldnn_bf16_op_list,
min_subgraph_size=self.min_subgraph_size,
dynamic_shape_info=self.dynamic_shape_info)
dynamic_shape_info=self.dynamic_shape_info,
use_calib=self.use_calib)
service_handler.prepare_server() # get fetch_list
serivce_ports = service_handler.get_port_list()
self._server_endpoints = [
......@@ -278,7 +281,8 @@ class Op(object):
mkldnn_op_list=self.mkldnn_op_list,
mkldnn_bf16_op_list=self.mkldnn_bf16_op_list,
min_subgraph_size=self.min_subgraph_size,
dynamic_shape_info=self.dynamic_shape_info)
dynamic_shape_info=self.dynamic_shape_info,
use_calib=self.use_calib)
if self._client_config is None:
self._client_config = service_handler.get_client_config(
)
......@@ -784,8 +788,8 @@ class Op(object):
self.mkldnn_cache_capacity, self.mkldnn_op_list,
self.mkldnn_bf16_op_list, self.is_jump_op(),
self.get_output_channels_of_jump_ops(),
self.min_subgraph_size,
self.dynamic_shape_info))
self.min_subgraph_size, self.dynamic_shape_info,
self.use_calib))
p.daemon = True
p.start()
process.append(p)
......@@ -819,12 +823,12 @@ class Op(object):
self._get_output_channels(), True, trace_buffer,
self.model_config, self.workdir, self.thread_num,
self.device_type, self.devices, self.mem_optim,
self.ir_optim, self.precision, self.use_mkldnn,
self.mkldnn_cache_capacity, self.mkldnn_op_list,
self.mkldnn_bf16_op_list, self.is_jump_op(),
self.ir_optim, self.precision, self.use_mkldnn,
self.mkldnn_cache_capacity, self.mkldnn_op_list,
self.mkldnn_bf16_op_list, self.is_jump_op(),
self.get_output_channels_of_jump_ops(),
self.min_subgraph_size,
self.dynamic_shape_info))
self.min_subgraph_size, self.dynamic_shape_info,
self.use_calib))
# When a process exits, it attempts to terminate
# all of its daemonic child processes.
t.daemon = True
......@@ -1283,9 +1287,10 @@ class Op(object):
def _run(self, concurrency_idx, input_channel, output_channels,
is_thread_op, trace_buffer, model_config, workdir, thread_num,
device_type, devices, mem_optim, ir_optim, precision, use_mkldnn,
mkldnn_cache_capacity, mkldnn_op_list, mkldnn_bf16_op_list,
is_jump_op, output_channels_of_jump_ops, min_subgraph_size, dynamic_shape_info):
device_type, devices, mem_optim, ir_optim, precision,
use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list,
mkldnn_bf16_op_list, is_jump_op, output_channels_of_jump_ops,
min_subgraph_size, dynamic_shape_info, use_calib):
"""
_run() is the entry function of OP process / thread model. When client
type is local_predictor in process mode, the CUDA environment needs to
......@@ -1314,6 +1319,7 @@ class Op(object):
mkldnn_bf16_op_list: OP list optimized by mkldnn bf16, None default.
is_jump_op: OP has jump op list or not, False default.
output_channels_of_jump_ops: all output channels of jump ops.
use_calib: use calib mode of paddle inference, False default.
Returns:
None
......@@ -1339,7 +1345,8 @@ class Op(object):
mkldnn_op_list=mkldnn_op_list,
mkldnn_bf16_op_list=mkldnn_bf16_op_list,
min_subgraph_size=min_subgraph_size,
dynamic_shape_info=dynamic_shape_info)
dynamic_shape_info=dynamic_shape_info,
use_calib=use_calib)
_LOGGER.info("Init cuda env in process {}".format(
concurrency_idx))
......
......@@ -421,6 +421,7 @@ class ServerYamlConfChecker(object):
"use_calib": False,
"use_mkldnn": False,
"mkldnn_cache_capacity": 0,
"min_subgraph_size": 3,
}
conf_type = {
"model_config": str,
......@@ -436,6 +437,7 @@ class ServerYamlConfChecker(object):
"mkldnn_cache_capacity": int,
"mkldnn_op_list": list,
"mkldnn_bf16_op_list": list,
"min_subgraph_size": int,
}
conf_qualification = {"thread_num": (">=", 1), }
ServerYamlConfChecker.check_conf(conf, default_conf, conf_type,
......
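With these checker defaults in place, the new switches can be set in the pipeline config.yml under local_service_conf. An illustrative fragment only: the op name rec is hypothetical, the keys follow the fields read in this diff, and the other values are placeholders:

    op:
        rec:
            local_service_conf:
                model_config: ./serving_server
                device_type: 2          # assumed to select GPU + TensorRT
                devices: "0"
                precision: "int8"
                use_calib: True
                min_subgraph_size: 3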
......@@ -9,9 +9,9 @@ function usage
echo " ";
echo " --env : running env, cpu/cuda10.1/cuda10.2/cuda11.2";
echo " --python : python version, 3.6/3.7/3.8 ";
#echo " --serving : serving version(0.6.0/0.6.2)";
#echo " --paddle : paddle version(2.1.0/2.2.0)"
echo " --image_name : image name(default serving_runtime:env-python)";
echo " --serving : serving version(0.7.0/0.6.2)";
echo " --paddle : paddle version(2.2.0/2.1.2)"
echo " --image_name : image name(default serving_runtime:env-python)"
echo " -h | --help : helper";
}
......@@ -25,9 +25,9 @@ function parse_args
case "$1" in
--env ) env="$2"; shift;;
--python ) python="$2"; shift;;
#--serving ) serving="$2"; shift;;
#--paddle ) paddle="$2"; shift;;
--image_name ) image_name="$2"; shift;;
--serving ) serving="$2"; shift;;
--paddle ) paddle="$2"; shift;;
--image_name ) image_name="$2"; shift;;
-h | --help ) usage; exit;; # quit and show usage
* ) args+=("$1") # if no match, add it to the positional args
esac
......@@ -41,7 +41,7 @@ function parse_args
positional_2="${args[1]}"
# validate required args
if [[ -z "${env}" || -z "${python}" ]]; then
if [[ -z "${paddle}" || -z "${env}" || -z "${python}" || -z "${serving}" ]]; then
echo "Invalid arguments. paddle or env or python or serving is missing."
usage
exit;
......@@ -57,8 +57,6 @@ function parse_args
function run
{
python="2.2.0"
serving="0.7.0"
parse_args "$@"
echo "named arg: env: $env"
......@@ -71,6 +69,8 @@ function run
elif [ $env == "cuda11.2" ]; then
base_image="nvidia\/cuda:11.2.0-cudnn8-runtime-ubuntu16.04"
fi
#python="2.2.0"
#serving="0.7.0"
echo "base image: $base_image"
echo "named arg: python: $python"
echo "named arg: serving: $serving"
......@@ -78,8 +78,7 @@ function run
echo "named arg: image_name: $image_name"
sed -e "s/<<base_image>>/$base_image/g" -e "s/<<python_version>>/$python/g" -e "s/<<run_env>>/$env/g" -e "s/<<serving_version>>/$serving/g" -e "s/<<paddle_version>>/$paddle/g" tools/Dockerfile.runtime_template > Dockerfile.tmp
#docker build --network=host --build-arg ftp_proxy=http://172.19.57.45:3128 --build-arg https_proxy=http://172.19.57.45:3128 --build-arg http_proxy=http://172.19.57.45:3128 --build-arg HTTP_PROXY=http://172.19.57.45:3128 --build-arg HTTPS_PROXY=http://172.19.57.45:3128 -t $image_name -f Dockerfile.tmp .
docker build -t $image_name -f Dockerfile.tmp .
docker build --network=host --build-arg ftp_proxy=http://172.19.57.45:3128 --build-arg https_proxy=http://172.19.57.45:3128 --build-arg http_proxy=http://172.19.57.45:3128 --build-arg HTTP_PROXY=http://172.19.57.45:3128 --build-arg HTTPS_PROXY=http://172.19.57.45:3128 -t $image_name -f Dockerfile.tmp .
}
run "$@";