Commit c368bfdc authored by zhangjun

add low-precision, use_params for python

Parent 6889e1f5
@@ -40,6 +40,7 @@ DECLARE_int32(gpuid);
static const int max_batch = 32;
static const int min_subgraph_size = 3;
static predictor::Precision precision_type;
// Engine Base
class PaddleEngineBase {
@@ -137,6 +138,7 @@ class PaddleInferenceEngine : public PaddleEngineBase {
// 2000MB GPU memory
config.EnableUseGpu(2000, FLAGS_gpuid);
}
precision_type = predictor::GetPrecision(FLAGS_precision);
if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
@@ -145,15 +147,24 @@ class PaddleInferenceEngine : public PaddleEngineBase {
config.EnableTensorRtEngine(1 << 20,
max_batch,
min_subgraph_size,
Config::Precision::kFloat32,
precision_type,
false,
use_calib);
// EnableMkldnnBfloat16();
LOG(INFO) << "create TensorRT predictor";
}
if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
config.EnableLiteEngine(PrecisionType::kFloat32, true);
config.EnableLiteEngine(precision_type, true);
}
if ((!engine_conf.has_use_lite() && !engine_conf.has_use_gpu()) ||
(engine_conf.has_use_lite() && !engine_conf.use_lite() &&
engine_conf.has_use_gpu() && !engine_conf.use_gpu())) {
if (precision_type == Precision::kInt8) {
config.EnableMkldnnQuantizer();
} else if (precision_type == Precision::kHalf) {
config.EnableMkldnnBfloat16();
}
}
if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
......
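The C++ hunk above routes the single precision flag to three different backends: TensorRT consumes the enum directly, Paddle-Lite reuses the same enum, and the pure-CPU path maps int8 to the MKL-DNN quantizer and fp16 to MKL-DNN bfloat16. A minimal Python sketch of that dispatch logic follows; the function name and return strings are illustrative only, not part of the commit:

```python
# Illustrative sketch of the precision dispatch added above; the helper
# name is hypothetical and only mirrors the C++ control flow.
def select_precision_backend(precision, use_gpu=False, use_trt=False,
                             use_lite=False):
    """Return which backend knob a precision string would toggle."""
    precision = precision.lower()
    if use_trt and use_gpu:
        # TensorRT consumes the precision enum directly (fp32/fp16/int8).
        return "EnableTensorRtEngine({})".format(precision)
    if use_lite:
        return "EnableLiteEngine({})".format(precision)
    # Pure CPU path: int8 -> MKL-DNN quantizer, fp16/bf16 -> MKL-DNN bfloat16.
    if precision == "int8":
        return "EnableMkldnnQuantizer()"
    if precision in ("fp16", "bf16"):
        return "EnableMkldnnBfloat16()"
    return "fp32 (no extra toggle)"

# e.g. select_precision_backend("int8") -> "EnableMkldnnQuantizer()"
```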
@@ -51,6 +51,16 @@ def serve_args():
"--name", type=str, default="None", help="Default service name")
parser.add_argument(
"--use_mkl", default=False, action="store_true", help="Use MKL")
parser.add_argument(
"--precision",
type=str,
default="fp32",
help="precision mode(fp32, int8, fp16, bf16)")
parser.add_argument(
"--use_calib",
default=False,
action="store_true",
help="Use TensorRT Calibration")
parser.add_argument(
"--mem_optim_off",
default=False,
@@ -109,7 +119,7 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing
if model == "":
print("You must specify your serving model")
exit(-1)
for single_model_config in args.model:
if os.path.isdir(single_model_config):
pass
@@ -131,11 +141,10 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing
infer_op_name = "general_detection"
general_infer_op = op_maker.create(infer_op_name)
op_seq_maker.add_op(general_infer_op)
general_response_op = op_maker.create('general_response')
op_seq_maker.add_op(general_response_op)
server = None
if use_multilang:
server = serving.MultiLangServer()
@@ -148,6 +157,8 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing
server.use_mkl(use_mkl)
server.set_max_body_size(max_body_size)
server.set_port(port)
server.set_precision(args.precision)
server.set_use_calib(args.use_calib)
server.use_encryption_model(use_encryption_model)
if args.product_name != None:
server.set_product_name(args.product_name)
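Taken together, the new `--precision` and `--use_calib` flags defined in `serve_args` flow straight into the server object via the two `set_*` calls above. A standalone parse check (plain `argparse`, independent of the serving module) shows the expected defaults and overrides:

```python
# Standalone sketch: how the new CLI flags are expected to reach the server.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--precision", type=str, default="fp32",
                    help="precision mode (fp32, int8, fp16, bf16)")
parser.add_argument("--use_calib", default=False, action="store_true",
                    help="Use TensorRT Calibration")
args = parser.parse_args(["--precision", "fp16", "--use_calib"])

assert args.precision == "fp16"
assert args.use_calib is True
# These values are then forwarded via server.set_precision(args.precision)
# and server.set_use_calib(args.use_calib), as in the hunk above.
```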
@@ -199,7 +210,7 @@ def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-string-missing
infer_op_name = "general_infer"
general_infer_op = op_maker.create(infer_op_name)
op_seq_maker.add_op(general_infer_op)
general_response_op = op_maker.create('general_response')
op_seq_maker.add_op(general_response_op)
@@ -210,6 +221,8 @@ def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-string-missing
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num)
server.use_mkl(use_mkl)
server.set_precision(args.precision)
server.set_use_calib(args.use_calib)
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.set_max_body_size(max_body_size)
@@ -297,7 +310,8 @@ class MainService(BaseHTTPRequestHandler):
key = base64.b64decode(post_data["key"].encode())
for single_model_config in args.model:
if os.path.isfile(single_model_config):
raise ValueError("The input of --model should be a dir not file.")
raise ValueError(
"The input of --model should be a dir not file.")
with open(single_model_config + "/key", "wb") as f:
f.write(key)
return True
@@ -309,7 +323,8 @@ class MainService(BaseHTTPRequestHandler):
key = base64.b64decode(post_data["key"].encode())
for single_model_config in args.model:
if os.path.isfile(single_model_config):
raise ValueError("The input of --model should be a dir not file.")
raise ValueError(
"The input of --model should be a dir not file.")
with open(single_model_config + "/key", "rb") as f:
cur_key = f.read()
if key != cur_key:
@@ -394,7 +409,9 @@ if __name__ == "__main__":
device=args.device,
use_lite=args.use_lite,
use_xpu=args.use_xpu,
ir_optim=args.ir_optim)
ir_optim=args.ir_optim,
precision=args.precision,
use_calib=args.use_calib)
web_service.run_rpc_service()
app_instance = Flask(__name__)
......
@@ -65,6 +65,8 @@ class Server(object):
self.max_concurrency = 0
self.num_threads = 2
self.port = 8080
self.precision = "fp32"
self.use_calib = False
self.reload_interval_s = 10
self.max_body_size = 64 * 1024 * 1024
self.module_path = os.path.dirname(paddle_serving_server.__file__)
@@ -108,6 +110,12 @@ class Server(object):
def set_port(self, port):
self.port = port
def set_precision(self, precision="fp32"):
self.precision = precision
def set_use_calib(self, use_calib=False):
self.use_calib = use_calib
def set_reload_interval(self, interval):
self.reload_interval_s = interval
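With these setters in place, configuring precision programmatically looks roughly like the following. A minimal sketch; the exact import path for `Server` depends on the package layout and is assumed here:

```python
# Minimal sketch, assuming Server is importable from paddle_serving_server.
from paddle_serving_server import Server

server = Server()
server.set_port(9393)
server.set_precision("bf16")   # default remains "fp32"
server.set_use_calib(False)    # TensorRT calibration stays off
```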
@@ -471,6 +479,8 @@ class Server(object):
"-max_concurrency {} " \
"-num_threads {} " \
"-port {} " \
"-precision {} " \
"-use_calib {} " \
"-reload_interval_s {} " \
"-resource_path {} " \
"-resource_file {} " \
@@ -484,6 +494,8 @@ class Server(object):
self.max_concurrency,
self.num_threads,
self.port,
self.precision,
self.use_calib,
self.reload_interval_s,
self.workdir,
self.resource_fn,
@@ -499,6 +511,8 @@ class Server(object):
"-max_concurrency {} " \
"-num_threads {} " \
"-port {} " \
"-precision {} " \
"-use_calib {} " \
"-reload_interval_s {} " \
"-resource_path {} " \
"-resource_file {} " \
@@ -513,6 +527,8 @@ class Server(object):
self.max_concurrency,
self.num_threads,
self.port,
self.precision,
self.use_calib,
self.reload_interval_s,
self.workdir,
self.resource_fn,
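Both command templates interpolate the new fields positionally, so the argument order in the `format` call must match the flag order in the template. A stripped-down illustration of the same `str.format` pattern, showing only the two new flags; note that a Python bool renders as `True`/`False` in the resulting command line:

```python
# Stripped-down illustration of the positional formatting used above;
# only the two new flags are shown.
command_fragment = "-precision {} -use_calib {} ".format("fp32", False)
assert command_fragment == "-precision fp32 -use_calib False "
```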
@@ -561,6 +577,12 @@ class MultiLangServer(object):
def set_port(self, port):
self.gport_ = port
def set_precision(self, precision="fp32"):
self.precision = precision
def set_use_calib(self, use_calib=False):
self.use_calib = use_calib
def set_reload_interval(self, interval):
self.bserver_.set_reload_interval(interval)
......
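`MultiLangServer` gains the same two setters; note that, unlike `set_reload_interval` just below them, they store the values on the wrapper itself rather than delegating to `bserver_`. Usage mirrors the single-language server (a sketch under the same import assumption as above):

```python
# Sketch only; the MultiLangServer import path is assumed, not verified.
from paddle_serving_server import MultiLangServer

ml_server = MultiLangServer()
ml_server.set_port(9393)
ml_server.set_precision("int8")
ml_server.set_use_calib(True)   # pair int8 with TensorRT calibration
```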
@@ -27,6 +27,7 @@ import os
from paddle_serving_server import pipeline
from paddle_serving_server.pipeline import Op
def port_is_available(port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
@@ -36,6 +37,7 @@ def port_is_available(port):
else:
return False
class WebService(object):
def __init__(self, name="default_service"):
self.name = name
@@ -63,7 +65,9 @@ class WebService(object):
def run_service(self):
self._server.run_server()
def load_model_config(self, server_config_dir_paths, client_config_path=None):
def load_model_config(self,
server_config_dir_paths,
client_config_path=None):
if isinstance(server_config_dir_paths, str):
server_config_dir_paths = [server_config_dir_paths]
elif isinstance(server_config_dir_paths, list):
@@ -73,14 +77,16 @@ class WebService(object):
if os.path.isdir(single_model_config):
pass
elif os.path.isfile(single_model_config):
raise ValueError("The input of --model should be a dir not file.")
raise ValueError(
"The input of --model should be a dir not file.")
self.server_config_dir_paths = server_config_dir_paths
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
file_path_list = []
for single_model_config in self.server_config_dir_paths:
file_path_list.append( "{}/serving_server_conf.prototxt".format(single_model_config) )
file_path_list.append("{}/serving_server_conf.prototxt".format(
single_model_config))
model_conf = m_config.GeneralModelConfig()
f = open(file_path_list[0], 'r')
model_conf = google.protobuf.text_format.Merge(
@@ -109,7 +115,9 @@ class WebService(object):
mem_optim=True,
use_lite=False,
use_xpu=False,
ir_optim=False):
ir_optim=False,
precision="fp32",
use_calib=False):
device = "gpu"
if gpuid == -1:
if use_lite:
@@ -130,7 +138,7 @@ class WebService(object):
infer_op_name = "general_infer"
general_infer_op = op_maker.create(infer_op_name)
op_seq_maker.add_op(general_infer_op)
general_response_op = op_maker.create('general_response')
op_seq_maker.add_op(general_response_op)
@@ -140,13 +148,16 @@ class WebService(object):
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.set_device(device)
server.set_precision(precision)
server.set_use_calib(use_calib)
if use_lite:
server.set_lite()
if use_xpu:
server.set_xpu()
server.load_model_config(self.server_config_dir_paths)#brpc Server support server_config_dir_paths
server.load_model_config(self.server_config_dir_paths
) #brpc Server support server_config_dir_paths
if gpuid >= 0:
server.set_gpuid(gpuid)
server.prepare_server(workdir=workdir, port=port, device=device)
@@ -159,6 +170,8 @@ class WebService(object):
workdir="",
port=9393,
device="gpu",
precision="fp32",
use_calib=False,
use_lite=False,
use_xpu=False,
ir_optim=False,
@@ -188,7 +201,9 @@ class WebService(object):
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
ir_optim=ir_optim))
ir_optim=ir_optim,
precision=precision,
use_calib=use_calib))
else:
for i, gpuid in enumerate(self.gpus):
self.rpc_service_list.append(
@@ -200,7 +215,9 @@ class WebService(object):
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
ir_optim=ir_optim))
ir_optim=ir_optim,
precision=precision,
use_calib=use_calib))
def _launch_web_service(self):
gpu_num = len(self.gpus)
@@ -297,9 +314,13 @@ class WebService(object):
# default self.gpus = [0].
if len(self.gpus) == 0:
self.gpus.append(0)
self.client.load_model_config(self.server_config_dir_paths[0], use_gpu=True, gpu_id=self.gpus[0])
self.client.load_model_config(
self.server_config_dir_paths[0],
use_gpu=True,
gpu_id=self.gpus[0])
else:
self.client.load_model_config(self.server_config_dir_paths[0], use_gpu=False)
self.client.load_model_config(
self.server_config_dir_paths[0], use_gpu=False)
def run_web_service(self):
print("This API will be deprecated later. Please do not use it")
......
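End to end, the new arguments thread from `WebService.prepare_server` through `default_rpc_service` down to the underlying brpc server. A hedged usage sketch; the model directory and service name are placeholders, and the import follows the module patched above:

```python
# Hedged usage sketch; "uci_housing_model" is a placeholder model directory.
from paddle_serving_server.web_service import WebService

service = WebService(name="demo")
service.load_model_config("uci_housing_model")
service.prepare_server(workdir="workdir", port=9393, device="cpu",
                       precision="fp16", use_calib=False)
service.run_rpc_service()
```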